linux/arch/x86/events/intel/ds.c
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/bitops.h>
   3#include <linux/types.h>
   4#include <linux/slab.h>
   5
   6#include <asm/cpu_entry_area.h>
   7#include <asm/perf_event.h>
   8#include <asm/tlbflush.h>
   9#include <asm/insn.h>
  10#include <asm/io.h>
  11
  12#include "../perf_event.h"
  13
  14/* Waste a full page so it can be mapped into the cpu_entry_area */
  15DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
  16
  17/* The size of a BTS record in bytes: */
  18#define BTS_RECORD_SIZE         24
  19
  20#define PEBS_FIXUP_SIZE         PAGE_SIZE
  21
  22/*
   23 * pebs_record_32 for P4 and Core is not supported
  24
  25struct pebs_record_32 {
  26        u32 flags, ip;
   27        u32 ax, bx, cx, dx;
  28        u32 si, di, bp, sp;
  29};
  30
  31 */
  32
  33union intel_x86_pebs_dse {
  34        u64 val;
  35        struct {
  36                unsigned int ld_dse:4;
  37                unsigned int ld_stlb_miss:1;
  38                unsigned int ld_locked:1;
  39                unsigned int ld_data_blk:1;
  40                unsigned int ld_addr_blk:1;
  41                unsigned int ld_reserved:24;
  42        };
  43        struct {
  44                unsigned int st_l1d_hit:1;
  45                unsigned int st_reserved1:3;
  46                unsigned int st_stlb_miss:1;
  47                unsigned int st_locked:1;
  48                unsigned int st_reserved2:26;
  49        };
  50        struct {
  51                unsigned int st_lat_dse:4;
  52                unsigned int st_lat_stlb_miss:1;
  53                unsigned int st_lat_locked:1;
  54                unsigned int ld_reserved3:26;
  55        };
  56};
  57
  58
  59/*
  60 * Map PEBS Load Latency Data Source encodings to generic
  61 * memory data source information
  62 */
  63#define P(a, b) PERF_MEM_S(a, b)
  64#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
  65#define LEVEL(x) P(LVLNUM, x)
  66#define REM P(REMOTE, REMOTE)
  67#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
  68
  69/* Version for Sandy Bridge and later */
  70static u64 pebs_data_source[] = {
   71        P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: unknown L3 */
  72        OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
  73        OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
  74        OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
  75        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
  76        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
  77        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
  78        OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
  79        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
   80        OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm */
  81        OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
  82        OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
  83        OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
  84        OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
  85        OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
  86        OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
  87};
  88
  89/* Patch up minor differences in the bits */
  90void __init intel_pmu_pebs_data_source_nhm(void)
  91{
  92        pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
  93        pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
  94        pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
  95}
  96
  97void __init intel_pmu_pebs_data_source_skl(bool pmem)
  98{
  99        u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
 100
 101        pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
 102        pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
 103        pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
 104        pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
 105        pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
 106}
 107
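     /*
      * Decode the PEBS 'data source' auxiliary field of a precise-store
      * record into a generic perf_mem_data_src value: DTLB hit/miss,
      * L1D hit/miss and lock prefix.
      */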
 108static u64 precise_store_data(u64 status)
 109{
 110        union intel_x86_pebs_dse dse;
 111        u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
 112
 113        dse.val = status;
 114
 115        /*
 116         * bit 4: TLB access
  117         * 1 = store missed the 2nd level TLB
  118         *
  119         * so the translation either hit the page walker or the OS,
  120         * otherwise it hit the 2nd level TLB
 121         */
 122        if (dse.st_stlb_miss)
 123                val |= P(TLB, MISS);
 124        else
 125                val |= P(TLB, HIT);
 126
 127        /*
 128         * bit 0: hit L1 data cache
 129         * if not set, then all we know is that
 130         * it missed L1D
 131         */
 132        if (dse.st_l1d_hit)
 133                val |= P(LVL, HIT);
 134        else
 135                val |= P(LVL, MISS);
 136
 137        /*
 138         * bit 5: Locked prefix
 139         */
 140        if (dse.st_locked)
 141                val |= P(LOCK, LOCKED);
 142
 143        return val;
 144}
 145
 146static u64 precise_datala_hsw(struct perf_event *event, u64 status)
 147{
 148        union perf_mem_data_src dse;
 149
 150        dse.val = PERF_MEM_NA;
 151
 152        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
 153                dse.mem_op = PERF_MEM_OP_STORE;
 154        else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
 155                dse.mem_op = PERF_MEM_OP_LOAD;
 156
 157        /*
 158         * L1 info only valid for following events:
 159         *
 160         * MEM_UOPS_RETIRED.STLB_MISS_STORES
 161         * MEM_UOPS_RETIRED.LOCK_STORES
 162         * MEM_UOPS_RETIRED.SPLIT_STORES
 163         * MEM_UOPS_RETIRED.ALL_STORES
 164         */
 165        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
 166                if (status & 1)
 167                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
 168                else
 169                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
 170        }
 171        return dse.val;
 172}
 173
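     /*
      * Decode the load-latency (PEBS-LL) data source field: bits 0-3 index
      * the pebs_data_source[] table, the remaining bits encode STLB miss,
      * lock prefix and (on newer models) load-block information.
      */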
 174static u64 load_latency_data(u64 status)
 175{
 176        union intel_x86_pebs_dse dse;
 177        u64 val;
 178
 179        dse.val = status;
 180
 181        /*
 182         * use the mapping table for bit 0-3
 183         */
 184        val = pebs_data_source[dse.ld_dse];
 185
 186        /*
  187         * Nehalem models do not support TLB or Lock info
 188         */
 189        if (x86_pmu.pebs_no_tlb) {
 190                val |= P(TLB, NA) | P(LOCK, NA);
 191                return val;
 192        }
 193        /*
 194         * bit 4: TLB access
 195         * 0 = did not miss 2nd level TLB
 196         * 1 = missed 2nd level TLB
 197         */
 198        if (dse.ld_stlb_miss)
 199                val |= P(TLB, MISS) | P(TLB, L2);
 200        else
 201                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
 202
 203        /*
 204         * bit 5: locked prefix
 205         */
 206        if (dse.ld_locked)
 207                val |= P(LOCK, LOCKED);
 208
 209        /*
  210         * Ice Lake and earlier models do not support block info.
 211         */
 212        if (!x86_pmu.pebs_block) {
 213                val |= P(BLK, NA);
 214                return val;
 215        }
 216        /*
 217         * bit 6: load was blocked since its data could not be forwarded
 218         *        from a preceding store
 219         */
 220        if (dse.ld_data_blk)
 221                val |= P(BLK, DATA);
 222
 223        /*
 224         * bit 7: load was blocked due to potential address conflict with
 225         *        a preceding store
 226         */
 227        if (dse.ld_addr_blk)
 228                val |= P(BLK, ADDR);
 229
 230        if (!dse.ld_data_blk && !dse.ld_addr_blk)
 231                val |= P(BLK, NA);
 232
 233        return val;
 234}
 235
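     /*
      * Decode the store-latency data source field; same layout as the
      * load-latency case, minus the block information.
      */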
 236static u64 store_latency_data(u64 status)
 237{
 238        union intel_x86_pebs_dse dse;
 239        u64 val;
 240
 241        dse.val = status;
 242
 243        /*
 244         * use the mapping table for bit 0-3
 245         */
 246        val = pebs_data_source[dse.st_lat_dse];
 247
 248        /*
 249         * bit 4: TLB access
 250         * 0 = did not miss 2nd level TLB
 251         * 1 = missed 2nd level TLB
 252         */
 253        if (dse.st_lat_stlb_miss)
 254                val |= P(TLB, MISS) | P(TLB, L2);
 255        else
 256                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
 257
 258        /*
 259         * bit 5: locked prefix
 260         */
 261        if (dse.st_lat_locked)
 262                val |= P(LOCK, LOCKED);
 263
 264        val |= P(BLK, NA);
 265
 266        return val;
 267}
 268
 269struct pebs_record_core {
 270        u64 flags, ip;
 271        u64 ax, bx, cx, dx;
 272        u64 si, di, bp, sp;
 273        u64 r8,  r9,  r10, r11;
 274        u64 r12, r13, r14, r15;
 275};
 276
 277struct pebs_record_nhm {
 278        u64 flags, ip;
 279        u64 ax, bx, cx, dx;
 280        u64 si, di, bp, sp;
 281        u64 r8,  r9,  r10, r11;
 282        u64 r12, r13, r14, r15;
 283        u64 status, dla, dse, lat;
 284};
 285
 286/*
 287 * Same as pebs_record_nhm, with two additional fields.
 288 */
 289struct pebs_record_hsw {
 290        u64 flags, ip;
 291        u64 ax, bx, cx, dx;
 292        u64 si, di, bp, sp;
 293        u64 r8,  r9,  r10, r11;
 294        u64 r12, r13, r14, r15;
 295        u64 status, dla, dse, lat;
 296        u64 real_ip, tsx_tuning;
 297};
 298
 299union hsw_tsx_tuning {
 300        struct {
 301                u32 cycles_last_block     : 32,
 302                    hle_abort             : 1,
 303                    rtm_abort             : 1,
 304                    instruction_abort     : 1,
 305                    non_instruction_abort : 1,
 306                    retry                 : 1,
 307                    data_conflict         : 1,
 308                    capacity_writes       : 1,
 309                    capacity_reads        : 1;
 310        };
 311        u64         value;
 312};
 313
 314#define PEBS_HSW_TSX_FLAGS      0xff00000000ULL
 315
 316/* Same as HSW, plus TSC */
 317
 318struct pebs_record_skl {
 319        u64 flags, ip;
 320        u64 ax, bx, cx, dx;
 321        u64 si, di, bp, sp;
 322        u64 r8,  r9,  r10, r11;
 323        u64 r12, r13, r14, r15;
 324        u64 status, dla, dse, lat;
 325        u64 real_ip, tsx_tuning;
 326        u64 tsc;
 327};
 328
 329void init_debug_store_on_cpu(int cpu)
 330{
 331        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 332
 333        if (!ds)
 334                return;
 335
 336        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
 337                     (u32)((u64)(unsigned long)ds),
 338                     (u32)((u64)(unsigned long)ds >> 32));
 339}
 340
 341void fini_debug_store_on_cpu(int cpu)
 342{
 343        if (!per_cpu(cpu_hw_events, cpu).ds)
 344                return;
 345
 346        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 347}
 348
 349static DEFINE_PER_CPU(void *, insn_buffer);
 350
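     /*
      * Map @size bytes of the buffer at @addr into the cpu_entry_area at
      * @cea with protection @prot and flush stale TLB entries.
      */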
 351static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 352{
 353        unsigned long start = (unsigned long)cea;
 354        phys_addr_t pa;
 355        size_t msz = 0;
 356
 357        pa = virt_to_phys(addr);
 358
 359        preempt_disable();
 360        for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
 361                cea_set_pte(cea, pa, prot);
 362
 363        /*
 364         * This is a cross-CPU update of the cpu_entry_area, we must shoot down
 365         * all TLB entries for it.
 366         */
 367        flush_tlb_kernel_range(start, start + size);
 368        preempt_enable();
 369}
 370
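     /* Unmap a cpu_entry_area range previously set up by ds_update_cea(). */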
 371static void ds_clear_cea(void *cea, size_t size)
 372{
 373        unsigned long start = (unsigned long)cea;
 374        size_t msz = 0;
 375
 376        preempt_disable();
 377        for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
 378                cea_set_pte(cea, 0, PAGE_NONE);
 379
 380        flush_tlb_kernel_range(start, start + size);
 381        preempt_enable();
 382}
 383
 384static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
 385{
 386        unsigned int order = get_order(size);
 387        int node = cpu_to_node(cpu);
 388        struct page *page;
 389
 390        page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
 391        return page ? page_address(page) : NULL;
 392}
 393
 394static void dsfree_pages(const void *buffer, size_t size)
 395{
 396        if (buffer)
 397                free_pages((unsigned long)buffer, get_order(size));
 398}
 399
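     /*
      * Allocate the per-CPU PEBS buffer, map it into the cpu_entry_area and
      * point the DS area at that mapping.  Models without the eventing IP
      * (PEBS format < 2) also get a scratch buffer for the instruction
      * decoding done in the IP fixup path.
      */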
 400static int alloc_pebs_buffer(int cpu)
 401{
 402        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 403        struct debug_store *ds = hwev->ds;
 404        size_t bsiz = x86_pmu.pebs_buffer_size;
 405        int max, node = cpu_to_node(cpu);
 406        void *buffer, *insn_buff, *cea;
 407
 408        if (!x86_pmu.pebs)
 409                return 0;
 410
 411        buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 412        if (unlikely(!buffer))
 413                return -ENOMEM;
 414
 415        /*
 416         * HSW+ already provides us the eventing ip; no need to allocate this
 417         * buffer then.
 418         */
 419        if (x86_pmu.intel_cap.pebs_format < 2) {
 420                insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 421                if (!insn_buff) {
 422                        dsfree_pages(buffer, bsiz);
 423                        return -ENOMEM;
 424                }
 425                per_cpu(insn_buffer, cpu) = insn_buff;
 426        }
 427        hwev->ds_pebs_vaddr = buffer;
 428        /* Update the cpu entry area mapping */
 429        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
 430        ds->pebs_buffer_base = (unsigned long) cea;
 431        ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 432        ds->pebs_index = ds->pebs_buffer_base;
 433        max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
 434        ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 435        return 0;
 436}
 437
 438static void release_pebs_buffer(int cpu)
 439{
 440        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 441        void *cea;
 442
 443        if (!x86_pmu.pebs)
 444                return;
 445
 446        kfree(per_cpu(insn_buffer, cpu));
 447        per_cpu(insn_buffer, cpu) = NULL;
 448
 449        /* Clear the fixmap */
 450        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
 451        ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 452        dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
 453        hwev->ds_pebs_vaddr = NULL;
 454}
 455
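     /*
      * Allocate the per-CPU BTS buffer and program the DS area base, index,
      * absolute maximum and interrupt threshold for it.
      */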
 456static int alloc_bts_buffer(int cpu)
 457{
 458        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 459        struct debug_store *ds = hwev->ds;
 460        void *buffer, *cea;
 461        int max;
 462
 463        if (!x86_pmu.bts)
 464                return 0;
 465
 466        buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 467        if (unlikely(!buffer)) {
 468                WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 469                return -ENOMEM;
 470        }
 471        hwev->ds_bts_vaddr = buffer;
 472        /* Update the fixmap */
 473        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
 474        ds->bts_buffer_base = (unsigned long) cea;
 475        ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 476        ds->bts_index = ds->bts_buffer_base;
 477        max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
 478        ds->bts_absolute_maximum = ds->bts_buffer_base +
 479                                        max * BTS_RECORD_SIZE;
 480        ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
 481                                        (max / 16) * BTS_RECORD_SIZE;
 482        return 0;
 483}
 484
 485static void release_bts_buffer(int cpu)
 486{
 487        struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 488        void *cea;
 489
 490        if (!x86_pmu.bts)
 491                return;
 492
 493        /* Clear the fixmap */
 494        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
 495        ds_clear_cea(cea, BTS_BUFFER_SIZE);
 496        dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
 497        hwev->ds_bts_vaddr = NULL;
 498}
 499
 500static int alloc_ds_buffer(int cpu)
 501{
 502        struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 503
 504        memset(ds, 0, sizeof(*ds));
 505        per_cpu(cpu_hw_events, cpu).ds = ds;
 506        return 0;
 507}
 508
 509static void release_ds_buffer(int cpu)
 510{
 511        per_cpu(cpu_hw_events, cpu).ds = NULL;
 512}
 513
 514void release_ds_buffers(void)
 515{
 516        int cpu;
 517
 518        if (!x86_pmu.bts && !x86_pmu.pebs)
 519                return;
 520
 521        for_each_possible_cpu(cpu)
 522                release_ds_buffer(cpu);
 523
 524        for_each_possible_cpu(cpu) {
 525                /*
  526                 * Again, ignore errors from offline CPUs; they will no longer
  527                 * observe cpu_hw_events.ds and will not program the DS_AREA
  528                 * when they come up.
 529                 */
 530                fini_debug_store_on_cpu(cpu);
 531        }
 532
 533        for_each_possible_cpu(cpu) {
 534                release_pebs_buffer(cpu);
 535                release_bts_buffer(cpu);
 536        }
 537}
 538
 539void reserve_ds_buffers(void)
 540{
 541        int bts_err = 0, pebs_err = 0;
 542        int cpu;
 543
 544        x86_pmu.bts_active = 0;
 545        x86_pmu.pebs_active = 0;
 546
 547        if (!x86_pmu.bts && !x86_pmu.pebs)
 548                return;
 549
 550        if (!x86_pmu.bts)
 551                bts_err = 1;
 552
 553        if (!x86_pmu.pebs)
 554                pebs_err = 1;
 555
 556        for_each_possible_cpu(cpu) {
 557                if (alloc_ds_buffer(cpu)) {
 558                        bts_err = 1;
 559                        pebs_err = 1;
 560                }
 561
 562                if (!bts_err && alloc_bts_buffer(cpu))
 563                        bts_err = 1;
 564
 565                if (!pebs_err && alloc_pebs_buffer(cpu))
 566                        pebs_err = 1;
 567
 568                if (bts_err && pebs_err)
 569                        break;
 570        }
 571
 572        if (bts_err) {
 573                for_each_possible_cpu(cpu)
 574                        release_bts_buffer(cpu);
 575        }
 576
 577        if (pebs_err) {
 578                for_each_possible_cpu(cpu)
 579                        release_pebs_buffer(cpu);
 580        }
 581
 582        if (bts_err && pebs_err) {
 583                for_each_possible_cpu(cpu)
 584                        release_ds_buffer(cpu);
 585        } else {
 586                if (x86_pmu.bts && !bts_err)
 587                        x86_pmu.bts_active = 1;
 588
 589                if (x86_pmu.pebs && !pebs_err)
 590                        x86_pmu.pebs_active = 1;
 591
 592                for_each_possible_cpu(cpu) {
 593                        /*
  594                         * Ignore wrmsr_on_cpu() errors for offline CPUs; they
  595                         * will get this call through intel_pmu_cpu_starting().
 596                         */
 597                        init_debug_store_on_cpu(cpu);
 598                }
 599        }
 600}
 601
 602/*
 603 * BTS
 604 */
 605
 606struct event_constraint bts_constraint =
 607        EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
 608
 609void intel_pmu_enable_bts(u64 config)
 610{
 611        unsigned long debugctlmsr;
 612
 613        debugctlmsr = get_debugctlmsr();
 614
 615        debugctlmsr |= DEBUGCTLMSR_TR;
 616        debugctlmsr |= DEBUGCTLMSR_BTS;
 617        if (config & ARCH_PERFMON_EVENTSEL_INT)
 618                debugctlmsr |= DEBUGCTLMSR_BTINT;
 619
 620        if (!(config & ARCH_PERFMON_EVENTSEL_OS))
 621                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
 622
 623        if (!(config & ARCH_PERFMON_EVENTSEL_USR))
 624                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
 625
 626        update_debugctlmsr(debugctlmsr);
 627}
 628
 629void intel_pmu_disable_bts(void)
 630{
 631        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 632        unsigned long debugctlmsr;
 633
 634        if (!cpuc->ds)
 635                return;
 636
 637        debugctlmsr = get_debugctlmsr();
 638
 639        debugctlmsr &=
 640                ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
 641                  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
 642
 643        update_debugctlmsr(debugctlmsr);
 644}
 645
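     /*
      * Drain the BTS records accumulated in the DS area and emit them as
      * perf samples (from/to branch addresses), skipping records that would
      * leak kernel addresses when attr::exclude_kernel is set.
      */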
 646int intel_pmu_drain_bts_buffer(void)
 647{
 648        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 649        struct debug_store *ds = cpuc->ds;
 650        struct bts_record {
 651                u64     from;
 652                u64     to;
 653                u64     flags;
 654        };
 655        struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
 656        struct bts_record *at, *base, *top;
 657        struct perf_output_handle handle;
 658        struct perf_event_header header;
 659        struct perf_sample_data data;
 660        unsigned long skip = 0;
 661        struct pt_regs regs;
 662
 663        if (!event)
 664                return 0;
 665
 666        if (!x86_pmu.bts_active)
 667                return 0;
 668
 669        base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 670        top  = (struct bts_record *)(unsigned long)ds->bts_index;
 671
 672        if (top <= base)
 673                return 0;
 674
 675        memset(&regs, 0, sizeof(regs));
 676
 677        ds->bts_index = ds->bts_buffer_base;
 678
 679        perf_sample_data_init(&data, 0, event->hw.last_period);
 680
 681        /*
 682         * BTS leaks kernel addresses in branches across the cpl boundary,
 683         * such as traps or system calls, so unless the user is asking for
 684         * kernel tracing (and right now it's not possible), we'd need to
 685         * filter them out. But first we need to count how many of those we
  686         * have in the current batch. This is an extra O(n) pass; however,
  687         * it's much faster than the other one, especially considering that
 688         * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
 689         * alloc_bts_buffer()).
 690         */
 691        for (at = base; at < top; at++) {
 692                /*
 693                 * Note that right now *this* BTS code only works if
 694                 * attr::exclude_kernel is set, but let's keep this extra
 695                 * check here in case that changes.
 696                 */
 697                if (event->attr.exclude_kernel &&
 698                    (kernel_ip(at->from) || kernel_ip(at->to)))
 699                        skip++;
 700        }
 701
 702        /*
 703         * Prepare a generic sample, i.e. fill in the invariant fields.
 704         * We will overwrite the from and to address before we output
 705         * the sample.
 706         */
 707        rcu_read_lock();
 708        perf_prepare_sample(&header, &data, event, &regs);
 709
 710        if (perf_output_begin(&handle, &data, event,
 711                              header.size * (top - base - skip)))
 712                goto unlock;
 713
 714        for (at = base; at < top; at++) {
 715                /* Filter out any records that contain kernel addresses. */
 716                if (event->attr.exclude_kernel &&
 717                    (kernel_ip(at->from) || kernel_ip(at->to)))
 718                        continue;
 719
 720                data.ip         = at->from;
 721                data.addr       = at->to;
 722
 723                perf_output_sample(&handle, &header, &data, event);
 724        }
 725
 726        perf_output_end(&handle);
 727
 728        /* There's new data available. */
 729        event->hw.interrupts++;
 730        event->pending_kill = POLL_IN;
 731unlock:
 732        rcu_read_unlock();
 733        return 1;
 734}
 735
 736static inline void intel_pmu_drain_pebs_buffer(void)
 737{
 738        struct perf_sample_data data;
 739
 740        x86_pmu.drain_pebs(NULL, &data);
 741}
 742
 743/*
 744 * PEBS
 745 */
 746struct event_constraint intel_core2_pebs_event_constraints[] = {
 747        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 748        INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
 749        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
  750        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
 751        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
 752        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 753        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
 754        EVENT_CONSTRAINT_END
 755};
 756
 757struct event_constraint intel_atom_pebs_event_constraints[] = {
 758        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 759        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
 760        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
 761        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 762        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
 763        /* Allow all events as PEBS with no flags */
 764        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 765        EVENT_CONSTRAINT_END
 766};
 767
 768struct event_constraint intel_slm_pebs_event_constraints[] = {
 769        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 770        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
 771        /* Allow all events as PEBS with no flags */
 772        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 773        EVENT_CONSTRAINT_END
 774};
 775
 776struct event_constraint intel_glm_pebs_event_constraints[] = {
 777        /* Allow all events as PEBS with no flags */
 778        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 779        EVENT_CONSTRAINT_END
 780};
 781
 782struct event_constraint intel_grt_pebs_event_constraints[] = {
  783        /* Load/store latency events provide the PEBS data source */
 784        INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
 785        INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
 786        EVENT_CONSTRAINT_END
 787};
 788
 789struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 790        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 791        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 792        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 793        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
 794        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
 795        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 796        INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
 797        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
 798        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 799        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 800        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
 801        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 802        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
 803        EVENT_CONSTRAINT_END
 804};
 805
 806struct event_constraint intel_westmere_pebs_event_constraints[] = {
 807        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 808        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 809        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 810        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
 811        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
 812        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 813        INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 814        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
 815        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
 816        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
 817        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
 818        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 819        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
 820        EVENT_CONSTRAINT_END
 821};
 822
 823struct event_constraint intel_snb_pebs_event_constraints[] = {
 824        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
 825        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 826        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 827        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 828        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 829        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 830        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 831        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 832        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 833        /* Allow all events as PEBS with no flags */
 834        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 835        EVENT_CONSTRAINT_END
 836};
 837
 838struct event_constraint intel_ivb_pebs_event_constraints[] = {
 839        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
 840        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 841        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 842        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 843        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 844        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 845        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 846        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 847        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 848        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 849        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 850        /* Allow all events as PEBS with no flags */
 851        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 852        EVENT_CONSTRAINT_END
 853};
 854
 855struct event_constraint intel_hsw_pebs_event_constraints[] = {
 856        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
 857        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
 858        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 859        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 860        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 861        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 862        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
 863        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
 864        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
 865        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
 866        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
 867        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
 868        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
 869        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
 870        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 871        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
 872        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
 873        /* Allow all events as PEBS with no flags */
 874        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 875        EVENT_CONSTRAINT_END
 876};
 877
 878struct event_constraint intel_bdw_pebs_event_constraints[] = {
 879        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
 880        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
 881        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 882        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
 883        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 884        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 885        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
 886        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
 887        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
 888        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
 889        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
 890        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
 891        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
 892        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
 893        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 894        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
 895        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
 896        /* Allow all events as PEBS with no flags */
 897        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 898        EVENT_CONSTRAINT_END
 899};
 900
 901
 902struct event_constraint intel_skl_pebs_event_constraints[] = {
 903        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
 904        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 905        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
 906        /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
 907        INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
 908        INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
 909        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
 910        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
 911        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
 912        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
 913        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
 914        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
 915        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
 916        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
 917        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
 918        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
 919        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
 920        /* Allow all events as PEBS with no flags */
 921        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 922        EVENT_CONSTRAINT_END
 923};
 924
 925struct event_constraint intel_icl_pebs_event_constraints[] = {
 926        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL),  /* old INST_RETIRED.PREC_DIST */
 927        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL),  /* INST_RETIRED.PREC_DIST */
 928        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),  /* SLOTS */
 929
 930        INTEL_PLD_CONSTRAINT(0x1cd, 0xff),                      /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 931        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),    /* MEM_INST_RETIRED.LOAD */
 932        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),    /* MEM_INST_RETIRED.STORE */
 933
 934        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
 935
 936        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),                /* MEM_INST_RETIRED.* */
 937
 938        /*
 939         * Everything else is handled by PMU_FL_PEBS_ALL, because we
 940         * need the full constraints from the main table.
 941         */
 942
 943        EVENT_CONSTRAINT_END
 944};
 945
 946struct event_constraint intel_spr_pebs_event_constraints[] = {
 947        INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
 948        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
 949
 950        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
 951        INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
 952        INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
 953        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),
 954        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),
 955
 956        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
 957
 958        INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
 959
 960        /*
 961         * Everything else is handled by PMU_FL_PEBS_ALL, because we
 962         * need the full constraints from the main table.
 963         */
 964
 965        EVENT_CONSTRAINT_END
 966};
 967
 968struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 969{
 970        struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
 971        struct event_constraint *c;
 972
 973        if (!event->attr.precise_ip)
 974                return NULL;
 975
 976        if (pebs_constraints) {
 977                for_each_event_constraint(c, pebs_constraints) {
 978                        if (constraint_match(c, event->hw.config)) {
 979                                event->hw.flags |= c->flags;
 980                                return c;
 981                        }
 982                }
 983        }
 984
 985        /*
 986         * Extended PEBS support
 987         * Makes the PEBS code search the normal constraints.
 988         */
 989        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
 990                return NULL;
 991
 992        return &emptyconstraint;
 993}
 994
 995/*
 996 * We need the sched_task callback even for per-cpu events when we use
 997 * the large interrupt threshold, such that we can provide PID and TID
 998 * to PEBS samples.
 999 */
1000static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
1001{
1002        if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
1003                return false;
1004
1005        return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
1006}
1007
1008void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
1009{
1010        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1011
1012        if (!sched_in && pebs_needs_sched_cb(cpuc))
1013                intel_pmu_drain_pebs_buffer();
1014}
1015
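     /*
      * Program the DS interrupt threshold: only raise the PMI when the
      * buffer is nearly full if every PEBS event allows large (multi-record)
      * PEBS, otherwise interrupt after each single record.
      */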
1016static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
1017{
1018        struct debug_store *ds = cpuc->ds;
1019        int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
1020        int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
1021        u64 threshold;
1022        int reserved;
1023
1024        if (cpuc->n_pebs_via_pt)
1025                return;
1026
1027        if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1028                reserved = max_pebs_events + num_counters_fixed;
1029        else
1030                reserved = max_pebs_events;
1031
1032        if (cpuc->n_pebs == cpuc->n_large_pebs) {
1033                threshold = ds->pebs_absolute_maximum -
1034                        reserved * cpuc->pebs_record_size;
1035        } else {
1036                threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
1037        }
1038
1039        ds->pebs_interrupt_threshold = threshold;
1040}
1041
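     /*
      * Recompute the adaptive PEBS record size from the currently requested
      * PEBS_DATACFG groups (basic record plus meminfo/GPRs/XMMs/LBRs).
      */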
1042static void adaptive_pebs_record_size_update(void)
1043{
1044        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1045        u64 pebs_data_cfg = cpuc->pebs_data_cfg;
1046        int sz = sizeof(struct pebs_basic);
1047
1048        if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
1049                sz += sizeof(struct pebs_meminfo);
1050        if (pebs_data_cfg & PEBS_DATACFG_GP)
1051                sz += sizeof(struct pebs_gprs);
1052        if (pebs_data_cfg & PEBS_DATACFG_XMMS)
1053                sz += sizeof(struct pebs_xmm);
1054        if (pebs_data_cfg & PEBS_DATACFG_LBRS)
1055                sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1056
1057        cpuc->pebs_record_size = sz;
1058}
1059
1060#define PERF_PEBS_MEMINFO_TYPE  (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
1061                                PERF_SAMPLE_PHYS_ADDR |                      \
1062                                PERF_SAMPLE_WEIGHT_TYPE |                    \
1063                                PERF_SAMPLE_TRANSACTION |                    \
1064                                PERF_SAMPLE_DATA_PAGE_SIZE)
1065
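     /*
      * Work out which adaptive PEBS groups (meminfo, GPRs, XMMs, LBRs) this
      * event's sample_type needs and return the corresponding PEBS_DATACFG
      * bits.
      */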
1066static u64 pebs_update_adaptive_cfg(struct perf_event *event)
1067{
1068        struct perf_event_attr *attr = &event->attr;
1069        u64 sample_type = attr->sample_type;
1070        u64 pebs_data_cfg = 0;
1071        bool gprs, tsx_weight;
1072
1073        if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
1074            attr->precise_ip > 1)
1075                return pebs_data_cfg;
1076
1077        if (sample_type & PERF_PEBS_MEMINFO_TYPE)
1078                pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
1079
1080        /*
1081         * We need GPRs when:
 1082         * + the user requested them
 1083         * + precise_ip < 2, to report the non-eventing IP
 1084         * + RTM TSX weight is requested, which needs GPRs for the abort code.
1085         */
1086        gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
1087               (attr->sample_regs_intr & PEBS_GP_REGS);
1088
1089        tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
1090                     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
1091                      x86_pmu.rtm_abort_event);
1092
1093        if (gprs || (attr->precise_ip < 2) || tsx_weight)
1094                pebs_data_cfg |= PEBS_DATACFG_GP;
1095
1096        if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1097            (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
1098                pebs_data_cfg |= PEBS_DATACFG_XMMS;
1099
1100        if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1101                /*
1102                 * For now always log all LBRs. Could configure this
1103                 * later.
1104                 */
1105                pebs_data_cfg |= PEBS_DATACFG_LBRS |
1106                        ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
1107        }
1108
1109        return pebs_data_cfg;
1110}
1111
1112static void
1113pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
1114                  struct perf_event *event, bool add)
1115{
1116        struct pmu *pmu = event->ctx->pmu;
1117        /*
1118         * Make sure we get updated with the first PEBS
 1119         * event. It will also trigger during removal, but
1120         * that does not hurt:
1121         */
1122        bool update = cpuc->n_pebs == 1;
1123
1124        if (needed_cb != pebs_needs_sched_cb(cpuc)) {
1125                if (!needed_cb)
1126                        perf_sched_cb_inc(pmu);
1127                else
1128                        perf_sched_cb_dec(pmu);
1129
1130                update = true;
1131        }
1132
1133        /*
1134         * The PEBS record doesn't shrink on pmu::del(). Doing so would require
1135         * iterating all remaining PEBS events to reconstruct the config.
1136         */
1137        if (x86_pmu.intel_cap.pebs_baseline && add) {
1138                u64 pebs_data_cfg;
1139
1140                /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
1141                if (cpuc->n_pebs == 1) {
1142                        cpuc->pebs_data_cfg = 0;
1143                        cpuc->pebs_record_size = sizeof(struct pebs_basic);
1144                }
1145
1146                pebs_data_cfg = pebs_update_adaptive_cfg(event);
1147
1148                /* Update pebs_record_size if new event requires more data. */
1149                if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
1150                        cpuc->pebs_data_cfg |= pebs_data_cfg;
1151                        adaptive_pebs_record_size_update();
1152                        update = true;
1153                }
1154        }
1155
1156        if (update)
1157                pebs_update_threshold(cpuc);
1158}
1159
1160void intel_pmu_pebs_add(struct perf_event *event)
1161{
1162        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1163        struct hw_perf_event *hwc = &event->hw;
1164        bool needed_cb = pebs_needs_sched_cb(cpuc);
1165
1166        cpuc->n_pebs++;
1167        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1168                cpuc->n_large_pebs++;
1169        if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1170                cpuc->n_pebs_via_pt++;
1171
1172        pebs_update_state(needed_cb, cpuc, event, true);
1173}
1174
1175static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
1176{
1177        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1178
1179        if (!is_pebs_pt(event))
1180                return;
1181
1182        if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
1183                cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
1184}
1185
1186static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
1187{
1188        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1189        struct hw_perf_event *hwc = &event->hw;
1190        struct debug_store *ds = cpuc->ds;
1191        u64 value = ds->pebs_event_reset[hwc->idx];
1192        u32 base = MSR_RELOAD_PMC0;
1193        unsigned int idx = hwc->idx;
1194
1195        if (!is_pebs_pt(event))
1196                return;
1197
1198        if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
1199                cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
1200
1201        cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
1202
1203        if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
1204                base = MSR_RELOAD_FIXED_CTR0;
1205                idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1206                value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
1207        }
1208        wrmsrl(base + idx, value);
1209}
1210
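     /*
      * Enable PEBS for this event: set its bit in the pebs_enabled shadow of
      * MSR_IA32_PEBS_ENABLE, program the adaptive data configuration when
      * supported and set up the auto-reload value in the DS area if possible.
      */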
1211void intel_pmu_pebs_enable(struct perf_event *event)
1212{
1213        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1214        struct hw_perf_event *hwc = &event->hw;
1215        struct debug_store *ds = cpuc->ds;
1216        unsigned int idx = hwc->idx;
1217
1218        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
1219
1220        cpuc->pebs_enabled |= 1ULL << hwc->idx;
1221
1222        if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
1223                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
1224        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1225                cpuc->pebs_enabled |= 1ULL << 63;
1226
1227        if (x86_pmu.intel_cap.pebs_baseline) {
1228                hwc->config |= ICL_EVENTSEL_ADAPTIVE;
1229                if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1230                        wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
1231                        cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
1232                }
1233        }
1234
1235        if (idx >= INTEL_PMC_IDX_FIXED)
1236                idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
1237
1238        /*
 1239         * Use auto-reload if possible to save an MSR write in the PMI.
 1240         * This must be done in pmu::start(); PERF_EVENT_IOC_PERIOD may change the period.
1241         */
1242        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1243                ds->pebs_event_reset[idx] =
1244                        (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
1245        } else {
1246                ds->pebs_event_reset[idx] = 0;
1247        }
1248
1249        intel_pmu_pebs_via_pt_enable(event);
1250}
1251
1252void intel_pmu_pebs_del(struct perf_event *event)
1253{
1254        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1255        struct hw_perf_event *hwc = &event->hw;
1256        bool needed_cb = pebs_needs_sched_cb(cpuc);
1257
1258        cpuc->n_pebs--;
1259        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1260                cpuc->n_large_pebs--;
1261        if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1262                cpuc->n_pebs_via_pt--;
1263
1264        pebs_update_state(needed_cb, cpuc, event, false);
1265}
1266
1267void intel_pmu_pebs_disable(struct perf_event *event)
1268{
1269        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1270        struct hw_perf_event *hwc = &event->hw;
1271
1272        if (cpuc->n_pebs == cpuc->n_large_pebs &&
1273            cpuc->n_pebs != cpuc->n_pebs_via_pt)
1274                intel_pmu_drain_pebs_buffer();
1275
1276        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
1277
1278        if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
1279            (x86_pmu.version < 5))
1280                cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
1281        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1282                cpuc->pebs_enabled &= ~(1ULL << 63);
1283
1284        intel_pmu_pebs_via_pt_disable(event);
1285
1286        if (cpuc->enabled)
1287                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1288
1289        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
1290}
1291
1292void intel_pmu_pebs_enable_all(void)
1293{
1294        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1295
1296        if (cpuc->pebs_enabled)
1297                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1298}
1299
1300void intel_pmu_pebs_disable_all(void)
1301{
1302        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1303
1304        if (cpuc->pebs_enabled)
1305                __intel_pmu_pebs_disable_all();
1306}
1307
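     /*
      * With trap-like PEBS the reported IP is one instruction past the one
      * that caused the event.  Starting from the last LBR branch target,
      * decode forward through the basic block to recover the real eventing
      * IP so the sample can be flagged as exact.
      */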
1308static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
1309{
1310        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1311        unsigned long from = cpuc->lbr_entries[0].from;
1312        unsigned long old_to, to = cpuc->lbr_entries[0].to;
1313        unsigned long ip = regs->ip;
1314        int is_64bit = 0;
1315        void *kaddr;
1316        int size;
1317
1318        /*
1319         * We don't need to fixup if the PEBS assist is fault like
 1320         * We don't need a fixup if the PEBS assist is fault-like
1321        if (!x86_pmu.intel_cap.pebs_trap)
1322                return 1;
1323
1324        /*
1325         * No LBR entry, no basic block, no rewinding
1326         */
1327        if (!cpuc->lbr_stack.nr || !from || !to)
1328                return 0;
1329
1330        /*
1331         * Basic blocks should never cross user/kernel boundaries
1332         */
1333        if (kernel_ip(ip) != kernel_ip(to))
1334                return 0;
1335
1336        /*
1337         * unsigned math, either ip is before the start (impossible) or
1338         * the basic block is larger than 1 page (sanity)
1339         */
1340        if ((ip - to) > PEBS_FIXUP_SIZE)
1341                return 0;
1342
1343        /*
1344         * We sampled a branch insn, rewind using the LBR stack
1345         */
1346        if (ip == to) {
1347                set_linear_ip(regs, from);
1348                return 1;
1349        }
1350
1351        size = ip - to;
1352        if (!kernel_ip(ip)) {
1353                int bytes;
1354                u8 *buf = this_cpu_read(insn_buffer);
1355
1356                /* 'size' must fit our buffer, see above */
1357                bytes = copy_from_user_nmi(buf, (void __user *)to, size);
1358                if (bytes != 0)
1359                        return 0;
1360
1361                kaddr = buf;
1362        } else {
1363                kaddr = (void *)to;
1364        }
1365
1366        do {
1367                struct insn insn;
1368
1369                old_to = to;
1370
1371#ifdef CONFIG_X86_64
1372                is_64bit = kernel_ip(to) || any_64bit_mode(regs);
1373#endif
1374                insn_init(&insn, kaddr, size, is_64bit);
1375
1376                /*
1377                 * Make sure there was not a problem decoding the instruction.
1378                 * This is doubly important because we have an infinite loop if
1379                 * insn.length=0.
1380                 */
1381                if (insn_get_length(&insn))
1382                        break;
1383
1384                to += insn.length;
1385                kaddr += insn.length;
1386                size -= insn.length;
1387        } while (to < ip);
1388
1389        if (to == ip) {
1390                set_linear_ip(regs, old_to);
1391                return 1;
1392        }
1393
1394        /*
1395         * Even though we decoded the basic block, the instruction stream
 1396         * never matched the given IP; either the TO or the IP got corrupted.
1397         */
1398        return 0;
1399}
1400
1401static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
1402{
1403        if (tsx_tuning) {
1404                union hsw_tsx_tuning tsx = { .value = tsx_tuning };
1405                return tsx.cycles_last_block;
1406        }
1407        return 0;
1408}
1409
1410static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
1411{
1412        u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
1413
1414        /* For RTM XABORTs also log the abort code from AX */
1415        if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
1416                txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
1417        return txn;
1418}
1419
1420static inline u64 get_pebs_status(void *n)
1421{
1422        if (x86_pmu.intel_cap.pebs_format < 4)
1423                return ((struct pebs_record_nhm *)n)->status;
1424        return ((struct pebs_basic *)n)->applicable_counters;
1425}
1426
1427#define PERF_X86_EVENT_PEBS_HSW_PREC \
1428                (PERF_X86_EVENT_PEBS_ST_HSW | \
1429                 PERF_X86_EVENT_PEBS_LD_HSW | \
1430                 PERF_X86_EVENT_PEBS_NA_HSW)
1431
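     /*
      * Decode the PEBS data-source auxiliary field according to how the
      * event was programmed: load latency, store latency, HSW data-la or
      * precise store.
      */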
1432static u64 get_data_src(struct perf_event *event, u64 aux)
1433{
1434        u64 val = PERF_MEM_NA;
1435        int fl = event->hw.flags;
1436        bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
1437
1438        if (fl & PERF_X86_EVENT_PEBS_LDLAT)
1439                val = load_latency_data(aux);
1440        else if (fl & PERF_X86_EVENT_PEBS_STLAT)
1441                val = store_latency_data(aux);
1442        else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
1443                val = precise_datala_hsw(event, aux);
1444        else if (fst)
1445                val = precise_store_data(aux);
1446        return val;
1447}
1448
1449#define PERF_SAMPLE_ADDR_TYPE   (PERF_SAMPLE_ADDR |             \
1450                                 PERF_SAMPLE_PHYS_ADDR |        \
1451                                 PERF_SAMPLE_DATA_PAGE_SIZE)
1452
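     /*
      * Convert a fixed-format (non-adaptive) PEBS record into
      * perf_sample_data and a pt_regs suitable for the generic overflow
      * handler, fixing up the sample IP from the eventing IP or via the
      * LBR-based fixup when precise_ip > 1.
      */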
1453static void setup_pebs_fixed_sample_data(struct perf_event *event,
1454                                   struct pt_regs *iregs, void *__pebs,
1455                                   struct perf_sample_data *data,
1456                                   struct pt_regs *regs)
1457{
1458        /*
1459         * We cast to the biggest pebs_record but are careful not to
1460         * unconditionally access the 'extra' entries.
1461         */
1462        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1463        struct pebs_record_skl *pebs = __pebs;
1464        u64 sample_type;
1465        int fll;
1466
1467        if (pebs == NULL)
1468                return;
1469
1470        sample_type = event->attr.sample_type;
1471        fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
1472
1473        perf_sample_data_init(data, 0, event->hw.last_period);
1474
1475        data->period = event->hw.last_period;
1476
1477        /*
1478         * Use latency for weight (only avail with PEBS-LL)
1479         */
1480        if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
1481                data->weight.full = pebs->lat;
1482
1483        /*
1484         * data.data_src encodes the data source
1485         */
1486        if (sample_type & PERF_SAMPLE_DATA_SRC)
1487                data->data_src.val = get_data_src(event, pebs->dse);
1488
1489        /*
1490         * We must however always use iregs for the unwinder to stay sane; the
1491         * record BP,SP,IP can point into thin air when the record is from a
1492         * previous PMI context or an (I)RET happened between the record and
1493         * PMI.
1494         */
1495        if (sample_type & PERF_SAMPLE_CALLCHAIN)
1496                data->callchain = perf_callchain(event, iregs);
1497
1498        /*
1499         * We use the interrupt regs as a base because the PEBS record does not
1500         * contain a full regs set; specifically, it seems to lack segment
1501         * descriptors, which get used by things like user_mode().
1502         *
1503         * In the simple case fix up only the IP for PERF_SAMPLE_IP.
1504         */
1505        *regs = *iregs;
1506
1507        /*
1508         * Initialize regs->flags from the PEBS record and clear the
1509         * exact bit (which uses x86 EFLAGS Reserved bit 3), i.e., do
1510         * not rely on it being zero:
1511         */
1512        regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
1513
1514        if (sample_type & PERF_SAMPLE_REGS_INTR) {
1515                regs->ax = pebs->ax;
1516                regs->bx = pebs->bx;
1517                regs->cx = pebs->cx;
1518                regs->dx = pebs->dx;
1519                regs->si = pebs->si;
1520                regs->di = pebs->di;
1521
1522                regs->bp = pebs->bp;
1523                regs->sp = pebs->sp;
1524
1525#ifndef CONFIG_X86_32
1526                regs->r8 = pebs->r8;
1527                regs->r9 = pebs->r9;
1528                regs->r10 = pebs->r10;
1529                regs->r11 = pebs->r11;
1530                regs->r12 = pebs->r12;
1531                regs->r13 = pebs->r13;
1532                regs->r14 = pebs->r14;
1533                regs->r15 = pebs->r15;
1534#endif
1535        }
1536
1537        if (event->attr.precise_ip > 1) {
1538                /*
1539                 * Haswell and later processors have an 'eventing IP'
1540                 * (real IP) which fixes the off-by-1 skid in hardware.
1541                 * Use it when precise_ip >= 2 :
1542                 */
1543                if (x86_pmu.intel_cap.pebs_format >= 2) {
1544                        set_linear_ip(regs, pebs->real_ip);
1545                        regs->flags |= PERF_EFLAGS_EXACT;
1546                } else {
1547                        /* Otherwise, use PEBS off-by-1 IP: */
1548                        set_linear_ip(regs, pebs->ip);
1549
1550                        /*
1551                         * With precise_ip >= 2, try to fix up the off-by-1 IP
1552                         * using the LBR. If successful, the fixup function
1553                         * corrects regs->ip and calls set_linear_ip() on regs:
1554                         */
1555                        if (intel_pmu_pebs_fixup_ip(regs))
1556                                regs->flags |= PERF_EFLAGS_EXACT;
1557                }
1558        } else {
1559                /*
1560                 * When precise_ip == 1, return the PEBS off-by-1 IP,
1561                 * no fixup attempted:
1562                 */
1563                set_linear_ip(regs, pebs->ip);
1564        }
1565
1566
1567        if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
1568            x86_pmu.intel_cap.pebs_format >= 1)
1569                data->addr = pebs->dla;
1570
1571        if (x86_pmu.intel_cap.pebs_format >= 2) {
1572                /* Only set the TSX weight when no memory weight. */
1573                if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
1574                        data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
1575
1576                if (sample_type & PERF_SAMPLE_TRANSACTION)
1577                        data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
1578                                                              pebs->ax);
1579        }
1580
1581        /*
1582         * PEBS v3 and later supply an accurate time stamp, so use it
1583         * as the sample time.
1584         *
1585         * We can only do this for the default trace clock.
1586         */
1587        if (x86_pmu.intel_cap.pebs_format >= 3 &&
1588                event->attr.use_clockid == 0)
1589                data->time = native_sched_clock_from_tsc(pebs->tsc);
1590
1591        if (has_branch_stack(event))
1592                data->br_stack = &cpuc->lbr_stack;
1593}
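
    /*
     * Userspace sketch (illustrative only, not part of this file): the
     * precise_ip handling above is selected by how the event was opened.
     * A minimal perf_event_open() setup asking for the fixed-up IP plus the
     * PEBS address/weight/data-source fields could look like this; the raw
     * config value is a placeholder for a precise-capable event on the
     * target CPU:
     *
     *        #include <linux/perf_event.h>
     *        #include <sys/syscall.h>
     *        #include <unistd.h>
     *        #include <string.h>
     *
     *        static int open_pebs_event(void)
     *        {
     *                struct perf_event_attr attr;
     *
     *                memset(&attr, 0, sizeof(attr));
     *                attr.size = sizeof(attr);
     *                attr.type = PERF_TYPE_RAW;
     *                attr.config = 0x1cd;          // placeholder raw encoding
     *                attr.sample_period = 10007;
     *                attr.precise_ip = 2;          // ask for eventing IP / LBR fixup
     *                attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
     *                                   PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC;
     *
     *                return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
     *        }
     */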
1594
1595static void adaptive_pebs_save_regs(struct pt_regs *regs,
1596                                    struct pebs_gprs *gprs)
1597{
1598        regs->ax = gprs->ax;
1599        regs->bx = gprs->bx;
1600        regs->cx = gprs->cx;
1601        regs->dx = gprs->dx;
1602        regs->si = gprs->si;
1603        regs->di = gprs->di;
1604        regs->bp = gprs->bp;
1605        regs->sp = gprs->sp;
1606#ifndef CONFIG_X86_32
1607        regs->r8 = gprs->r8;
1608        regs->r9 = gprs->r9;
1609        regs->r10 = gprs->r10;
1610        regs->r11 = gprs->r11;
1611        regs->r12 = gprs->r12;
1612        regs->r13 = gprs->r13;
1613        regs->r14 = gprs->r14;
1614        regs->r15 = gprs->r15;
1615#endif
1616}
1617
1618#define PEBS_LATENCY_MASK                       0xffff
1619#define PEBS_CACHE_LATENCY_OFFSET               32
1620
1621/*
1622 * With adaptive PEBS the layout depends on what fields are configured.
1623 */
1624
1625static void setup_pebs_adaptive_sample_data(struct perf_event *event,
1626                                            struct pt_regs *iregs, void *__pebs,
1627                                            struct perf_sample_data *data,
1628                                            struct pt_regs *regs)
1629{
1630        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1631        struct pebs_basic *basic = __pebs;
1632        void *next_record = basic + 1;
1633        u64 sample_type;
1634        u64 format_size;
1635        struct pebs_meminfo *meminfo = NULL;
1636        struct pebs_gprs *gprs = NULL;
1637        struct x86_perf_regs *perf_regs;
1638
1639        if (basic == NULL)
1640                return;
1641
1642        perf_regs = container_of(regs, struct x86_perf_regs, regs);
1643        perf_regs->xmm_regs = NULL;
1644
1645        sample_type = event->attr.sample_type;
1646        format_size = basic->format_size;
1647        perf_sample_data_init(data, 0, event->hw.last_period);
1648        data->period = event->hw.last_period;
1649
1650        if (event->attr.use_clockid == 0)
1651                data->time = native_sched_clock_from_tsc(basic->tsc);
1652
1653        /*
1654         * We must however always use iregs for the unwinder to stay sane; the
1655         * record BP,SP,IP can point into thin air when the record is from a
1656         * previous PMI context or an (I)RET happened between the record and
1657         * PMI.
1658         */
1659        if (sample_type & PERF_SAMPLE_CALLCHAIN)
1660                data->callchain = perf_callchain(event, iregs);
1661
1662        *regs = *iregs;
1663        /* The ip in basic is EventingIP */
1664        set_linear_ip(regs, basic->ip);
1665        regs->flags = PERF_EFLAGS_EXACT;
1666
1667        /*
1668         * The MEMINFO group precedes the GP registers in the record,
1669         * but PERF_SAMPLE_TRANSACTION needs gprs->ax.
1670         * Save the pointer here and process it later.
1671         */
1672        if (format_size & PEBS_DATACFG_MEMINFO) {
1673                meminfo = next_record;
1674                next_record = meminfo + 1;
1675        }
1676
1677        if (format_size & PEBS_DATACFG_GP) {
1678                gprs = next_record;
1679                next_record = gprs + 1;
1680
1681                if (event->attr.precise_ip < 2) {
1682                        set_linear_ip(regs, gprs->ip);
1683                        regs->flags &= ~PERF_EFLAGS_EXACT;
1684                }
1685
1686                if (sample_type & PERF_SAMPLE_REGS_INTR)
1687                        adaptive_pebs_save_regs(regs, gprs);
1688        }
1689
1690        if (format_size & PEBS_DATACFG_MEMINFO) {
1691                if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
1692                        u64 weight = meminfo->latency;
1693
1694                        if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
1695                                data->weight.var2_w = weight & PEBS_LATENCY_MASK;
1696                                weight >>= PEBS_CACHE_LATENCY_OFFSET;
1697                        }
1698
1699                        /*
1700                         * Although meminfo::latency is defined as a u64,
1701                         * only the lower 32 bits include the valid data
1702                         * in practice on Ice Lake and earlier platforms.
1703                         */
1704                        if (sample_type & PERF_SAMPLE_WEIGHT) {
1705                                data->weight.full = weight ?:
1706                                        intel_get_tsx_weight(meminfo->tsx_tuning);
1707                        } else {
1708                                data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
1709                                        intel_get_tsx_weight(meminfo->tsx_tuning);
1710                        }
1711                }
1712
1713                if (sample_type & PERF_SAMPLE_DATA_SRC)
1714                        data->data_src.val = get_data_src(event, meminfo->aux);
1715
1716                if (sample_type & PERF_SAMPLE_ADDR_TYPE)
1717                        data->addr = meminfo->address;
1718
1719                if (sample_type & PERF_SAMPLE_TRANSACTION)
1720                        data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
1721                                                          gprs ? gprs->ax : 0);
1722        }
1723
1724        if (format_size & PEBS_DATACFG_XMMS) {
1725                struct pebs_xmm *xmm = next_record;
1726
1727                next_record = xmm + 1;
1728                perf_regs->xmm_regs = xmm->xmm;
1729        }
1730
1731        if (format_size & PEBS_DATACFG_LBRS) {
1732                struct lbr_entry *lbr = next_record;
1733                int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
1734                                        & 0xff) + 1;
1735                next_record = next_record + num_lbr * sizeof(struct lbr_entry);
1736
1737                if (has_branch_stack(event)) {
1738                        intel_pmu_store_pebs_lbrs(lbr);
1739                        data->br_stack = &cpuc->lbr_stack;
1740                }
1741        }
1742
1743        WARN_ONCE(next_record != __pebs + (format_size >> 48),
1744                        "PEBS record size %llu, expected %llu, config %llx\n",
1745                        format_size >> 48,
1746                        (u64)(next_record - __pebs),
1747                        basic->format_size);
1748}
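
    /*
     * Layout sketch (illustrative only): an adaptive PEBS record is parsed
     * above strictly in the order basic, meminfo, GPRs, XMM, LBR entries,
     * and each optional group is present only when its PEBS_DATACFG_* bit
     * is set in format_size.  The expected size that the WARN_ONCE() above
     * compares against could be modelled as:
     *
     *        static u64 adaptive_record_size(u64 format_size)
     *        {
     *                u64 sz = sizeof(struct pebs_basic);
     *
     *                if (format_size & PEBS_DATACFG_MEMINFO)
     *                        sz += sizeof(struct pebs_meminfo);
     *                if (format_size & PEBS_DATACFG_GP)
     *                        sz += sizeof(struct pebs_gprs);
     *                if (format_size & PEBS_DATACFG_XMMS)
     *                        sz += sizeof(struct pebs_xmm);
     *                if (format_size & PEBS_DATACFG_LBRS)
     *                        sz += (((format_size >> PEBS_DATACFG_LBR_SHIFT) & 0xff) + 1) *
     *                              sizeof(struct lbr_entry);
     *                return sz;
     *        }
     */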
1749
1750static inline void *
1751get_next_pebs_record_by_bit(void *base, void *top, int bit)
1752{
1753        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1754        void *at;
1755        u64 pebs_status;
1756
1757        /*
1758         * fmt0 does not have a status bitfield (does not use
1759         * the pebs_record_nhm format)
1760         */
1761        if (x86_pmu.intel_cap.pebs_format < 1)
1762                return base;
1763
1764        if (base == NULL)
1765                return NULL;
1766
1767        for (at = base; at < top; at += cpuc->pebs_record_size) {
1768                unsigned long status = get_pebs_status(at);
1769
1770                if (test_bit(bit, (unsigned long *)&status)) {
1771                        /* PEBS v3 has accurate status bits */
1772                        if (x86_pmu.intel_cap.pebs_format >= 3)
1773                                return at;
1774
1775                        if (status == (1 << bit))
1776                                return at;
1777
1778                        /* clear non-PEBS bits and re-check */
1779                        pebs_status = status & cpuc->pebs_enabled;
1780                        pebs_status &= PEBS_COUNTER_MASK;
1781                        if (pebs_status == (1 << bit))
1782                                return at;
1783                }
1784        }
1785        return NULL;
1786}
1787
1788void intel_pmu_auto_reload_read(struct perf_event *event)
1789{
1790        WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
1791
1792        perf_pmu_disable(event->pmu);
1793        intel_pmu_drain_pebs_buffer();
1794        perf_pmu_enable(event->pmu);
1795}
1796
1797/*
1798 * Special variant of intel_pmu_save_and_restart() for auto-reload.
1799 */
1800static int
1801intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
1802{
1803        struct hw_perf_event *hwc = &event->hw;
1804        int shift = 64 - x86_pmu.cntval_bits;
1805        u64 period = hwc->sample_period;
1806        u64 prev_raw_count, new_raw_count;
1807        s64 new, old;
1808
1809        WARN_ON(!period);
1810
1811        /*
1812         * drain_pebs() only happens when the PMU is disabled.
1813         */
1814        WARN_ON(this_cpu_read(cpu_hw_events.enabled));
1815
1816        prev_raw_count = local64_read(&hwc->prev_count);
1817        rdpmcl(hwc->event_base_rdpmc, new_raw_count);
1818        local64_set(&hwc->prev_count, new_raw_count);
1819
1820        /*
1821         * Since the counter increments a negative counter value and
1822         * overflows on the sign switch, giving the interval:
1823         *
1824         *   [-period, 0]
1825         *
1826         * the difference between two consecutive reads is:
1827         *
1828         *   A) value2 - value1;
1829         *      when no overflows have happened in between,
1830         *
1831         *   B) (0 - value1) + (value2 - (-period));
1832         *      when one overflow happened in between,
1833         *
1834         *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
1835         *      when @n overflows happened in between.
1836         *
1837         * Here A) is the obvious difference, B) is the extension to the
1838         * discrete interval, where the first term is to the top of the
1839         * interval and the second term is from the bottom of the next
1840         * interval and C) the extension to multiple intervals, where the
1841         * middle term is the whole intervals covered.
1842         *
1843         * An equivalent of C, by reduction, is:
1844         *
1845         *   value2 - value1 + n * period
1846         */
1847        new = ((s64)(new_raw_count << shift) >> shift);
1848        old = ((s64)(prev_raw_count << shift) >> shift);
1849        local64_add(new - old + count * period, &event->count);
1850
1851        local64_set(&hwc->period_left, -new);
1852
1853        perf_event_update_userpage(event);
1854
1855        return 0;
1856}
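
    /*
     * Worked example for the interval arithmetic above (illustrative
     * numbers): with period = 100, value1 = -30, value2 = -90 and n = 2
     * overflows observed by drain_pebs(), the counter advanced by
     *
     *        (0 - (-30)) + (2 - 1) * 100 + (-90 - (-100)) = 30 + 100 + 10 = 140
     *
     * events, which matches the reduced form used in the code:
     *
     *        value2 - value1 + n * period = -90 - (-30) + 2 * 100 = 140
     */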
1857
1858static __always_inline void
1859__intel_pmu_pebs_event(struct perf_event *event,
1860                       struct pt_regs *iregs,
1861                       struct perf_sample_data *data,
1862                       void *base, void *top,
1863                       int bit, int count,
1864                       void (*setup_sample)(struct perf_event *,
1865                                            struct pt_regs *,
1866                                            void *,
1867                                            struct perf_sample_data *,
1868                                            struct pt_regs *))
1869{
1870        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1871        struct hw_perf_event *hwc = &event->hw;
1872        struct x86_perf_regs perf_regs;
1873        struct pt_regs *regs = &perf_regs.regs;
1874        void *at = get_next_pebs_record_by_bit(base, top, bit);
1875        static struct pt_regs dummy_iregs;
1876
1877        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1878                /*
1879                 * Auto-reload is currently only enabled in fixed period
1880                 * mode, so the reload value is always hwc->sample_period.
1881                 * This may need to change if auto-reload is ever enabled
1882                 * in freq mode.
1883                 */
1884                intel_pmu_save_and_restart_reload(event, count);
1885        } else if (!intel_pmu_save_and_restart(event))
1886                return;
1887
1888        if (!iregs)
1889                iregs = &dummy_iregs;
1890
1891        while (count > 1) {
1892                setup_sample(event, iregs, at, data, regs);
1893                perf_event_output(event, data, regs);
1894                at += cpuc->pebs_record_size;
1895                at = get_next_pebs_record_by_bit(at, top, bit);
1896                count--;
1897        }
1898
1899        setup_sample(event, iregs, at, data, regs);
1900        if (iregs == &dummy_iregs) {
1901                /*
1902                 * The PEBS records may be drained in the non-overflow context,
1903                 * e.g., large PEBS + context switch. Perf should treat the
1904                 * last record the same as other PEBS records, and not
1905                 * invoke the generic overflow handler.
1906                 */
1907                perf_event_output(event, data, regs);
1908        } else {
1909                /*
1910                 * All but the last record have been processed above.
1911                 * The last one is left so the overflow handler can be called.
1912                 */
1913                if (perf_event_overflow(event, data, regs))
1914                        x86_pmu_stop(event, 0);
1915        }
1916}
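
    /*
     * Example flow (illustrative): with count == 3 in an NMI context, the
     * loop above emits the first two records via perf_event_output() and
     * only the third goes through perf_event_overflow(), so throttling and
     * period bookkeeping for the whole batch happen exactly once.
     */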
1917
1918static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
1919{
1920        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1921        struct debug_store *ds = cpuc->ds;
1922        struct perf_event *event = cpuc->events[0]; /* PMC0 only */
1923        struct pebs_record_core *at, *top;
1924        int n;
1925
1926        if (!x86_pmu.pebs_active)
1927                return;
1928
1929        at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
1930        top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
1931
1932        /*
1933         * Whatever else happens, drain the thing
1934         */
1935        ds->pebs_index = ds->pebs_buffer_base;
1936
1937        if (!test_bit(0, cpuc->active_mask))
1938                return;
1939
1940        WARN_ON_ONCE(!event);
1941
1942        if (!event->attr.precise_ip)
1943                return;
1944
1945        n = top - at;
1946        if (n <= 0) {
1947                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
1948                        intel_pmu_save_and_restart_reload(event, 0);
1949                return;
1950        }
1951
1952        __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
1953                               setup_pebs_fixed_sample_data);
1954}
1955
1956static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
1957{
1958        struct perf_event *event;
1959        int bit;
1960
1961        /*
1962         * drain_pebs() can be called twice in a short period for an
1963         * auto-reload event via pmu::read(), with no overflows having
1964         * happened in between.
1965         * intel_pmu_save_and_restart_reload() still needs to be called
1966         * to update event->count for this case.
1967         */
1968        for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
1969                event = cpuc->events[bit];
1970                if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
1971                        intel_pmu_save_and_restart_reload(event, 0);
1972        }
1973}
1974
1975static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
1976{
1977        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1978        struct debug_store *ds = cpuc->ds;
1979        struct perf_event *event;
1980        void *base, *at, *top;
1981        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
1982        short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
1983        int bit, i, size;
1984        u64 mask;
1985
1986        if (!x86_pmu.pebs_active)
1987                return;
1988
1989        base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
1990        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
1991
1992        ds->pebs_index = ds->pebs_buffer_base;
1993
1994        mask = (1ULL << x86_pmu.max_pebs_events) - 1;
1995        size = x86_pmu.max_pebs_events;
1996        if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
1997                mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
1998                size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
1999        }
2000
2001        if (unlikely(base >= top)) {
2002                intel_pmu_pebs_event_update_no_drain(cpuc, size);
2003                return;
2004        }
2005
2006        for (at = base; at < top; at += x86_pmu.pebs_record_size) {
2007                struct pebs_record_nhm *p = at;
2008                u64 pebs_status;
2009
2010                pebs_status = p->status & cpuc->pebs_enabled;
2011                pebs_status &= mask;
2012
2013                /* PEBS v3 has more accurate status bits */
2014                if (x86_pmu.intel_cap.pebs_format >= 3) {
2015                        for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2016                                counts[bit]++;
2017
2018                        continue;
2019                }
2020
2021                /*
2022                 * On some CPUs the PEBS status can be zero when PEBS is
2023                 * racing with clearing of GLOBAL_STATUS.
2024                 *
2025                 * Normally we would drop that record, but in the
2026                 * case when there is only a single active PEBS event
2027                 * we can assume it's for that event.
2028                 */
2029                if (!pebs_status && cpuc->pebs_enabled &&
2030                        !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
2031                        pebs_status = p->status = cpuc->pebs_enabled;
2032
2033                bit = find_first_bit((unsigned long *)&pebs_status,
2034                                        x86_pmu.max_pebs_events);
2035                if (bit >= x86_pmu.max_pebs_events)
2036                        continue;
2037
2038                /*
2039                 * The PEBS hardware does not deal well with events that
2040                 * happen close to each other and set multiple status
2041                 * bits, but this should be rare.
2042                 *
2043                 * If these events include one PEBS and multiple non-PEBS
2044                 * events, the PEBS record is unaffected and will be
2045                 * handled normally. (slow path)
2046                 *
2047                 * If these events include two or more PEBS events, their
2048                 * records can be collapsed into a single one, and it is
2049                 * not possible to reconstruct all the events that caused
2050                 * the PEBS record. This is called a collision; when it
2051                 * happens, the record is dropped.
2052                 */
2053                if (pebs_status != (1ULL << bit)) {
2054                        for_each_set_bit(i, (unsigned long *)&pebs_status, size)
2055                                error[i]++;
2056                        continue;
2057                }
2058
2059                counts[bit]++;
2060        }
2061
2062        for_each_set_bit(bit, (unsigned long *)&mask, size) {
2063                if ((counts[bit] == 0) && (error[bit] == 0))
2064                        continue;
2065
2066                event = cpuc->events[bit];
2067                if (WARN_ON_ONCE(!event))
2068                        continue;
2069
2070                if (WARN_ON_ONCE(!event->attr.precise_ip))
2071                        continue;
2072
2073                /* log the number of dropped samples */
2074                if (error[bit]) {
2075                        perf_log_lost_samples(event, error[bit]);
2076
2077                        if (iregs && perf_event_account_interrupt(event))
2078                                x86_pmu_stop(event, 0);
2079                }
2080
2081                if (counts[bit]) {
2082                        __intel_pmu_pebs_event(event, iregs, data, base,
2083                                               top, bit, counts[bit],
2084                                               setup_pebs_fixed_sample_data);
2085                }
2086        }
2087}
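
    /*
     * Collision example (illustrative): with PEBS events on counters 0 and
     * 1, a record whose masked status reads 0x3 fails the
     * pebs_status == (1ULL << bit) test above, so error[0] and error[1] are
     * both incremented and the record is dropped; a status of 0x1 or 0x2 is
     * attributed to the matching event and counted in counts[].
     */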
2088
2089static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
2090{
2091        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2092        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2093        int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
2094        int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
2095        struct debug_store *ds = cpuc->ds;
2096        struct perf_event *event;
2097        void *base, *at, *top;
2098        int bit, size;
2099        u64 mask;
2100
2101        if (!x86_pmu.pebs_active)
2102                return;
2103
2104        base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
2105        top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
2106
2107        ds->pebs_index = ds->pebs_buffer_base;
2108
2109        mask = ((1ULL << max_pebs_events) - 1) |
2110               (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
2111        size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
2112
2113        if (unlikely(base >= top)) {
2114                intel_pmu_pebs_event_update_no_drain(cpuc, size);
2115                return;
2116        }
2117
2118        for (at = base; at < top; at += cpuc->pebs_record_size) {
2119                u64 pebs_status;
2120
2121                pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
2122                pebs_status &= mask;
2123
2124                for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2125                        counts[bit]++;
2126        }
2127
2128        for_each_set_bit(bit, (unsigned long *)&mask, size) {
2129                if (counts[bit] == 0)
2130                        continue;
2131
2132                event = cpuc->events[bit];
2133                if (WARN_ON_ONCE(!event))
2134                        continue;
2135
2136                if (WARN_ON_ONCE(!event->attr.precise_ip))
2137                        continue;
2138
2139                __intel_pmu_pebs_event(event, iregs, data, base,
2140                                       top, bit, counts[bit],
2141                                       setup_pebs_adaptive_sample_data);
2142        }
2143}
2144
2145/*
2146 * BTS, PEBS probe and setup
2147 */
2148
2149void __init intel_ds_init(void)
2150{
2151        /*
2152         * No support for 32bit formats
2153         */
2154        if (!boot_cpu_has(X86_FEATURE_DTES64))
2155                return;
2156
2157        x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
2158        x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
2159        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
2160        if (x86_pmu.version <= 4)
2161                x86_pmu.pebs_no_isolation = 1;
2162
2163        if (x86_pmu.pebs) {
2164                char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
2165                char *pebs_qual = "";
2166                int format = x86_pmu.intel_cap.pebs_format;
2167
2168                if (format < 4)
2169                        x86_pmu.intel_cap.pebs_baseline = 0;
2170
2171                switch (format) {
2172                case 0:
2173                        pr_cont("PEBS fmt0%c, ", pebs_type);
2174                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
2175                        /*
2176                         * Using >PAGE_SIZE buffers makes the WRMSR to
2177                         * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
2178                         * mysteriously hang on Core2.
2179                         *
2180                         * As a workaround, we don't do this.
2181                         */
2182                        x86_pmu.pebs_buffer_size = PAGE_SIZE;
2183                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
2184                        break;
2185
2186                case 1:
2187                        pr_cont("PEBS fmt1%c, ", pebs_type);
2188                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
2189                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2190                        break;
2191
2192                case 2:
2193                        pr_cont("PEBS fmt2%c, ", pebs_type);
2194                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
2195                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2196                        break;
2197
2198                case 3:
2199                        pr_cont("PEBS fmt3%c, ", pebs_type);
2200                        x86_pmu.pebs_record_size =
2201                                                sizeof(struct pebs_record_skl);
2202                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2203                        x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
2204                        break;
2205
2206                case 4:
2207                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
2208                        x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
2209                        if (x86_pmu.intel_cap.pebs_baseline) {
2210                                x86_pmu.large_pebs_flags |=
2211                                        PERF_SAMPLE_BRANCH_STACK |
2212                                        PERF_SAMPLE_TIME;
2213                                x86_pmu.flags |= PMU_FL_PEBS_ALL;
2214                                pebs_qual = "-baseline";
2215                                x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
2216                        } else {
2217                                /* Only basic record supported */
2218                                x86_pmu.large_pebs_flags &=
2219                                        ~(PERF_SAMPLE_ADDR |
2220                                          PERF_SAMPLE_TIME |
2221                                          PERF_SAMPLE_DATA_SRC |
2222                                          PERF_SAMPLE_TRANSACTION |
2223                                          PERF_SAMPLE_REGS_USER |
2224                                          PERF_SAMPLE_REGS_INTR);
2225                        }
2226                        pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
2227
2228                        if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
2229                                pr_cont("PEBS-via-PT, ");
2230                                x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
2231                        }
2232
2233                        break;
2234
2235                default:
2236                        pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
2237                        x86_pmu.pebs = 0;
2238                }
2239        }
2240}
2241
2242void perf_restore_debug_store(void)
2243{
2244        struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
2245
2246        if (!x86_pmu.bts && !x86_pmu.pebs)
2247                return;
2248
2249        wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
2250}
2251