linux/arch/x86/kernel/cpu/perf_event_intel_ds.c
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>

#include <asm/perf_event.h>
#include <asm/insn.h>

#include "perf_event.h"

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE         24

#define BTS_BUFFER_SIZE         (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE        PAGE_SIZE
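
/*
 * A BTS record is three u64 fields (from, to, flags), hence the
 * 24-byte BTS_RECORD_SIZE above; see the local struct bts_record in
 * intel_pmu_drain_bts_buffer() below.
 */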

/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
        u32 flags, ip;
        u32 ax, bx, cx, dx;
        u32 si, di, bp, sp;
};

 */

union intel_x86_pebs_dse {
        u64 val;
        struct {
                unsigned int ld_dse:4;
                unsigned int ld_stlb_miss:1;
                unsigned int ld_locked:1;
                unsigned int ld_reserved:26;
        };
        struct {
                unsigned int st_l1d_hit:1;
                unsigned int st_reserved1:3;
                unsigned int st_stlb_miss:1;
                unsigned int st_locked:1;
                unsigned int st_reserved2:26;
        };
};
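
/*
 * The union above decodes the raw dse qword of a PEBS record: the
 * first bitfield layout applies to load-latency samples, the second
 * to precise-store samples; see load_latency_data() and
 * precise_store_data() below.
 */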


/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))

static const u64 pebs_data_source[] = {
        P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00: unknown L3 */
        OP_LH | P(LVL, L1)  | P(SNOOP, NONE),   /* 0x01: L1 local */
        OP_LH | P(LVL, LFB) | P(SNOOP, NONE),   /* 0x02: LFB hit */
        OP_LH | P(LVL, L2)  | P(SNOOP, NONE),   /* 0x03: L2 hit */
        OP_LH | P(LVL, L3)  | P(SNOOP, NONE),   /* 0x04: L3 hit */
        OP_LH | P(LVL, L3)  | P(SNOOP, MISS),   /* 0x05: L3 hit, snoop miss */
        OP_LH | P(LVL, L3)  | P(SNOOP, HIT),    /* 0x06: L3 hit, snoop hit */
        OP_LH | P(LVL, L3)  | P(SNOOP, HITM),   /* 0x07: L3 hit, snoop hitm */
        OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss, snoop hit */
        OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss, snoop hitm */
        OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
        OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
        OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
        OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
        OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
        OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
};
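
/*
 * The table above is indexed by the low four dse bits (ld_dse) of a
 * load-latency PEBS record and yields a generic perf_mem_data_src
 * encoding for the load's data source.
 */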

static u64 precise_store_data(u64 status)
{
        union intel_x86_pebs_dse dse;
        u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

        dse.val = status;

        /*
         * bit 4: TLB access
         * 1 = store missed 2nd level TLB
         *
         * If the store missed the STLB, it either hit the page walker
         * or faulted to the OS; otherwise it hit the 2nd level TLB.
         */
        if (dse.st_stlb_miss)
                val |= P(TLB, MISS);
        else
                val |= P(TLB, HIT);

        /*
         * bit 0: hit L1 data cache
         * if not set, then all we know is that
         * it missed L1D
         */
        if (dse.st_l1d_hit)
                val |= P(LVL, HIT);
        else
                val |= P(LVL, MISS);

        /*
         * bit 5: Locked prefix
         */
        if (dse.st_locked)
                val |= P(LOCK, LOCKED);

        return val;
}

static u64 precise_store_data_hsw(u64 status)
{
        union perf_mem_data_src dse;

        dse.val = 0;
        dse.mem_op = PERF_MEM_OP_STORE;
        dse.mem_lvl = PERF_MEM_LVL_NA;
        if (status & 1)
                dse.mem_lvl = PERF_MEM_LVL_L1;
        /* Nothing else supported. Sorry. */
        return dse.val;
}

static u64 load_latency_data(u64 status)
{
        union intel_x86_pebs_dse dse;
        u64 val;
        int model = boot_cpu_data.x86_model;
        int fam = boot_cpu_data.x86;

        dse.val = status;

        /*
         * use the mapping table for bits 0-3
         */
        val = pebs_data_source[dse.ld_dse];

        /*
         * Nehalem models do not support TLB or lock info
         */
        if (fam == 0x6 && (model == 26 || model == 30
            || model == 31 || model == 46)) {
                val |= P(TLB, NA) | P(LOCK, NA);
                return val;
        }
        /*
         * bit 4: TLB access
         * 0 = did not miss 2nd level TLB
         * 1 = missed 2nd level TLB
         */
        if (dse.ld_stlb_miss)
                val |= P(TLB, MISS) | P(TLB, L2);
        else
                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

        /*
         * bit 5: locked prefix
         */
        if (dse.ld_locked)
                val |= P(LOCK, LOCKED);

        return val;
}

struct pebs_record_core {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8,  r9,  r10, r11;
        u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8,  r9,  r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
};

/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
        struct pebs_record_nhm nhm;
        /*
         * Real IP of the event. In the Intel documentation this
         * is called eventingrip.
         */
        u64 real_ip;
        /*
         * TSX tuning information field: abort cycles and abort flags.
         */
        u64 tsx_tuning;
};

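/*
 * MSR_IA32_DS_AREA points the CPU at this cpu's debug store management
 * area, through which both the BTS and PEBS buffers are set up.
 */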
void init_debug_store_on_cpu(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

        if (!ds)
                return;

        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
                     (u32)((u64)(unsigned long)ds),
                     (u32)((u64)(unsigned long)ds >> 32));
}

void fini_debug_store_on_cpu(int cpu)
{
        if (!per_cpu(cpu_hw_events, cpu).ds)
                return;

        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

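/*
 * The PEBS interrupt threshold is programmed at a single record, so
 * the PEBS assist raises a PMI after every record: each record must
 * be attributed to its event and the event period reprogrammed
 * before the next one is written.
 */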
static int alloc_pebs_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
        int node = cpu_to_node(cpu);
        int max, thresh = 1; /* always use a single PEBS record */
        void *buffer;

        if (!x86_pmu.pebs)
                return 0;

        buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
        if (unlikely(!buffer))
                return -ENOMEM;

        max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

        ds->pebs_buffer_base = (u64)(unsigned long)buffer;
        ds->pebs_index = ds->pebs_buffer_base;
        ds->pebs_absolute_maximum = ds->pebs_buffer_base +
                max * x86_pmu.pebs_record_size;

        ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
                thresh * x86_pmu.pebs_record_size;

        return 0;
}

static void release_pebs_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

        if (!ds || !x86_pmu.pebs)
                return;

        kfree((void *)(unsigned long)ds->pebs_buffer_base);
        ds->pebs_buffer_base = 0;
}

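/*
 * Unlike PEBS, the BTS interrupt threshold is set max/16 records
 * short of the end of the buffer, leaving the hardware room to write
 * further records while the PMI that drains the buffer is delivered.
 */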
static int alloc_bts_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
        int node = cpu_to_node(cpu);
        int max, thresh;
        void *buffer;

        if (!x86_pmu.bts)
                return 0;

        buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node);
        if (unlikely(!buffer))
                return -ENOMEM;

        max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
        thresh = max / 16;

        ds->bts_buffer_base = (u64)(unsigned long)buffer;
        ds->bts_index = ds->bts_buffer_base;
        ds->bts_absolute_maximum = ds->bts_buffer_base +
                max * BTS_RECORD_SIZE;
        ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
                thresh * BTS_RECORD_SIZE;

        return 0;
}

static void release_bts_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

        if (!ds || !x86_pmu.bts)
                return;

        kfree((void *)(unsigned long)ds->bts_buffer_base);
        ds->bts_buffer_base = 0;
}

static int alloc_ds_buffer(int cpu)
{
        int node = cpu_to_node(cpu);
        struct debug_store *ds;

        ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
        if (unlikely(!ds))
                return -ENOMEM;

        per_cpu(cpu_hw_events, cpu).ds = ds;

        return 0;
}

static void release_ds_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

        if (!ds)
                return;

        per_cpu(cpu_hw_events, cpu).ds = NULL;
        kfree(ds);
}

void release_ds_buffers(void)
{
        int cpu;

        if (!x86_pmu.bts && !x86_pmu.pebs)
                return;

        get_online_cpus();
        for_each_online_cpu(cpu)
                fini_debug_store_on_cpu(cpu);

        for_each_possible_cpu(cpu) {
                release_pebs_buffer(cpu);
                release_bts_buffer(cpu);
                release_ds_buffer(cpu);
        }
        put_online_cpus();
}

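/*
 * BTS and PEBS allocation failures are tracked separately, so running
 * out of memory for one buffer type only disables that feature; the
 * DS area itself is released only when neither can be used.
 */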
void reserve_ds_buffers(void)
{
        int bts_err = 0, pebs_err = 0;
        int cpu;

        x86_pmu.bts_active = 0;
        x86_pmu.pebs_active = 0;

        if (!x86_pmu.bts && !x86_pmu.pebs)
                return;

        if (!x86_pmu.bts)
                bts_err = 1;

        if (!x86_pmu.pebs)
                pebs_err = 1;

        get_online_cpus();

        for_each_possible_cpu(cpu) {
                if (alloc_ds_buffer(cpu)) {
                        bts_err = 1;
                        pebs_err = 1;
                }

                if (!bts_err && alloc_bts_buffer(cpu))
                        bts_err = 1;

                if (!pebs_err && alloc_pebs_buffer(cpu))
                        pebs_err = 1;

                if (bts_err && pebs_err)
                        break;
        }

        if (bts_err) {
                for_each_possible_cpu(cpu)
                        release_bts_buffer(cpu);
        }

        if (pebs_err) {
                for_each_possible_cpu(cpu)
                        release_pebs_buffer(cpu);
        }

        if (bts_err && pebs_err) {
                for_each_possible_cpu(cpu)
                        release_ds_buffer(cpu);
        } else {
                if (x86_pmu.bts && !bts_err)
                        x86_pmu.bts_active = 1;

                if (x86_pmu.pebs && !pebs_err)
                        x86_pmu.pebs_active = 1;

                for_each_online_cpu(cpu)
                        init_debug_store_on_cpu(cpu);
        }

        put_online_cpus();
}

/*
 * BTS
 */

struct event_constraint bts_constraint =
        EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);

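/*
 * In IA32_DEBUGCTL: TR enables branch trace messages, BTS stores them
 * in the DS buffer instead of sending them to the bus, and BTINT
 * raises an interrupt when the BTS threshold is reached.  The
 * BTS_OFF_OS/BTS_OFF_USR bits suppress tracing of ring 0 / ring 3
 * branches respectively.
 */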
void intel_pmu_enable_bts(u64 config)
{
        unsigned long debugctlmsr;

        debugctlmsr = get_debugctlmsr();

        debugctlmsr |= DEBUGCTLMSR_TR;
        debugctlmsr |= DEBUGCTLMSR_BTS;
        debugctlmsr |= DEBUGCTLMSR_BTINT;

        if (!(config & ARCH_PERFMON_EVENTSEL_OS))
                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

        if (!(config & ARCH_PERFMON_EVENTSEL_USR))
                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

        update_debugctlmsr(debugctlmsr);
}

void intel_pmu_disable_bts(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        unsigned long debugctlmsr;

        if (!cpuc->ds)
                return;

        debugctlmsr = get_debugctlmsr();

        debugctlmsr &=
                ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
                  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

        update_debugctlmsr(debugctlmsr);
}

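/*
 * Drain the BTS buffer and emit one perf sample per branch record,
 * with the sample IP set to the branch source and the sample address
 * set to the branch target.
 */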
int intel_pmu_drain_bts_buffer(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct bts_record {
                u64     from;
                u64     to;
                u64     flags;
        };
        struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
        struct bts_record *at, *top;
        struct perf_output_handle handle;
        struct perf_event_header header;
        struct perf_sample_data data;
        struct pt_regs regs;

        if (!event)
                return 0;

        if (!x86_pmu.bts_active)
                return 0;

        at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
        top = (struct bts_record *)(unsigned long)ds->bts_index;

        if (top <= at)
                return 0;

        memset(&regs, 0, sizeof(regs));

        ds->bts_index = ds->bts_buffer_base;

        perf_sample_data_init(&data, 0, event->hw.last_period);

        /*
         * Prepare a generic sample, i.e. fill in the invariant fields.
         * We will overwrite the from and to address before we output
         * the sample.
         */
        perf_prepare_sample(&header, &data, event, &regs);

        if (perf_output_begin(&handle, event, header.size * (top - at)))
                return 1;

        for (; at < top; at++) {
                data.ip         = at->from;
                data.addr       = at->to;

                perf_output_sample(&handle, &header, &data, event);
        }

        perf_output_end(&handle);

        /* There's new data available. */
        event->hw.interrupts++;
        event->pending_kill = POLL_IN;
        return 1;
}

/*
 * PEBS
 */
struct event_constraint intel_core2_pebs_event_constraints[] = {
        INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
        INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
        INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
        INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
        INTEL_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
        INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
        INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
        INTEL_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_slm_pebs_event_constraints[] = {
        INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
        INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
        INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
        INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
        INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
        INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
        INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
        INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
        INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
        INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
        INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
        INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
        INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
        INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
        INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
        INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
        INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
        INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
        INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
        INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
        INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
        INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_nehalem_pebs_event_constraints[] = {
        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
        INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
        INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
        INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
        INTEL_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
        INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
        INTEL_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
        INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
        INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
        INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
        INTEL_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
        INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
        INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
        INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
        INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
        INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
        INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
        INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
        INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.* */
        /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
        INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
        /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
        INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
        INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
        INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
        /* MEM_UOPS_RETIRED.SPLIT_STORES */
        INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
        INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
        INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
        INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
        INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
        INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
        /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
        INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
        /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
        INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
        /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
        INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
        /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
        INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
        INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
        INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */

        EVENT_CONSTRAINT_END
};

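/*
 * A constraint matches when the masked event config equals the
 * constraint's code; its flags (e.g. PEBS_LDLAT, PEBS_ST) are copied
 * into the event so enable/disable can set the right PEBS_ENABLE
 * bits.  Returning emptyconstraint makes an unsupported precise
 * event unschedulable.
 */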
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
        struct event_constraint *c;

        if (!event->attr.precise_ip)
                return NULL;

        if (x86_pmu.pebs_constraints) {
                for_each_event_constraint(c, x86_pmu.pebs_constraints) {
                        if ((event->hw.config & c->cmask) == c->code) {
                                event->hw.flags |= c->flags;
                                return c;
                        }
                }
        }

        return &emptyconstraint;
}

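/*
 * MSR_IA32_PEBS_ENABLE layout, as used below: bits 0-3 arm PEBS for
 * the corresponding counter, bits 32-35 additionally enable load
 * latency capture for that counter, and bit 63 enables precise
 * stores.
 */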
void intel_pmu_pebs_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

        cpuc->pebs_enabled |= 1ULL << hwc->idx;

        if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled |= 1ULL << 63;
}

void intel_pmu_pebs_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

        if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
                cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled &= ~(1ULL << 63);

        if (cpuc->enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}


void intel_pmu_pebs_enable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->pebs_enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->pebs_enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

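/*
 * With a trap-like PEBS assist the recorded IP points to the
 * instruction *after* the one that caused the event.  To report the
 * eventing instruction, start from the last LBR branch target,
 * decode forward instruction by instruction until the sampled IP is
 * reached, and rewind to the start of the previous instruction.
 */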
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        unsigned long from = cpuc->lbr_entries[0].from;
        unsigned long old_to, to = cpuc->lbr_entries[0].to;
        unsigned long ip = regs->ip;
        int is_64bit = 0;

        /*
         * We don't need to fixup if the PEBS assist is fault like
         */
        if (!x86_pmu.intel_cap.pebs_trap)
                return 1;

        /*
         * No LBR entry, no basic block, no rewinding
         */
        if (!cpuc->lbr_stack.nr || !from || !to)
                return 0;

        /*
         * Basic blocks should never cross user/kernel boundaries
         */
        if (kernel_ip(ip) != kernel_ip(to))
                return 0;

        /*
         * unsigned math, either ip is before the start (impossible) or
         * the basic block is larger than 1 page (sanity)
         */
        if ((ip - to) > PAGE_SIZE)
                return 0;

        /*
         * We sampled a branch insn, rewind using the LBR stack
         */
        if (ip == to) {
                set_linear_ip(regs, from);
                return 1;
        }

        do {
                struct insn insn;
                u8 buf[MAX_INSN_SIZE];
                void *kaddr;

                old_to = to;
                if (!kernel_ip(ip)) {
                        int bytes, size = MAX_INSN_SIZE;

                        bytes = copy_from_user_nmi(buf, (void __user *)to, size);
                        if (bytes != size)
                                return 0;

                        kaddr = buf;
                } else
                        kaddr = (void *)to;

#ifdef CONFIG_X86_64
                is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
                insn_init(&insn, kaddr, is_64bit);
                insn_get_length(&insn);
                to += insn.length;
        } while (to < ip);

        if (to == ip) {
                set_linear_ip(regs, old_to);
                return 1;
        }

        /*
         * Even though we decoded the basic block, the instruction stream
         * never matched the given IP, either the TO or the IP got corrupted.
         */
        return 0;
}

static void __intel_pmu_pebs_event(struct perf_event *event,
                                   struct pt_regs *iregs, void *__pebs)
{
        /*
         * We cast to pebs_record_nhm to get the load latency data
         * if the extra_reg MSR_PEBS_LD_LAT_THRESHOLD is used
         */
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct pebs_record_nhm *pebs = __pebs;
        struct pebs_record_hsw *pebs_hsw = __pebs;
        struct perf_sample_data data;
        struct pt_regs regs;
        u64 sample_type;
        int fll, fst;

        if (!intel_pmu_save_and_restart(event))
                return;

        fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
        fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
                                 PERF_X86_EVENT_PEBS_ST_HSW);

        perf_sample_data_init(&data, 0, event->hw.last_period);

        data.period = event->hw.last_period;
        sample_type = event->attr.sample_type;

        /*
         * if PEBS-LL or PreciseStore
         */
        if (fll || fst) {
                /*
                 * Use latency for weight (only avail with PEBS-LL)
                 */
                if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
                        data.weight = pebs->lat;

                /*
                 * data.data_src encodes the data source
                 */
                if (sample_type & PERF_SAMPLE_DATA_SRC) {
                        if (fll)
                                data.data_src.val = load_latency_data(pebs->dse);
                        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
                                data.data_src.val =
                                        precise_store_data_hsw(pebs->dse);
                        else
                                data.data_src.val = precise_store_data(pebs->dse);
                }
        }

        /*
         * We use the interrupt regs as a base because the PEBS record
         * does not contain a full regs set, specifically it seems to
         * lack segment descriptors, which get used by things like
         * user_mode().
         *
         * In the simple case fix up only the IP and BP,SP regs, for
         * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
         * A possible PERF_SAMPLE_REGS will have to transfer all regs.
         */
        regs = *iregs;
        regs.flags = pebs->flags;
        set_linear_ip(&regs, pebs->ip);
        regs.bp = pebs->bp;
        regs.sp = pebs->sp;

        if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
                regs.ip = pebs_hsw->real_ip;
                regs.flags |= PERF_EFLAGS_EXACT;
        } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
                regs.flags |= PERF_EFLAGS_EXACT;
        else
                regs.flags &= ~PERF_EFLAGS_EXACT;

        if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
                x86_pmu.intel_cap.pebs_format >= 1)
                data.addr = pebs->dla;

        if (has_branch_stack(event))
                data.br_stack = &cpuc->lbr_stack;

        if (perf_event_overflow(event, &data, &regs))
                x86_pmu_stop(event, 0);
}

static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event = cpuc->events[0]; /* PMC0 only */
        struct pebs_record_core *at, *top;
        int n;

        if (!x86_pmu.pebs_active)
                return;

        at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

        /*
         * Whatever else happens, drain the thing
         */
        ds->pebs_index = ds->pebs_buffer_base;

        if (!test_bit(0, cpuc->active_mask))
                return;

        WARN_ON_ONCE(!event);

        if (!event->attr.precise_ip)
                return;

        n = top - at;
        if (n <= 0)
                return;

        /*
         * Should not happen, we program the threshold at 1 and do not
         * set a reset value.
         */
        WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
        at += n - 1;

        __intel_pmu_pebs_event(event, iregs, at);
}

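/*
 * With PEBS format >= 1 each record carries a status bitmap of the
 * counters that had overflowed when it was written, and a record may
 * have more than one bit set.  Each record is attributed to the first
 * active precise event whose status bit has not already been claimed
 * by an earlier record in this batch, so no record is consumed twice.
 */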
static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
                                        void *top)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event = NULL;
        u64 status = 0;
        int bit;

        ds->pebs_index = ds->pebs_buffer_base;

        for (; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;

                for_each_set_bit(bit, (unsigned long *)&p->status,
                                 x86_pmu.max_pebs_events) {
                        event = cpuc->events[bit];
                        if (!test_bit(bit, cpuc->active_mask))
                                continue;

                        WARN_ON_ONCE(!event);

                        if (!event->attr.precise_ip)
                                continue;

                        if (__test_and_set_bit(bit, (unsigned long *)&status))
                                continue;

                        break;
                }

                if (!event || bit >= x86_pmu.max_pebs_events)
                        continue;

                __intel_pmu_pebs_event(event, iregs, at);
        }
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct pebs_record_nhm *at, *top;
        int n;

        if (!x86_pmu.pebs_active)
                return;

        at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

        ds->pebs_index = ds->pebs_buffer_base;

        n = top - at;
        if (n <= 0)
                return;

        /*
         * Should not happen, we program the threshold at 1 and do not
         * set a reset value.
         */
        WARN_ONCE(n > x86_pmu.max_pebs_events,
                  "Unexpected number of pebs records %d\n", n);

        return __intel_pmu_drain_pebs_nhm(iregs, at, top);
}

static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct pebs_record_hsw *at, *top;
        int n;

        if (!x86_pmu.pebs_active)
                return;

        at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;

        n = top - at;
        if (n <= 0)
                return;
        /*
         * Should not happen, we program the threshold at 1 and do not
         * set a reset value.
         */
        WARN_ONCE(n > x86_pmu.max_pebs_events,
                  "Unexpected number of pebs records %d\n", n);

        return __intel_pmu_drain_pebs_nhm(iregs, at, top);
}

/*
 * BTS, PEBS probe and setup
 */

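/*
 * The PEBS record format is advertised via IA32_PERF_CAPABILITIES
 * (x86_pmu.intel_cap): format 0 is the Core layout, format 1 adds the
 * status/dla/dse/lat fields (Nehalem), and format 2 adds the real IP
 * and TSX tuning fields (Haswell).
 */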
void intel_ds_init(void)
{
        /*
         * No support for 32-bit formats
         */
        if (!boot_cpu_has(X86_FEATURE_DTES64))
                return;

        x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
        x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
        if (x86_pmu.pebs) {
                char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
                int format = x86_pmu.intel_cap.pebs_format;

                switch (format) {
                case 0:
                        pr_cont("PEBS fmt0%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
                        break;

                case 1:
                        pr_cont("PEBS fmt1%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;

                case 2:
                        pr_cont("PEBS fmt2%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
                        break;

                default:
                        pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
                        x86_pmu.pebs = 0;
                }
        }
}

void perf_restore_debug_store(void)
{
        struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

        if (!x86_pmu.bts && !x86_pmu.pebs)
                return;

        wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}