linux/arch/x86/events/intel/lbr.c
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/perf_event.h>
   3#include <linux/types.h>
   4
   5#include <asm/perf_event.h>
   6#include <asm/msr.h>
   7#include <asm/insn.h>
   8
   9#include "../perf_event.h"
  10
  11static const enum {
  12        LBR_EIP_FLAGS           = 1,
  13        LBR_TSX                 = 2,
  14} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
  15        [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
  16        [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
  17};
  18
  19/*
  20 * Intel LBR_SELECT bits
  21 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
  22 *
  23 * Hardware branch filter (not available on all CPUs)
  24 */
  25#define LBR_KERNEL_BIT          0 /* do not capture at ring0 */
  26#define LBR_USER_BIT            1 /* do not capture at ring > 0 */
  27#define LBR_JCC_BIT             2 /* do not capture conditional branches */
  28#define LBR_REL_CALL_BIT        3 /* do not capture relative calls */
  29#define LBR_IND_CALL_BIT        4 /* do not capture indirect calls */
  30#define LBR_RETURN_BIT          5 /* do not capture near returns */
  31#define LBR_IND_JMP_BIT         6 /* do not capture indirect jumps */
  32#define LBR_REL_JMP_BIT         7 /* do not capture relative jumps */
  33#define LBR_FAR_BIT             8 /* do not capture far branches */
  34#define LBR_CALL_STACK_BIT      9 /* enable call stack */
  35
  36/*
   37 * The following bit only exists in Linux; we mask it out before writing
   38 * it to the actual MSR. But it helps the perf constraint code understand
   39 * that this is a separate configuration.
  40 */
  41#define LBR_NO_INFO_BIT        63 /* don't read LBR_INFO. */
  42
  43#define LBR_KERNEL      (1 << LBR_KERNEL_BIT)
  44#define LBR_USER        (1 << LBR_USER_BIT)
  45#define LBR_JCC         (1 << LBR_JCC_BIT)
  46#define LBR_REL_CALL    (1 << LBR_REL_CALL_BIT)
  47#define LBR_IND_CALL    (1 << LBR_IND_CALL_BIT)
  48#define LBR_RETURN      (1 << LBR_RETURN_BIT)
  49#define LBR_REL_JMP     (1 << LBR_REL_JMP_BIT)
  50#define LBR_IND_JMP     (1 << LBR_IND_JMP_BIT)
  51#define LBR_FAR         (1 << LBR_FAR_BIT)
  52#define LBR_CALL_STACK  (1 << LBR_CALL_STACK_BIT)
  53#define LBR_NO_INFO     (1ULL << LBR_NO_INFO_BIT)
  54
  55#define LBR_PLM (LBR_KERNEL | LBR_USER)
  56
  57#define LBR_SEL_MASK    0x3ff   /* valid bits in LBR_SELECT */
  58#define LBR_NOT_SUPP    -1      /* LBR filter not supported */
  59#define LBR_IGN         0       /* ignored */
  60
  61#define LBR_ANY          \
  62        (LBR_JCC        |\
  63         LBR_REL_CALL   |\
  64         LBR_IND_CALL   |\
  65         LBR_RETURN     |\
  66         LBR_REL_JMP    |\
  67         LBR_IND_JMP    |\
  68         LBR_FAR)
  69
  70#define LBR_FROM_FLAG_MISPRED   BIT_ULL(63)
  71#define LBR_FROM_FLAG_IN_TX     BIT_ULL(62)
  72#define LBR_FROM_FLAG_ABORT     BIT_ULL(61)
  73
  74#define LBR_FROM_SIGNEXT_2MSB   (BIT_ULL(60) | BIT_ULL(59))
  75
  76/*
   77 * x86 control flow change classification
   78 * x86 control flow changes include branches, interrupts, traps, faults
  79 */
  80enum {
  81        X86_BR_NONE             = 0,      /* unknown */
  82
  83        X86_BR_USER             = 1 << 0, /* branch target is user */
  84        X86_BR_KERNEL           = 1 << 1, /* branch target is kernel */
  85
  86        X86_BR_CALL             = 1 << 2, /* call */
  87        X86_BR_RET              = 1 << 3, /* return */
  88        X86_BR_SYSCALL          = 1 << 4, /* syscall */
  89        X86_BR_SYSRET           = 1 << 5, /* syscall return */
  90        X86_BR_INT              = 1 << 6, /* sw interrupt */
  91        X86_BR_IRET             = 1 << 7, /* return from interrupt */
  92        X86_BR_JCC              = 1 << 8, /* conditional */
  93        X86_BR_JMP              = 1 << 9, /* jump */
  94        X86_BR_IRQ              = 1 << 10,/* hw interrupt or trap or fault */
  95        X86_BR_IND_CALL         = 1 << 11,/* indirect calls */
  96        X86_BR_ABORT            = 1 << 12,/* transaction abort */
  97        X86_BR_IN_TX            = 1 << 13,/* in transaction */
  98        X86_BR_NO_TX            = 1 << 14,/* not in transaction */
  99        X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
 100        X86_BR_CALL_STACK       = 1 << 16,/* call stack */
 101        X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
 102
 103        X86_BR_TYPE_SAVE        = 1 << 18,/* indicate to save branch type */
 104
 105};
 106
 107#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
 108#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
 109
 110#define X86_BR_ANY       \
 111        (X86_BR_CALL    |\
 112         X86_BR_RET     |\
 113         X86_BR_SYSCALL |\
 114         X86_BR_SYSRET  |\
 115         X86_BR_INT     |\
 116         X86_BR_IRET    |\
 117         X86_BR_JCC     |\
 118         X86_BR_JMP      |\
 119         X86_BR_IRQ      |\
 120         X86_BR_ABORT    |\
 121         X86_BR_IND_CALL |\
 122         X86_BR_IND_JMP  |\
 123         X86_BR_ZERO_CALL)
 124
 125#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
 126
 127#define X86_BR_ANY_CALL          \
 128        (X86_BR_CALL            |\
 129         X86_BR_IND_CALL        |\
 130         X86_BR_ZERO_CALL       |\
 131         X86_BR_SYSCALL         |\
 132         X86_BR_IRQ             |\
 133         X86_BR_INT)
 134
 135/*
 136 * Intel LBR_CTL bits
 137 *
 138 * Hardware branch filter for Arch LBR
 139 */
 140#define ARCH_LBR_KERNEL_BIT             1  /* capture at ring0 */
 141#define ARCH_LBR_USER_BIT               2  /* capture at ring > 0 */
 142#define ARCH_LBR_CALL_STACK_BIT         3  /* enable call stack */
 143#define ARCH_LBR_JCC_BIT                16 /* capture conditional branches */
 144#define ARCH_LBR_REL_JMP_BIT            17 /* capture relative jumps */
 145#define ARCH_LBR_IND_JMP_BIT            18 /* capture indirect jumps */
 146#define ARCH_LBR_REL_CALL_BIT           19 /* capture relative calls */
 147#define ARCH_LBR_IND_CALL_BIT           20 /* capture indirect calls */
 148#define ARCH_LBR_RETURN_BIT             21 /* capture near returns */
 149#define ARCH_LBR_OTHER_BRANCH_BIT       22 /* capture other branches */
 150
 151#define ARCH_LBR_KERNEL                 (1ULL << ARCH_LBR_KERNEL_BIT)
 152#define ARCH_LBR_USER                   (1ULL << ARCH_LBR_USER_BIT)
 153#define ARCH_LBR_CALL_STACK             (1ULL << ARCH_LBR_CALL_STACK_BIT)
 154#define ARCH_LBR_JCC                    (1ULL << ARCH_LBR_JCC_BIT)
 155#define ARCH_LBR_REL_JMP                (1ULL << ARCH_LBR_REL_JMP_BIT)
 156#define ARCH_LBR_IND_JMP                (1ULL << ARCH_LBR_IND_JMP_BIT)
 157#define ARCH_LBR_REL_CALL               (1ULL << ARCH_LBR_REL_CALL_BIT)
 158#define ARCH_LBR_IND_CALL               (1ULL << ARCH_LBR_IND_CALL_BIT)
 159#define ARCH_LBR_RETURN                 (1ULL << ARCH_LBR_RETURN_BIT)
 160#define ARCH_LBR_OTHER_BRANCH           (1ULL << ARCH_LBR_OTHER_BRANCH_BIT)
 161
 162#define ARCH_LBR_ANY                     \
 163        (ARCH_LBR_JCC                   |\
 164         ARCH_LBR_REL_JMP               |\
 165         ARCH_LBR_IND_JMP               |\
 166         ARCH_LBR_REL_CALL              |\
 167         ARCH_LBR_IND_CALL              |\
 168         ARCH_LBR_RETURN                |\
 169         ARCH_LBR_OTHER_BRANCH)
 170
 171#define ARCH_LBR_CTL_MASK                       0x7f000e
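
/*
 * For reference, 0x7f000e is exactly the OR of the filter bits defined
 * above: ARCH_LBR_KERNEL | ARCH_LBR_USER | ARCH_LBR_CALL_STACK (0xe)
 * plus ARCH_LBR_ANY (0x7f0000).
 */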
 172
 173static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
 174
 175static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
 176{
 177        if (static_cpu_has(X86_FEATURE_ARCH_LBR))
 178                return !!(config & ARCH_LBR_CALL_STACK);
 179
 180        return !!(config & LBR_CALL_STACK);
 181}
 182
 183/*
  184 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
  185 * otherwise it becomes nearly impossible to get a reliable stack.
 186 */
 187
 188static void __intel_pmu_lbr_enable(bool pmi)
 189{
 190        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 191        u64 debugctl, lbr_select = 0, orig_debugctl;
 192
 193        /*
 194         * No need to unfreeze manually, as v4 can do that as part
 195         * of the GLOBAL_STATUS ack.
 196         */
 197        if (pmi && x86_pmu.version >= 4)
 198                return;
 199
 200        /*
 201         * No need to reprogram LBR_SELECT in a PMI, as it
 202         * did not change.
 203         */
 204        if (cpuc->lbr_sel)
 205                lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
 206        if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
 207                wrmsrl(MSR_LBR_SELECT, lbr_select);
 208
 209        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 210        orig_debugctl = debugctl;
 211
 212        if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
 213                debugctl |= DEBUGCTLMSR_LBR;
 214        /*
 215         * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
  216         * If FREEZE_LBRS_ON_PMI is set, a PMI hitting near call/return
  217         * instructions may cause a superfluous increase/decrease of LBR_TOS.
 218         */
 219        if (is_lbr_call_stack_bit_set(lbr_select))
 220                debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
 221        else
 222                debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
 223
 224        if (orig_debugctl != debugctl)
 225                wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 226
 227        if (static_cpu_has(X86_FEATURE_ARCH_LBR))
 228                wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
 229}
 230
 231static void __intel_pmu_lbr_disable(void)
 232{
 233        u64 debugctl;
 234
 235        if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
 236                wrmsrl(MSR_ARCH_LBR_CTL, 0);
 237                return;
 238        }
 239
 240        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 241        debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
 242        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 243}
 244
 245void intel_pmu_lbr_reset_32(void)
 246{
 247        int i;
 248
 249        for (i = 0; i < x86_pmu.lbr_nr; i++)
 250                wrmsrl(x86_pmu.lbr_from + i, 0);
 251}
 252
 253void intel_pmu_lbr_reset_64(void)
 254{
 255        int i;
 256
 257        for (i = 0; i < x86_pmu.lbr_nr; i++) {
 258                wrmsrl(x86_pmu.lbr_from + i, 0);
 259                wrmsrl(x86_pmu.lbr_to   + i, 0);
 260                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
 261                        wrmsrl(x86_pmu.lbr_info + i, 0);
 262        }
 263}
 264
 265static void intel_pmu_arch_lbr_reset(void)
 266{
  267        /* Writing to the ARCH_LBR_DEPTH MSR resets all LBR entries to 0 */
 268        wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
 269}
 270
 271void intel_pmu_lbr_reset(void)
 272{
 273        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 274
 275        if (!x86_pmu.lbr_nr)
 276                return;
 277
 278        x86_pmu.lbr_reset();
 279
 280        cpuc->last_task_ctx = NULL;
 281        cpuc->last_log_id = 0;
 282}
 283
 284/*
 285 * TOS = most recently recorded branch
 286 */
 287static inline u64 intel_pmu_lbr_tos(void)
 288{
 289        u64 tos;
 290
 291        rdmsrl(x86_pmu.lbr_tos, tos);
 292        return tos;
 293}
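
/*
 * The LBR MSRs form a ring buffer indexed relative to TOS: the i-th most
 * recent entry lives at (tos - i) & (x86_pmu.lbr_nr - 1). For example,
 * with a hypothetical 16-deep LBR (mask 0xf) and tos == 3, the walk in
 * the read/save/restore paths below visits indices 3, 2, 1, 0, 15, 14, ...
 */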
 294
 295enum {
 296        LBR_NONE,
 297        LBR_VALID,
 298};
 299
 300/*
 301 * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
 302 * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
 303 * TSX is not supported they have no consistent behavior:
 304 *
 305 *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
 306 *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
 307 *     part of the sign extension.
 308 *
 309 * Therefore, if:
 310 *
 311 *   1) LBR has TSX format
 312 *   2) CPU has no TSX support enabled
 313 *
 314 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
  315 * value from rdmsr() must be converted to have a 61-bit sign extension,
 316 * ignoring the TSX flags.
 317 */
 318static inline bool lbr_from_signext_quirk_needed(void)
 319{
 320        int lbr_format = x86_pmu.intel_cap.lbr_format;
 321        bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
 322                           boot_cpu_has(X86_FEATURE_RTM);
 323
 324        return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
 325}
 326
 327static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
 328
 329/* If quirk is enabled, ensure sign extension is 63 bits: */
 330inline u64 lbr_from_signext_quirk_wr(u64 val)
 331{
 332        if (static_branch_unlikely(&lbr_from_quirk_key)) {
 333                /*
 334                 * Sign extend into bits 61:62 while preserving bit 63.
 335                 *
 336                 * Quirk is enabled when TSX is disabled. Therefore TSX bits
 337                 * in val are always OFF and must be changed to be sign
 338                 * extension bits. Since bits 59:60 are guaranteed to be
 339                 * part of the sign extension bits, we can just copy them
 340                 * to 61:62.
 341                 */
 342                val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
 343        }
 344        return val;
 345}
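
/*
 * A worked example of the write-side fixup, assuming a hypothetical raw
 * value val == 0x1fffffffff001234 (sign bits 59:60 set, TSX bits clear):
 *
 *   LBR_FROM_SIGNEXT_2MSB & val  == 0x1800000000000000
 *   (... & val) << 2             == 0x6000000000000000
 *
 * so the OR yields 0x7fffffffff001234, i.e. the sign is extended through
 * bits 61:62 while bit 63 is left untouched.
 */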
 346
 347/*
 348 * If quirk is needed, ensure sign extension is 61 bits:
 349 */
 350static u64 lbr_from_signext_quirk_rd(u64 val)
 351{
 352        if (static_branch_unlikely(&lbr_from_quirk_key)) {
 353                /*
 354                 * Quirk is on when TSX is not enabled. Therefore TSX
 355                 * flags must be read as OFF.
 356                 */
 357                val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
 358        }
 359        return val;
 360}
 361
 362static __always_inline void wrlbr_from(unsigned int idx, u64 val)
 363{
 364        val = lbr_from_signext_quirk_wr(val);
 365        wrmsrl(x86_pmu.lbr_from + idx, val);
 366}
 367
 368static __always_inline void wrlbr_to(unsigned int idx, u64 val)
 369{
 370        wrmsrl(x86_pmu.lbr_to + idx, val);
 371}
 372
 373static __always_inline void wrlbr_info(unsigned int idx, u64 val)
 374{
 375        wrmsrl(x86_pmu.lbr_info + idx, val);
 376}
 377
 378static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
 379{
 380        u64 val;
 381
 382        if (lbr)
 383                return lbr->from;
 384
 385        rdmsrl(x86_pmu.lbr_from + idx, val);
 386
 387        return lbr_from_signext_quirk_rd(val);
 388}
 389
 390static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
 391{
 392        u64 val;
 393
 394        if (lbr)
 395                return lbr->to;
 396
 397        rdmsrl(x86_pmu.lbr_to + idx, val);
 398
 399        return val;
 400}
 401
 402static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
 403{
 404        u64 val;
 405
 406        if (lbr)
 407                return lbr->info;
 408
 409        rdmsrl(x86_pmu.lbr_info + idx, val);
 410
 411        return val;
 412}
 413
 414static inline void
 415wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
 416{
 417        wrlbr_from(idx, lbr->from);
 418        wrlbr_to(idx, lbr->to);
 419        if (need_info)
 420                wrlbr_info(idx, lbr->info);
 421}
 422
 423static inline bool
 424rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
 425{
 426        u64 from = rdlbr_from(idx, NULL);
 427
 428        /* Don't read invalid entry */
 429        if (!from)
 430                return false;
 431
 432        lbr->from = from;
 433        lbr->to = rdlbr_to(idx, NULL);
 434        if (need_info)
 435                lbr->info = rdlbr_info(idx, NULL);
 436
 437        return true;
 438}
 439
 440void intel_pmu_lbr_restore(void *ctx)
 441{
 442        bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
 443        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 444        struct x86_perf_task_context *task_ctx = ctx;
 445        int i;
 446        unsigned lbr_idx, mask;
 447        u64 tos = task_ctx->tos;
 448
 449        mask = x86_pmu.lbr_nr - 1;
 450        for (i = 0; i < task_ctx->valid_lbrs; i++) {
 451                lbr_idx = (tos - i) & mask;
 452                wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
 453        }
 454
 455        for (; i < x86_pmu.lbr_nr; i++) {
 456                lbr_idx = (tos - i) & mask;
 457                wrlbr_from(lbr_idx, 0);
 458                wrlbr_to(lbr_idx, 0);
 459                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
 460                        wrlbr_info(lbr_idx, 0);
 461        }
 462
 463        wrmsrl(x86_pmu.lbr_tos, tos);
 464
 465        if (cpuc->lbr_select)
 466                wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
 467}
 468
 469static void intel_pmu_arch_lbr_restore(void *ctx)
 470{
 471        struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
 472        struct lbr_entry *entries = task_ctx->entries;
 473        int i;
 474
 475        /* Fast reset the LBRs before restore if the call stack is not full. */
 476        if (!entries[x86_pmu.lbr_nr - 1].from)
 477                intel_pmu_arch_lbr_reset();
 478
 479        for (i = 0; i < x86_pmu.lbr_nr; i++) {
 480                if (!entries[i].from)
 481                        break;
 482                wrlbr_all(&entries[i], i, true);
 483        }
 484}
 485
 486/*
 487 * Restore the Architecture LBR state from the xsave area in the perf
 488 * context data for the task via the XRSTORS instruction.
 489 */
 490static void intel_pmu_arch_lbr_xrstors(void *ctx)
 491{
 492        struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
 493
 494        copy_kernel_to_dynamic_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR);
 495}
 496
 497static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
 498{
 499        if (static_cpu_has(X86_FEATURE_ARCH_LBR))
 500                return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);
 501
 502        return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
 503}
 504
 505static void __intel_pmu_lbr_restore(void *ctx)
 506{
 507        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 508
 509        if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
 510            task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
 511                intel_pmu_lbr_reset();
 512                return;
 513        }
 514
 515        /*
  516         * Do not restore the LBR registers if
  517         * - no one else touched them, and
  518         * - they were not cleared in a deep C-state
 519         */
 520        if ((ctx == cpuc->last_task_ctx) &&
 521            (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
 522            !lbr_is_reset_in_cstate(ctx)) {
 523                task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
 524                return;
 525        }
 526
 527        x86_pmu.lbr_restore(ctx);
 528
 529        task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
 530}
 531
 532void intel_pmu_lbr_save(void *ctx)
 533{
 534        bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
 535        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 536        struct x86_perf_task_context *task_ctx = ctx;
 537        unsigned lbr_idx, mask;
 538        u64 tos;
 539        int i;
 540
 541        mask = x86_pmu.lbr_nr - 1;
 542        tos = intel_pmu_lbr_tos();
 543        for (i = 0; i < x86_pmu.lbr_nr; i++) {
 544                lbr_idx = (tos - i) & mask;
 545                if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
 546                        break;
 547        }
 548        task_ctx->valid_lbrs = i;
 549        task_ctx->tos = tos;
 550
 551        if (cpuc->lbr_select)
 552                rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
 553}
 554
 555static void intel_pmu_arch_lbr_save(void *ctx)
 556{
 557        struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
 558        struct lbr_entry *entries = task_ctx->entries;
 559        int i;
 560
 561        for (i = 0; i < x86_pmu.lbr_nr; i++) {
 562                if (!rdlbr_all(&entries[i], i, true))
 563                        break;
 564        }
 565
 566        /* LBR call stack is not full. Reset is required in restore. */
 567        if (i < x86_pmu.lbr_nr)
 568                entries[x86_pmu.lbr_nr - 1].from = 0;
 569}
 570
 571/*
 572 * Save the Architecture LBR state to the xsave area in the perf
 573 * context data for the task via the XSAVES instruction.
 574 */
 575static void intel_pmu_arch_lbr_xsaves(void *ctx)
 576{
 577        struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
 578
 579        copy_dynamic_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR);
 580}
 581
 582static void __intel_pmu_lbr_save(void *ctx)
 583{
 584        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 585
 586        if (task_context_opt(ctx)->lbr_callstack_users == 0) {
 587                task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
 588                return;
 589        }
 590
 591        x86_pmu.lbr_save(ctx);
 592
 593        task_context_opt(ctx)->lbr_stack_state = LBR_VALID;
 594
 595        cpuc->last_task_ctx = ctx;
 596        cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
 597}
 598
 599void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
 600                                 struct perf_event_context *next)
 601{
 602        void *prev_ctx_data, *next_ctx_data;
 603
 604        swap(prev->task_ctx_data, next->task_ctx_data);
 605
 606        /*
  607         * Architecture-specific synchronization only makes sense
  608         * when both the prev->task_ctx_data and next->task_ctx_data
  609         * pointers are allocated.
 610         */
 611
 612        prev_ctx_data = next->task_ctx_data;
 613        next_ctx_data = prev->task_ctx_data;
 614
 615        if (!prev_ctx_data || !next_ctx_data)
 616                return;
 617
 618        swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
 619             task_context_opt(next_ctx_data)->lbr_callstack_users);
 620}
 621
 622void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 623{
 624        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 625        void *task_ctx;
 626
 627        if (!cpuc->lbr_users)
 628                return;
 629
 630        /*
 631         * If LBR callstack feature is enabled and the stack was saved when
 632         * the task was scheduled out, restore the stack. Otherwise flush
 633         * the LBR stack.
 634         */
 635        task_ctx = ctx ? ctx->task_ctx_data : NULL;
 636        if (task_ctx) {
 637                if (sched_in)
 638                        __intel_pmu_lbr_restore(task_ctx);
 639                else
 640                        __intel_pmu_lbr_save(task_ctx);
 641                return;
 642        }
 643
 644        /*
 645         * Since a context switch can flip the address space and LBR entries
 646         * are not tagged with an identifier, we need to wipe the LBR, even for
 647         * per-cpu events. You simply cannot resolve the branches from the old
 648         * address space.
 649         */
 650        if (sched_in)
 651                intel_pmu_lbr_reset();
 652}
 653
 654static inline bool branch_user_callstack(unsigned br_sel)
 655{
 656        return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
 657}
 658
 659void intel_pmu_lbr_add(struct perf_event *event)
 660{
 661        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 662
 663        if (!x86_pmu.lbr_nr)
 664                return;
 665
 666        if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
 667                cpuc->lbr_select = 1;
 668
 669        cpuc->br_sel = event->hw.branch_reg.reg;
 670
 671        if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data)
 672                task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++;
 673
 674        /*
 675         * Request pmu::sched_task() callback, which will fire inside the
 676         * regular perf event scheduling, so that call will:
 677         *
 678         *  - restore or wipe; when LBR-callstack,
 679         *  - wipe; otherwise,
 680         *
 681         * when this is from __perf_event_task_sched_in().
 682         *
 683         * However, if this is from perf_install_in_context(), no such callback
 684         * will follow and we'll need to reset the LBR here if this is the
 685         * first LBR event.
 686         *
 687         * The problem is, we cannot tell these cases apart... but we can
 688         * exclude the biggest chunk of cases by looking at
 689         * event->total_time_running. An event that has accrued runtime cannot
 690         * be 'new'. Conversely, a new event can get installed through the
 691         * context switch path for the first time.
 692         */
 693        if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
 694                cpuc->lbr_pebs_users++;
 695        perf_sched_cb_inc(event->ctx->pmu);
 696        if (!cpuc->lbr_users++ && !event->total_time_running)
 697                intel_pmu_lbr_reset();
 698}
 699
 700void release_lbr_buffers(void)
 701{
 702        struct kmem_cache *kmem_cache;
 703        struct cpu_hw_events *cpuc;
 704        int cpu;
 705
 706        if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
 707                return;
 708
 709        for_each_possible_cpu(cpu) {
 710                cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
 711                kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
 712                if (kmem_cache && cpuc->lbr_xsave) {
 713                        kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
 714                        cpuc->lbr_xsave = NULL;
 715                }
 716        }
 717}
 718
 719void reserve_lbr_buffers(void)
 720{
 721        struct kmem_cache *kmem_cache;
 722        struct cpu_hw_events *cpuc;
 723        int cpu;
 724
 725        if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
 726                return;
 727
 728        for_each_possible_cpu(cpu) {
 729                cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
 730                kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
 731                if (!kmem_cache || cpuc->lbr_xsave)
 732                        continue;
 733
 734                cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
 735                                                        GFP_KERNEL | __GFP_ZERO,
 736                                                        cpu_to_node(cpu));
 737        }
 738}
 739
 740void intel_pmu_lbr_del(struct perf_event *event)
 741{
 742        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 743
 744        if (!x86_pmu.lbr_nr)
 745                return;
 746
 747        if (branch_user_callstack(cpuc->br_sel) &&
 748            event->ctx->task_ctx_data)
 749                task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--;
 750
 751        if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
 752                cpuc->lbr_select = 0;
 753
 754        if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
 755                cpuc->lbr_pebs_users--;
 756        cpuc->lbr_users--;
 757        WARN_ON_ONCE(cpuc->lbr_users < 0);
 758        WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
 759        perf_sched_cb_dec(event->ctx->pmu);
 760}
 761
 762static inline bool vlbr_exclude_host(void)
 763{
 764        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 765
 766        return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
 767                (unsigned long *)&cpuc->intel_ctrl_guest_mask);
 768}
 769
 770void intel_pmu_lbr_enable_all(bool pmi)
 771{
 772        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 773
 774        if (cpuc->lbr_users && !vlbr_exclude_host())
 775                __intel_pmu_lbr_enable(pmi);
 776}
 777
 778void intel_pmu_lbr_disable_all(void)
 779{
 780        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 781
 782        if (cpuc->lbr_users && !vlbr_exclude_host())
 783                __intel_pmu_lbr_disable();
 784}
 785
 786void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 787{
 788        unsigned long mask = x86_pmu.lbr_nr - 1;
 789        u64 tos = intel_pmu_lbr_tos();
 790        int i;
 791
 792        for (i = 0; i < x86_pmu.lbr_nr; i++) {
 793                unsigned long lbr_idx = (tos - i) & mask;
 794                union {
 795                        struct {
 796                                u32 from;
 797                                u32 to;
 798                        };
 799                        u64     lbr;
 800                } msr_lastbranch;
 801
 802                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
 803
 804                cpuc->lbr_entries[i].from       = msr_lastbranch.from;
 805                cpuc->lbr_entries[i].to         = msr_lastbranch.to;
 806                cpuc->lbr_entries[i].mispred    = 0;
 807                cpuc->lbr_entries[i].predicted  = 0;
 808                cpuc->lbr_entries[i].in_tx      = 0;
 809                cpuc->lbr_entries[i].abort      = 0;
 810                cpuc->lbr_entries[i].cycles     = 0;
 811                cpuc->lbr_entries[i].type       = 0;
 812                cpuc->lbr_entries[i].reserved   = 0;
 813        }
 814        cpuc->lbr_stack.nr = i;
 815        cpuc->lbr_stack.hw_idx = tos;
 816}
 817
 818/*
 819 * Due to lack of segmentation in Linux the effective address (offset)
 820 * is the same as the linear address, allowing us to merge the LIP and EIP
 821 * LBR formats.
 822 */
 823void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 824{
 825        bool need_info = false, call_stack = false;
 826        unsigned long mask = x86_pmu.lbr_nr - 1;
 827        int lbr_format = x86_pmu.intel_cap.lbr_format;
 828        u64 tos = intel_pmu_lbr_tos();
 829        int i;
 830        int out = 0;
 831        int num = x86_pmu.lbr_nr;
 832
 833        if (cpuc->lbr_sel) {
 834                need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
 835                if (cpuc->lbr_sel->config & LBR_CALL_STACK)
 836                        call_stack = true;
 837        }
 838
 839        for (i = 0; i < num; i++) {
 840                unsigned long lbr_idx = (tos - i) & mask;
 841                u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
 842                int skip = 0;
 843                u16 cycles = 0;
 844                int lbr_flags = lbr_desc[lbr_format];
 845
 846                from = rdlbr_from(lbr_idx, NULL);
 847                to   = rdlbr_to(lbr_idx, NULL);
 848
 849                /*
 850                 * Read LBR call stack entries
 851                 * until invalid entry (0s) is detected.
 852                 */
 853                if (call_stack && !from)
 854                        break;
 855
 856                if (lbr_format == LBR_FORMAT_INFO && need_info) {
 857                        u64 info;
 858
 859                        info = rdlbr_info(lbr_idx, NULL);
 860                        mis = !!(info & LBR_INFO_MISPRED);
 861                        pred = !mis;
 862                        in_tx = !!(info & LBR_INFO_IN_TX);
 863                        abort = !!(info & LBR_INFO_ABORT);
 864                        cycles = (info & LBR_INFO_CYCLES);
 865                }
 866
 867                if (lbr_format == LBR_FORMAT_TIME) {
 868                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
 869                        pred = !mis;
 870                        skip = 1;
 871                        cycles = ((to >> 48) & LBR_INFO_CYCLES);
 872
 873                        to = (u64)((((s64)to) << 16) >> 16);
 874                }
 875
 876                if (lbr_flags & LBR_EIP_FLAGS) {
 877                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
 878                        pred = !mis;
 879                        skip = 1;
 880                }
 881                if (lbr_flags & LBR_TSX) {
 882                        in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
 883                        abort = !!(from & LBR_FROM_FLAG_ABORT);
 884                        skip = 3;
 885                }
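                /*
                 * Shifting the signed value left by 'skip' and back right
                 * again drops the 'skip' flag bits from the top and
                 * sign-extends the remaining address bits back to a
                 * canonical 64-bit address.
                 */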
 886                from = (u64)((((s64)from) << skip) >> skip);
 887
 888                /*
 889                 * Some CPUs report duplicated abort records,
 890                 * with the second entry not having an abort bit set.
 891                 * Skip them here. This loop runs backwards,
 892                 * so we need to undo the previous record.
  893                 * If the abort just happened outside the window,
  894                 * the extra entry cannot be removed.
 895                 */
 896                if (abort && x86_pmu.lbr_double_abort && out > 0)
 897                        out--;
 898
 899                cpuc->lbr_entries[out].from      = from;
 900                cpuc->lbr_entries[out].to        = to;
 901                cpuc->lbr_entries[out].mispred   = mis;
 902                cpuc->lbr_entries[out].predicted = pred;
 903                cpuc->lbr_entries[out].in_tx     = in_tx;
 904                cpuc->lbr_entries[out].abort     = abort;
 905                cpuc->lbr_entries[out].cycles    = cycles;
 906                cpuc->lbr_entries[out].type      = 0;
 907                cpuc->lbr_entries[out].reserved  = 0;
 908                out++;
 909        }
 910        cpuc->lbr_stack.nr = out;
 911        cpuc->lbr_stack.hw_idx = tos;
 912}
 913
 914static __always_inline int get_lbr_br_type(u64 info)
 915{
 916        if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
 917                return 0;
 918
 919        return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
 920}
 921
 922static __always_inline bool get_lbr_mispred(u64 info)
 923{
 924        if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
 925                return 0;
 926
 927        return !!(info & LBR_INFO_MISPRED);
 928}
 929
 930static __always_inline bool get_lbr_predicted(u64 info)
 931{
 932        if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
 933                return 0;
 934
 935        return !(info & LBR_INFO_MISPRED);
 936}
 937
 938static __always_inline u16 get_lbr_cycles(u64 info)
 939{
 940        if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
 941            !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
 942                return 0;
 943
 944        return info & LBR_INFO_CYCLES;
 945}
 946
 947static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 948                                struct lbr_entry *entries)
 949{
 950        struct perf_branch_entry *e;
 951        struct lbr_entry *lbr;
 952        u64 from, to, info;
 953        int i;
 954
 955        for (i = 0; i < x86_pmu.lbr_nr; i++) {
 956                lbr = entries ? &entries[i] : NULL;
 957                e = &cpuc->lbr_entries[i];
 958
 959                from = rdlbr_from(i, lbr);
 960                /*
 961                 * Read LBR entries until invalid entry (0s) is detected.
 962                 */
 963                if (!from)
 964                        break;
 965
 966                to = rdlbr_to(i, lbr);
 967                info = rdlbr_info(i, lbr);
 968
 969                e->from         = from;
 970                e->to           = to;
 971                e->mispred      = get_lbr_mispred(info);
 972                e->predicted    = get_lbr_predicted(info);
 973                e->in_tx        = !!(info & LBR_INFO_IN_TX);
 974                e->abort        = !!(info & LBR_INFO_ABORT);
 975                e->cycles       = get_lbr_cycles(info);
 976                e->type         = get_lbr_br_type(info);
 977                e->reserved     = 0;
 978        }
 979
 980        cpuc->lbr_stack.nr = i;
 981}
 982
 983static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
 984{
 985        intel_pmu_store_lbr(cpuc, NULL);
 986}
 987
 988static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
 989{
 990        struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;
 991
 992        if (!xsave) {
 993                intel_pmu_store_lbr(cpuc, NULL);
 994                return;
 995        }
 996        copy_dynamic_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR);
 997
 998        intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
 999}
1000
1001void intel_pmu_lbr_read(void)
1002{
1003        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1004
1005        /*
 1006         * Don't read when all LBR users are using adaptive PEBS.
1007         *
1008         * This could be smarter and actually check the event,
1009         * but this simple approach seems to work for now.
1010         */
1011        if (!cpuc->lbr_users || vlbr_exclude_host() ||
1012            cpuc->lbr_users == cpuc->lbr_pebs_users)
1013                return;
1014
1015        x86_pmu.lbr_read(cpuc);
1016
1017        intel_pmu_lbr_filter(cpuc);
1018}
1019
1020/*
1021 * SW filter is used:
1022 * - in case there is no HW filter
1023 * - in case the HW filter has errata or limitations
1024 */
1025static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
1026{
1027        u64 br_type = event->attr.branch_sample_type;
1028        int mask = 0;
1029
1030        if (br_type & PERF_SAMPLE_BRANCH_USER)
1031                mask |= X86_BR_USER;
1032
1033        if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
1034                mask |= X86_BR_KERNEL;
1035
1036        /* we ignore BRANCH_HV here */
1037
1038        if (br_type & PERF_SAMPLE_BRANCH_ANY)
1039                mask |= X86_BR_ANY;
1040
1041        if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
1042                mask |= X86_BR_ANY_CALL;
1043
1044        if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
1045                mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
1046
1047        if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
1048                mask |= X86_BR_IND_CALL;
1049
1050        if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
1051                mask |= X86_BR_ABORT;
1052
1053        if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
1054                mask |= X86_BR_IN_TX;
1055
1056        if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
1057                mask |= X86_BR_NO_TX;
1058
1059        if (br_type & PERF_SAMPLE_BRANCH_COND)
1060                mask |= X86_BR_JCC;
1061
1062        if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
1063                if (!x86_pmu_has_lbr_callstack())
1064                        return -EOPNOTSUPP;
1065                if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
1066                        return -EINVAL;
1067                mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
1068                        X86_BR_CALL_STACK;
1069        }
1070
1071        if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
1072                mask |= X86_BR_IND_JMP;
1073
1074        if (br_type & PERF_SAMPLE_BRANCH_CALL)
1075                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
1076
1077        if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
1078                mask |= X86_BR_TYPE_SAVE;
1079
1080        /*
 1081         * stash the actual user request into reg; it may
 1082         * be used by fixup code for some CPUs
1083         */
1084        event->hw.branch_reg.reg = mask;
1085        return 0;
1086}
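
/*
 * For example, a hypothetical event with branch_sample_type ==
 * PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY_CALL ends up with
 * mask == X86_BR_USER | X86_BR_ANY_CALL, i.e. direct, indirect,
 * zero-length and far (syscall/int/irq) calls whose target is at the
 * user privilege level.
 */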
1087
1088/*
1089 * setup the HW LBR filter
 1090 * Used only when available; it may not be enough to disambiguate
 1091 * all branches and may need the help of the SW filter
1092 */
1093static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
1094{
1095        struct hw_perf_event_extra *reg;
1096        u64 br_type = event->attr.branch_sample_type;
1097        u64 mask = 0, v;
1098        int i;
1099
1100        for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
1101                if (!(br_type & (1ULL << i)))
1102                        continue;
1103
1104                v = x86_pmu.lbr_sel_map[i];
1105                if (v == LBR_NOT_SUPP)
1106                        return -EOPNOTSUPP;
1107
1108                if (v != LBR_IGN)
1109                        mask |= v;
1110        }
1111
1112        reg = &event->hw.branch_reg;
1113        reg->idx = EXTRA_REG_LBR;
1114
1115        if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
1116                reg->config = mask;
1117                return 0;
1118        }
1119
1120        /*
1121         * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
1122         * in suppress mode. So LBR_SELECT should be set to
 1123         * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK).
 1124         * But the 10th bit, LBR_CALL_STACK, does not operate
1125         * in suppress mode.
1126         */
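        /*
         * A worked example, assuming lbr_sel_mask == LBR_SEL_MASK (0x3ff)
         * and the Haswell map below: a user-only ANY_CALL request gives
         * mask == LBR_USER | LBR_REL_CALL | LBR_IND_CALL | LBR_FAR == 0x11a,
         * so reg->config == 0x11a ^ (0x3ff & ~0x200) == 0x0e5: the kernel,
         * JCC, return and jump bits are set (suppressed) while the user,
         * call and far bits stay clear (captured).
         */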
1127        reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
1128
1129        if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
1130            (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
1131            (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
1132                reg->config |= LBR_NO_INFO;
1133
1134        return 0;
1135}
1136
1137int intel_pmu_setup_lbr_filter(struct perf_event *event)
1138{
1139        int ret = 0;
1140
1141        /*
1142         * no LBR on this PMU
1143         */
1144        if (!x86_pmu.lbr_nr)
1145                return -EOPNOTSUPP;
1146
1147        /*
1148         * setup SW LBR filter
1149         */
1150        ret = intel_pmu_setup_sw_lbr_filter(event);
1151        if (ret)
1152                return ret;
1153
1154        /*
1155         * setup HW LBR filter, if any
1156         */
1157        if (x86_pmu.lbr_sel_map)
1158                ret = intel_pmu_setup_hw_lbr_filter(event);
1159
1160        return ret;
1161}
1162
1163/*
 1164 * return the type of control flow change at address "from"; the
 1165 * instruction is not necessarily a branch (in case of interrupt).
1166 *
1167 * The branch type returned also includes the priv level of the
1168 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
1169 *
1170 * If a branch type is unknown OR the instruction cannot be
1171 * decoded (e.g., text page not present), then X86_BR_NONE is
1172 * returned.
1173 */
1174static int branch_type(unsigned long from, unsigned long to, int abort)
1175{
1176        struct insn insn;
1177        void *addr;
1178        int bytes_read, bytes_left;
1179        int ret = X86_BR_NONE;
1180        int ext, to_plm, from_plm;
1181        u8 buf[MAX_INSN_SIZE];
1182        int is64 = 0;
1183
1184        to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
1185        from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
1186
1187        /*
 1188         * may be zero if the LBR did not fill up after a reset by the time
1189         * we get a PMU interrupt
1190         */
1191        if (from == 0 || to == 0)
1192                return X86_BR_NONE;
1193
1194        if (abort)
1195                return X86_BR_ABORT | to_plm;
1196
1197        if (from_plm == X86_BR_USER) {
1198                /*
1199                 * can happen if measuring at the user level only
1200                 * and we interrupt in a kernel thread, e.g., idle.
1201                 */
1202                if (!current->mm)
1203                        return X86_BR_NONE;
1204
1205                /* may fail if text not present */
1206                bytes_left = copy_from_user_nmi(buf, (void __user *)from,
1207                                                MAX_INSN_SIZE);
1208                bytes_read = MAX_INSN_SIZE - bytes_left;
1209                if (!bytes_read)
1210                        return X86_BR_NONE;
1211
1212                addr = buf;
1213        } else {
1214                /*
1215                 * The LBR logs any address in the IP, even if the IP just
1216                 * faulted. This means userspace can control the from address.
1217                 * Ensure we don't blindly read any address by validating it is
1218                 * a known text address.
1219                 */
1220                if (kernel_text_address(from)) {
1221                        addr = (void *)from;
1222                        /*
1223                         * Assume we can get the maximum possible size
1224                         * when grabbing kernel data.  This is not
1225                         * _strictly_ true since we could possibly be
1226                         * executing up next to a memory hole, but
1227                         * it is very unlikely to be a problem.
1228                         */
1229                        bytes_read = MAX_INSN_SIZE;
1230                } else {
1231                        return X86_BR_NONE;
1232                }
1233        }
1234
1235        /*
1236         * decoder needs to know the ABI especially
1237         * on 64-bit systems running 32-bit apps
1238         */
1239#ifdef CONFIG_X86_64
1240        is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
1241#endif
1242        insn_init(&insn, addr, bytes_read, is64);
1243        if (insn_get_opcode(&insn))
1244                return X86_BR_ABORT;
1245
1246        switch (insn.opcode.bytes[0]) {
1247        case 0xf:
1248                switch (insn.opcode.bytes[1]) {
1249                case 0x05: /* syscall */
1250                case 0x34: /* sysenter */
1251                        ret = X86_BR_SYSCALL;
1252                        break;
1253                case 0x07: /* sysret */
1254                case 0x35: /* sysexit */
1255                        ret = X86_BR_SYSRET;
1256                        break;
1257                case 0x80 ... 0x8f: /* conditional */
1258                        ret = X86_BR_JCC;
1259                        break;
1260                default:
1261                        ret = X86_BR_NONE;
1262                }
1263                break;
1264        case 0x70 ... 0x7f: /* conditional */
1265                ret = X86_BR_JCC;
1266                break;
1267        case 0xc2: /* near ret */
1268        case 0xc3: /* near ret */
1269        case 0xca: /* far ret */
1270        case 0xcb: /* far ret */
1271                ret = X86_BR_RET;
1272                break;
1273        case 0xcf: /* iret */
1274                ret = X86_BR_IRET;
1275                break;
1276        case 0xcc ... 0xce: /* int */
1277                ret = X86_BR_INT;
1278                break;
1279        case 0xe8: /* call near rel */
1280                if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
1281                        /* zero length call */
1282                        ret = X86_BR_ZERO_CALL;
1283                        break;
1284                }
1285                fallthrough;
1286        case 0x9a: /* call far absolute */
1287                ret = X86_BR_CALL;
1288                break;
1289        case 0xe0 ... 0xe3: /* loop jmp */
1290                ret = X86_BR_JCC;
1291                break;
1292        case 0xe9 ... 0xeb: /* jmp */
1293                ret = X86_BR_JMP;
1294                break;
1295        case 0xff: /* call near absolute, call far absolute ind */
1296                if (insn_get_modrm(&insn))
1297                        return X86_BR_ABORT;
1298
1299                ext = (insn.modrm.bytes[0] >> 3) & 0x7;
1300                switch (ext) {
1301                case 2: /* near ind call */
1302                case 3: /* far ind call */
1303                        ret = X86_BR_IND_CALL;
1304                        break;
1305                case 4:
1306                case 5:
1307                        ret = X86_BR_IND_JMP;
1308                        break;
1309                }
1310                break;
1311        default:
1312                ret = X86_BR_NONE;
1313        }
1314        /*
 1315         * interrupts, traps, faults (and thus ring transitions) may
 1316         * occur on any instruction. Thus, to classify them correctly,
 1317         * we need to first look at the from and to priv levels. If they
 1318         * are different and to is in the kernel, then it indicates
 1319         * a ring transition. If the from instruction is not a ring
 1320         * transition instr (syscall, sysenter, int), then it means
 1321         * it was an irq, trap or fault.
1322         *
1323         * we have no way of detecting kernel to kernel faults.
1324         */
1325        if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
1326            && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
1327                ret = X86_BR_IRQ;
1328
1329        /*
1330         * branch priv level determined by target as
1331         * is done by HW when LBR_SELECT is implemented
1332         */
1333        if (ret != X86_BR_NONE)
1334                ret |= to_plm;
1335
1336        return ret;
1337}
1338
1339#define X86_BR_TYPE_MAP_MAX     16
1340
1341static int branch_map[X86_BR_TYPE_MAP_MAX] = {
1342        PERF_BR_CALL,           /* X86_BR_CALL */
1343        PERF_BR_RET,            /* X86_BR_RET */
1344        PERF_BR_SYSCALL,        /* X86_BR_SYSCALL */
1345        PERF_BR_SYSRET,         /* X86_BR_SYSRET */
1346        PERF_BR_UNKNOWN,        /* X86_BR_INT */
1347        PERF_BR_UNKNOWN,        /* X86_BR_IRET */
1348        PERF_BR_COND,           /* X86_BR_JCC */
1349        PERF_BR_UNCOND,         /* X86_BR_JMP */
1350        PERF_BR_UNKNOWN,        /* X86_BR_IRQ */
1351        PERF_BR_IND_CALL,       /* X86_BR_IND_CALL */
1352        PERF_BR_UNKNOWN,        /* X86_BR_ABORT */
1353        PERF_BR_UNKNOWN,        /* X86_BR_IN_TX */
1354        PERF_BR_UNKNOWN,        /* X86_BR_NO_TX */
1355        PERF_BR_CALL,           /* X86_BR_ZERO_CALL */
1356        PERF_BR_UNKNOWN,        /* X86_BR_CALL_STACK */
1357        PERF_BR_IND,            /* X86_BR_IND_JMP */
1358};
1359
1360static int
1361common_branch_type(int type)
1362{
1363        int i;
1364
1365        type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
1366
1367        if (type) {
1368                i = __ffs(type);
1369                if (i < X86_BR_TYPE_MAP_MAX)
1370                        return branch_map[i];
1371        }
1372
1373        return PERF_BR_UNKNOWN;
1374}
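
/*
 * For example, a type of X86_BR_JCC | X86_BR_USER becomes 1 << 6 after the
 * shift, so __ffs() returns 6 and branch_map[6] yields PERF_BR_COND.
 */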
1375
1376enum {
1377        ARCH_LBR_BR_TYPE_JCC                    = 0,
1378        ARCH_LBR_BR_TYPE_NEAR_IND_JMP           = 1,
1379        ARCH_LBR_BR_TYPE_NEAR_REL_JMP           = 2,
1380        ARCH_LBR_BR_TYPE_NEAR_IND_CALL          = 3,
1381        ARCH_LBR_BR_TYPE_NEAR_REL_CALL          = 4,
1382        ARCH_LBR_BR_TYPE_NEAR_RET               = 5,
1383        ARCH_LBR_BR_TYPE_KNOWN_MAX              = ARCH_LBR_BR_TYPE_NEAR_RET,
1384
1385        ARCH_LBR_BR_TYPE_MAP_MAX                = 16,
1386};
1387
1388static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
1389        [ARCH_LBR_BR_TYPE_JCC]                  = X86_BR_JCC,
1390        [ARCH_LBR_BR_TYPE_NEAR_IND_JMP]         = X86_BR_IND_JMP,
1391        [ARCH_LBR_BR_TYPE_NEAR_REL_JMP]         = X86_BR_JMP,
1392        [ARCH_LBR_BR_TYPE_NEAR_IND_CALL]        = X86_BR_IND_CALL,
1393        [ARCH_LBR_BR_TYPE_NEAR_REL_CALL]        = X86_BR_CALL,
1394        [ARCH_LBR_BR_TYPE_NEAR_RET]             = X86_BR_RET,
1395};
1396
1397/*
 1398 * implement the actual branch filter based on user demand.
 1399 * Hardware may not exactly satisfy that request, thus
 1400 * we need to inspect opcodes. Mismatched branches are
 1401 * discarded. Therefore, the number of branches returned
 1402 * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
1403 */
1404static void
1405intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
1406{
1407        u64 from, to;
1408        int br_sel = cpuc->br_sel;
1409        int i, j, type, to_plm;
1410        bool compress = false;
1411
1412        /* if sampling all branches, then nothing to filter */
1413        if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
1414            ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
1415                return;
1416
1417        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
1418
1419                from = cpuc->lbr_entries[i].from;
1420                to = cpuc->lbr_entries[i].to;
1421                type = cpuc->lbr_entries[i].type;
1422
1423                /*
 1424                 * Parse the branch type recorded in the LBR_x_INFO MSR.
 1425                 * OTHER_BRANCH decoding is not supported for now; the
 1426                 * OTHER_BRANCH branch type still relies on software decoding.
1427                 */
1428                if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
1429                    type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
1430                        to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
1431                        type = arch_lbr_br_type_map[type] | to_plm;
1432                } else
1433                        type = branch_type(from, to, cpuc->lbr_entries[i].abort);
1434                if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
1435                        if (cpuc->lbr_entries[i].in_tx)
1436                                type |= X86_BR_IN_TX;
1437                        else
1438                                type |= X86_BR_NO_TX;
1439                }
1440
1441                /* if type does not correspond, then discard */
1442                if (type == X86_BR_NONE || (br_sel & type) != type) {
1443                        cpuc->lbr_entries[i].from = 0;
1444                        compress = true;
1445                }
1446
1447                if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
1448                        cpuc->lbr_entries[i].type = common_branch_type(type);
1449        }
1450
1451        if (!compress)
1452                return;
1453
1454        /* remove all entries with from=0 */
1455        for (i = 0; i < cpuc->lbr_stack.nr; ) {
1456                if (!cpuc->lbr_entries[i].from) {
1457                        j = i;
1458                        while (++j < cpuc->lbr_stack.nr)
1459                                cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
1460                        cpuc->lbr_stack.nr--;
1461                        if (!cpuc->lbr_entries[i].from)
1462                                continue;
1463                }
1464                i++;
1465        }
1466}
1467
1468void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
1469{
1470        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1471
1472        /* Cannot get TOS for large PEBS and Arch LBR */
1473        if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
1474            (cpuc->n_pebs == cpuc->n_large_pebs))
1475                cpuc->lbr_stack.hw_idx = -1ULL;
1476        else
1477                cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
1478
1479        intel_pmu_store_lbr(cpuc, lbr);
1480        intel_pmu_lbr_filter(cpuc);
1481}
1482
1483/*
1484 * Map interface branch filters onto LBR filters
1485 */
1486static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1487        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
1488        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
1489        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
1490        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
1491        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
1492                                                | LBR_IND_JMP | LBR_FAR,
1493        /*
1494         * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
1495         */
1496        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
1497         LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
1498        /*
1499         * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
1500         */
1501        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
1502        [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
1503        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1504};
1505
1506static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1507        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
1508        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
1509        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
1510        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
1511        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
1512        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
1513                                                | LBR_FAR,
1514        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
1515        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
1516        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
1517        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
1518};
1519
1520static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1521        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
1522        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
1523        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
1524        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
1525        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
1526        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
1527                                                | LBR_FAR,
1528        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
1529        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
1530        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
1531                                                | LBR_RETURN | LBR_CALL_STACK,
1532        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
1533        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
1534};
1535
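/*
 * Unlike the LBR_SELECT maps above, this map is deliberately not const:
 * intel_pmu_arch_lbr_init() below patches entries to LBR_NOT_SUPP when the
 * CPU lacks CPL or branch filtering support.
 */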
1536static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1537        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = ARCH_LBR_ANY,
1538        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = ARCH_LBR_USER,
1539        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = ARCH_LBR_KERNEL,
1540        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
1541        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = ARCH_LBR_RETURN |
1542                                                  ARCH_LBR_OTHER_BRANCH,
1543        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = ARCH_LBR_REL_CALL |
1544                                                  ARCH_LBR_IND_CALL |
1545                                                  ARCH_LBR_OTHER_BRANCH,
1546        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = ARCH_LBR_IND_CALL,
1547        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = ARCH_LBR_JCC,
1548        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = ARCH_LBR_REL_CALL |
1549                                                  ARCH_LBR_IND_CALL |
1550                                                  ARCH_LBR_RETURN |
1551                                                  ARCH_LBR_CALL_STACK,
1552        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = ARCH_LBR_IND_JMP,
1553        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = ARCH_LBR_REL_CALL,
1554};
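
/*
 * Illustrative sketch, not part of this file: a minimal, hypothetical
 * user-space program that requests the kind of filtering these maps
 * translate, asking for user-level call branches via perf_event_open(2).
 */
#if 0
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.sample_period = 100000;
        attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
        attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY_CALL |
                                  PERF_SAMPLE_BRANCH_USER;
        attr.exclude_kernel = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0);      /* self, any CPU */
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        close(fd);
        return 0;
}
#endif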
1555
1556/* core */
1557void __init intel_pmu_lbr_init_core(void)
1558{
1559        x86_pmu.lbr_nr     = 4;
1560        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1561        x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1562        x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1563
1564        /*
1565         * SW branch filter usage:
1566         * - compensate for lack of HW filter
1567         */
1568}
1569
1570/* nehalem/westmere */
1571void __init intel_pmu_lbr_init_nhm(void)
1572{
1573        x86_pmu.lbr_nr     = 16;
1574        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1575        x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
1576        x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
1577
1578        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1579        x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
1580
1581        /*
1582         * SW branch filter usage:
1583         * - work around LBR_SEL errata (see above)
1584         * - support syscall, sysret capture.
1585         *   That requires LBR_FAR, but that means far
1586         *   jmps need to be filtered out
1587         */
1588}
1589
1590/* sandy bridge */
1591void __init intel_pmu_lbr_init_snb(void)
1592{
1593        x86_pmu.lbr_nr   = 16;
1594        x86_pmu.lbr_tos  = MSR_LBR_TOS;
1595        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1596        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1597
1598        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1599        x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
1600
1601        /*
1602         * SW branch filter usage:
1603         * - support syscall, sysret capture.
1604         *   That requires LBR_FAR, but that means far
1605         *   jmps need to be filtered out
1606         */
1607}
1608
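/*
 * The "x86_lbr" cache below backs the per-task LBR context that is saved
 * and restored across context switches; the XSAVE-based Arch LBR variant
 * needs its buffer aligned to XSAVE_ALIGNMENT.
 */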
1609static inline struct kmem_cache *
1610create_lbr_kmem_cache(size_t size, size_t align)
1611{
1612        return kmem_cache_create("x86_lbr", size, align, 0, NULL);
1613}
1614
1615/* haswell */
1616void intel_pmu_lbr_init_hsw(void)
1617{
1618        size_t size = sizeof(struct x86_perf_task_context);
1619
1620        x86_pmu.lbr_nr   = 16;
1621        x86_pmu.lbr_tos  = MSR_LBR_TOS;
1622        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1623        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1624
1625        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1626        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
1627
1628        x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1629
1630        if (lbr_from_signext_quirk_needed())
1631                static_branch_enable(&lbr_from_quirk_key);
1632}
1633
1634/* skylake */
1635__init void intel_pmu_lbr_init_skl(void)
1636{
1637        size_t size = sizeof(struct x86_perf_task_context);
1638
1639        x86_pmu.lbr_nr   = 32;
1640        x86_pmu.lbr_tos  = MSR_LBR_TOS;
1641        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1642        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1643        x86_pmu.lbr_info = MSR_LBR_INFO_0;
1644
1645        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1646        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
1647
1648        x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1649
1650        /*
1651         * SW branch filter usage:
1652         * - support syscall, sysret capture.
1653         *   That requires LBR_FAR, but that means far
1654         *   jmps need to be filtered out
1655         */
1656}
1657
1658/* atom */
1659void __init intel_pmu_lbr_init_atom(void)
1660{
1661        /*
1662         * Only models starting at stepping 10 seem
1663         * to have an operational LBR which can freeze
1664         * on PMU interrupt.
1665         */
1666        if (boot_cpu_data.x86_model == 28
1667            && boot_cpu_data.x86_stepping < 10) {
1668                pr_cont("LBR disabled due to erratum");
1669                return;
1670        }
1671
1672        x86_pmu.lbr_nr     = 8;
1673        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1674        x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1675        x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1676
1677        /*
1678         * SW branch filter usage:
1679         * - compensate for lack of HW filter
1680         */
1681}
1682
1683/* slm */
1684void __init intel_pmu_lbr_init_slm(void)
1685{
1686        x86_pmu.lbr_nr     = 8;
1687        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1688        x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1689        x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1690
1691        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1692        x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
1693
1694        /*
1695         * SW branch filter usage:
1696         * - compensate for lack of HW filter
1697         */
1698        pr_cont("8-deep LBR, ");
1699}
1700
1701/* Knights Landing */
1702void intel_pmu_lbr_init_knl(void)
1703{
1704        x86_pmu.lbr_nr     = 8;
1705        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1706        x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
1707        x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
1708
1709        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1710        x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
1711
1712        /* Knights Landing enumerates the LIP format but does have the MISPREDICT bit */
1713        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
1714                x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
1715}
1716
1717/*
1718 * The LBR state size varies with the maximum number of LBR registers.
1719 * This calculates the expected state size, which should match
1720 * what the hardware enumerates for the size of XFEATURE_LBR.
1721 */
1722static inline unsigned int get_lbr_state_size(void)
1723{
1724        return sizeof(struct arch_lbr_state) +
1725               x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1726}
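
/*
 * For example, assuming the usual layout of a fixed arch_lbr_state header
 * followed by three u64s (24 bytes) per lbr_entry, a 32-deep Arch LBR needs
 * sizeof(struct arch_lbr_state) + 32 * 24 bytes, and that total must equal
 * what CPUID enumerates for XFEATURE_LBR.
 */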
1727
1728static bool is_arch_lbr_xsave_available(void)
1729{
1730        if (!boot_cpu_has(X86_FEATURE_XSAVES))
1731                return false;
1732
1733        /*
1734         * Check the LBR state size against the corresponding software structure.
1735         * Disable LBR XSAVES support if the sizes don't match.
1736         */
1737        if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
1738                return false;
1739
1740        return true;
1741}
1742
1743void __init intel_pmu_arch_lbr_init(void)
1744{
1745        struct pmu *pmu = x86_get_pmu(smp_processor_id());
1746        union cpuid28_eax eax;
1747        union cpuid28_ebx ebx;
1748        union cpuid28_ecx ecx;
1749        unsigned int unused_edx;
1750        bool arch_lbr_xsave;
1751        size_t size;
1752        u64 lbr_nr;
1753
1754        /* Arch LBR Capabilities */
1755        cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);
1756
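        /*
         * EAX[7:0] is a bitmap of supported depths: bit n set means a depth
         * of 8 * (n + 1) is supported, so the highest set bit gives the
         * maximum. For example, a mask of 0x7 (depths 8/16/24) yields
         * fls(0x7) * 8 = 24 entries.
         */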
1757        lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
1758        if (!lbr_nr)
1759                goto clear_arch_lbr;
1760
1761        /* Apply the max depth of Arch LBR */
1762        if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
1763                goto clear_arch_lbr;
1764
1765        x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
1766        x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
1767        x86_pmu.lbr_lip = eax.split.lbr_lip;
1768        x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
1769        x86_pmu.lbr_filter = ebx.split.lbr_filter;
1770        x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
1771        x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
1772        x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
1773        x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
1774        x86_pmu.lbr_nr = lbr_nr;
1775
1777        arch_lbr_xsave = is_arch_lbr_xsave_available();
1778        if (arch_lbr_xsave) {
1779                size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
1780                       get_lbr_state_size();
1781                pmu->task_ctx_cache = create_lbr_kmem_cache(size,
1782                                                            XSAVE_ALIGNMENT);
1783        }
1784
1785        if (!pmu->task_ctx_cache) {
1786                arch_lbr_xsave = false;
1787
1788                size = sizeof(struct x86_perf_task_context_arch_lbr) +
1789                       lbr_nr * sizeof(struct lbr_entry);
1790                pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
1791        }
1792
1793        x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
1794        x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
1795        x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;
1796
1797        /* LBR call stack requires CPL, branch filtering and call-stack support */
1798        if (!x86_pmu.lbr_cpl ||
1799            !x86_pmu.lbr_filter ||
1800            !x86_pmu.lbr_call_stack)
1801                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;
1802
1803        if (!x86_pmu.lbr_cpl) {
1804                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
1805                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
1806        } else if (!x86_pmu.lbr_filter) {
1807                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
1808                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
1809                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
1810                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
1811                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
1812                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
1813                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
1814        }
1815
1816        x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
1817        x86_pmu.lbr_ctl_map  = arch_lbr_ctl_map;
1818
1819        if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
1820                x86_pmu.lbr_ctl_map = NULL;
1821
1822        x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
1823        if (arch_lbr_xsave) {
1824                x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
1825                x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
1826                x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
1827                pr_cont("XSAVE ");
1828        } else {
1829                x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
1830                x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
1831                x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
1832        }
1833
1834        pr_cont("Architectural LBR, ");
1835
1836        return;
1837
1838clear_arch_lbr:
1839        clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR);
1840}
1841
1842/**
1843 * x86_perf_get_lbr - get the LBR record information
1844 *
1845 * @lbr: the caller's memory in which to store the LBR record information
1846 *
1847 * Returns: 0 on success; the LBR record information is stored in @lbr
1848 */
1849int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
1850{
1851        int lbr_fmt = x86_pmu.intel_cap.lbr_format;
1852
1853        lbr->nr = x86_pmu.lbr_nr;
1854        lbr->from = x86_pmu.lbr_from;
1855        lbr->to = x86_pmu.lbr_to;
1856        lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;
1857
1858        return 0;
1859}
1860EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
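
/*
 * Illustrative sketch, not part of this file: a hypothetical caller (for
 * instance a hypervisor module) can use the export above to discover whether
 * LBRs are present and which MSRs back them.
 */
#if 0
static bool example_host_has_lbr(void)
{
        struct x86_pmu_lbr lbr;

        return !x86_perf_get_lbr(&lbr) && lbr.nr != 0;
}
#endif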
1861
1862struct event_constraint vlbr_constraint =
1863        __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
1864                          FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);
1865