linux/arch/x86/kvm/vmx/pmu_intel.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM PMU support for Intel CPUs
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 */
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "nested.h"
#include "pmu.h"

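/*
 * MSR_IA32_PMC0 (0x4c1) is the full-width alias of MSR_IA32_PERFCTR0
 * (0xc1); the distance between the two MSR bases is a single bit (0x400),
 * so "msr & MSR_PMC_FULL_WIDTH_BIT" tells whether the guest accessed a
 * counter through its full-width alias.
 */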
#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)

static struct kvm_event_hw_type_mapping intel_arch_events[] = {
        /* Index must match CPUID 0x0A.EBX bit vector */
        [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
        [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
        [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES  },
        [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
        [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
        [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
        [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
        [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};

/*
 * Mapping between fixed pmc index and the intel_arch_events array: fixed
 * counter 0 (instructions retired), 1 (core cycles) and 2 (reference
 * cycles) map to intel_arch_events[] entries 1, 0 and 7 respectively.
 */
static int fixed_pmc_events[] = {1, 0, 7};

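/*
 * In IA32_FIXED_CTR_CTRL, each fixed counter owns a 4-bit control field
 * (enable ring levels in bits 1:0, AnyThread in bit 2, PMI in bit 3).
 * Only counters whose control field actually changed are reprogrammed.
 */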
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
        int i;

        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                u8 new_ctrl = fixed_ctrl_field(data, i);
                u8 old_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, i);
                struct kvm_pmc *pmc;

                pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);

                if (old_ctrl == new_ctrl)
                        continue;

                __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
                reprogram_fixed_counter(pmc, new_ctrl, i);
        }

        pmu->fixed_ctr_ctrl = data;
}

/*
 * Called when the global control register has been updated; reprogram
 * only the counters whose enable bit toggled.
 */
static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
{
        int bit;
        u64 diff = pmu->global_ctrl ^ data;

        pmu->global_ctrl = data;

        for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
                reprogram_counter(pmu, bit);
}

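/*
 * Map a raw event_select/unit_mask pair to a generic perf event id.  An
 * architectural event is usable only if the guest's CPUID 0xA.EBX
 * availability mask reports it; otherwise PERF_COUNT_HW_MAX is returned.
 */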
static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
                                      u8 event_select,
                                      u8 unit_mask)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
                if (intel_arch_events[i].eventsel == event_select
                    && intel_arch_events[i].unit_mask == unit_mask
                    && (pmu->available_event_types & (1 << i)))
                        break;

        if (i == ARRAY_SIZE(intel_arch_events))
                return PERF_COUNT_HW_MAX;

        return intel_arch_events[i].event_type;
}

static unsigned intel_find_fixed_event(int idx)
{
        u32 event;
        size_t size = ARRAY_SIZE(fixed_pmc_events);

        if (idx >= size)
                return PERF_COUNT_HW_MAX;

        event = fixed_pmc_events[array_index_nospec(idx, size)];
        return intel_arch_events[event].event_type;
}

/* Check if a PMC is enabled by comparing it with global_ctrl bits. */
static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);

        return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
}

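/*
 * KVM uses a single global index space for counters: GP counters occupy
 * indices 0..nr_arch_gp_counters-1 and fixed counters start at
 * INTEL_PMC_IDX_FIXED (32), mirroring the bit layout of
 * IA32_PERF_GLOBAL_CTRL.
 */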
static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
        if (pmc_idx < INTEL_PMC_IDX_FIXED) {
                return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
                                  MSR_P6_EVNTSEL0);
        } else {
                u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;

                return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
        }
}

/* Returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        bool fixed = idx & (1u << 30);

        idx &= ~(3u << 30);

        return (!fixed && idx >= pmu->nr_arch_gp_counters) ||
                (fixed && idx >= pmu->nr_arch_fixed_counters);
}

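/*
 * RDPMC encodes the requested counter in ECX: bit 30 selects the fixed
 * counter set and the low bits index a counter within that set.  The
 * returned *mask is narrowed to the counter's advertised width so the
 * emulated read is truncated just like on hardware.
 */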
static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
                                            unsigned int idx, u64 *mask)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        bool fixed = idx & (1u << 30);
        struct kvm_pmc *counters;
        unsigned int num_counters;

        idx &= ~(3u << 30);
        if (fixed) {
                counters = pmu->fixed_counters;
                num_counters = pmu->nr_arch_fixed_counters;
        } else {
                counters = pmu->gp_counters;
                num_counters = pmu->nr_arch_gp_counters;
        }
        if (idx >= num_counters)
                return NULL;
        *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
        return &counters[array_index_nospec(idx, num_counters)];
}

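/*
 * IA32_PERF_CAPABILITIES is only visible to the guest when PDCM is
 * exposed in guest CPUID; PMU_CAP_FW_WRITES in turn gates access to the
 * full-width MSR_IA32_PMCx counter aliases.
 */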
static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
{
        if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
                return 0;

        return vcpu->arch.perf_capabilities;
}

static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
{
        return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0;
}

static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
{
        if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
                return NULL;

        return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
}

bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu)
{
        /*
         * As a first step, a guest can only enable the LBR feature if its
         * CPU model is the same as the host's, because the LBR registers
         * are passed through to the guest and they are model specific.
         */
        return boot_cpu_data.x86_model == guest_cpuid_model(vcpu);
}

bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
{
        struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);

        return lbr->nr && (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_LBR_FMT);
}

static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
{
        struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
        bool ret = false;

        if (!intel_pmu_lbr_is_enabled(vcpu))
                return ret;

        ret = (index == MSR_LBR_SELECT) || (index == MSR_LBR_TOS) ||
                (index >= records->from && index < records->from + records->nr) ||
                (index >= records->to && index < records->to + records->nr);

        if (!ret && records->info)
                ret = (index >= records->info && index < records->info + records->nr);

        return ret;
}

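/*
 * The global control/status/overflow MSRs are only architectural from
 * PMU version 2 onwards; every other PMU MSR must resolve to a GP,
 * fixed or full-width counter MSR, or to an LBR MSR.
 */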
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        int ret;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                ret = pmu->version > 1;
                break;
        default:
                ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
                        get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
                        get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr) ||
                        intel_pmu_is_valid_lbr_msr(vcpu, msr);
                break;
        }

        return ret;
}

static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;

        pmc = get_fixed_pmc(pmu, msr);
        pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
        pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);

        return pmc;
}

static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (lbr_desc->event) {
                perf_event_release_kernel(lbr_desc->event);
                lbr_desc->event = NULL;
                vcpu_to_pmu(vcpu)->event_count--;
        }
}

int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct perf_event *event;

        /*
         * The perf_event_attr is constructed with the minimum configuration
         * needed:
         * - set 'pinned = true' to make it task pinned so that if another
         *   cpu pinned event reclaims LBR, the event->oncpu will be set to -1;
         * - set '.exclude_host = true' to record guest branch behavior;
         *
         * - set '.config = INTEL_FIXED_VLBR_EVENT' to indicate that host perf
         *   should schedule the event with a fake (virtual) counter instead
         *   of a real HW counter; check is_guest_lbr_event() and
         *   __intel_get_event_constraints();
         *
         * - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and
         *   'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
         *   PERF_SAMPLE_BRANCH_USER' to configure it as an LBR callstack
         *   event, which helps KVM save/restore guest LBR records across
         *   host context switches and cuts a lot of overhead; check
         *   branch_user_callstack() and intel_pmu_lbr_sched_task();
         */
        struct perf_event_attr attr = {
                .type = PERF_TYPE_RAW,
                .size = sizeof(attr),
                .config = INTEL_FIXED_VLBR_EVENT,
                .sample_type = PERF_SAMPLE_BRANCH_STACK,
                .pinned = true,
                .exclude_host = true,
                .branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
                                        PERF_SAMPLE_BRANCH_USER,
        };

        if (unlikely(lbr_desc->event)) {
                __set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
                return 0;
        }

        event = perf_event_create_kernel_counter(&attr, -1,
                                                current, NULL, NULL);
        if (IS_ERR(event)) {
                pr_debug_ratelimited("%s: failed %ld\n",
                                        __func__, PTR_ERR(event));
                return PTR_ERR(event);
        }
        lbr_desc->event = event;
        pmu->event_count++;
        __set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
        return 0;
}

/*
 * It's safe to access LBR MSRs from the guest when they have not been
 * passed through, since the host will restore or reset the LBR MSRs
 * when the guest LBR event is scheduled in.
 */
static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
                                     struct msr_data *msr_info, bool read)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
        u32 index = msr_info->index;

        if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
                return false;

        if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
                goto dummy;

        /*
         * Disable IRQs to ensure the LBR feature doesn't get reclaimed by the
         * host at the time the value is read from or written to the MSR; this
         * avoids leaking host LBR values to the guest.  If the LBR has been
         * reclaimed, return 0 on guest reads.
         */
        local_irq_disable();
        if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
                if (read)
                        rdmsrl(index, msr_info->data);
                else
                        wrmsrl(index, msr_info->data);
                __set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
                local_irq_enable();
                return true;
        }
        clear_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
        local_irq_enable();

dummy:
        if (read)
                msr_info->data = 0;
        return true;
}

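/*
 * Reads of counter MSRs are masked to the counter width advertised to the
 * guest; anything that doesn't map to a known PMU or LBR MSR makes the
 * function return 1 so the access is rejected.
 */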
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u32 msr = msr_info->index;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                msr_info->data = pmu->fixed_ctr_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                msr_info->data = pmu->global_status;
                return 0;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                msr_info->data = pmu->global_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                msr_info->data = pmu->global_ovf_ctrl;
                return 0;
        default:
                if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
                        u64 val = pmc_read_counter(pmc);
                        msr_info->data =
                                val & pmu->counter_bitmask[KVM_PMC_GP];
                        return 0;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                        u64 val = pmc_read_counter(pmc);
                        msr_info->data =
                                val & pmu->counter_bitmask[KVM_PMC_FIXED];
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                        msr_info->data = pmc->eventsel;
                        return 0;
                } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
                        return 0;
        }

        return 1;
}

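/*
 * Writes to a GP counter through the legacy MSR_IA32_PERFCTRx alias are
 * sign-extended from 32 bits, matching hardware; writes through the
 * full-width MSR_IA32_PMCx alias must fit within the advertised counter
 * width.  Control registers are only reprogrammed when their value
 * actually changes.
 */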
static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u32 msr = msr_info->index;
        u64 data = msr_info->data;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                if (pmu->fixed_ctr_ctrl == data)
                        return 0;
                if (!(data & 0xfffffffffffff444ull)) {
                        reprogram_fixed_counters(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                if (msr_info->host_initiated) {
                        pmu->global_status = data;
                        return 0;
                }
                break; /* RO MSR */
        case MSR_CORE_PERF_GLOBAL_CTRL:
                if (pmu->global_ctrl == data)
                        return 0;
                if (kvm_valid_perf_global_ctrl(pmu, data)) {
                        global_ctrl_changed(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                if (!(data & pmu->global_ovf_ctrl_mask)) {
                        if (!msr_info->host_initiated)
                                pmu->global_status &= ~data;
                        pmu->global_ovf_ctrl = data;
                        return 0;
                }
                break;
        default:
                if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
                        if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
                            (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
                                return 1;
                        if (!msr_info->host_initiated &&
                            !(msr & MSR_PMC_FULL_WIDTH_BIT))
                                data = (s64)(s32)data;
                        pmc->counter += data - pmc_read_counter(pmc);
                        if (pmc->perf_event)
                                perf_event_period(pmc->perf_event,
                                                  get_sample_period(pmc, data));
                        return 0;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                        pmc->counter += data - pmc_read_counter(pmc);
                        if (pmc->perf_event)
                                perf_event_period(pmc->perf_event,
                                                  get_sample_period(pmc, data));
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                        if (data == pmc->eventsel)
                                return 0;
                        if (!(data & pmu->reserved_bits)) {
                                reprogram_gp_counter(pmc, data);
                                return 0;
                        }
                } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
                        return 0;
        }

        return 1;
}

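/*
 * Refresh the PMU configuration from guest CPUID leaf 0xA, clamped by the
 * host's actual capabilities: EAX carries the version, the number of GP
 * counters, their bit width and the event mask length; EDX carries the
 * number and width of fixed counters; set bits in EBX mark architectural
 * events the guest should consider unavailable.
 */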
static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
        struct x86_pmu_capability x86_pmu;
        struct kvm_cpuid_entry2 *entry;
        union cpuid10_eax eax;
        union cpuid10_edx edx;

        pmu->nr_arch_gp_counters = 0;
        pmu->nr_arch_fixed_counters = 0;
        pmu->counter_bitmask[KVM_PMC_GP] = 0;
        pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
        pmu->version = 0;
        pmu->reserved_bits = 0xffffffff00200000ull;

        entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
        if (!entry)
                return;
        eax.full = entry->eax;
        edx.full = entry->edx;

        pmu->version = eax.split.version_id;
        if (!pmu->version)
                return;

        perf_get_x86_pmu_capability(&x86_pmu);

        pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
                                         x86_pmu.num_counters_gp);
        eax.split.bit_width = min_t(int, eax.split.bit_width, x86_pmu.bit_width_gp);
        pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
        eax.split.mask_length = min_t(int, eax.split.mask_length, x86_pmu.events_mask_len);
        pmu->available_event_types = ~entry->ebx &
                                        ((1ull << eax.split.mask_length) - 1);

        if (pmu->version == 1) {
                pmu->nr_arch_fixed_counters = 0;
        } else {
                pmu->nr_arch_fixed_counters =
                        min_t(int, edx.split.num_counters_fixed,
                              x86_pmu.num_counters_fixed);
                edx.split.bit_width_fixed = min_t(int,
                        edx.split.bit_width_fixed, x86_pmu.bit_width_fixed);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
                        ((u64)1 << edx.split.bit_width_fixed) - 1;
        }

        pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
                (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
        pmu->global_ctrl_mask = ~pmu->global_ctrl;
        pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask
                        & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
                            MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
        if (vmx_pt_mode_is_host_guest())
                pmu->global_ovf_ctrl_mask &=
                                ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;

        /*
         * Clear the TSX eventsel bits (HSW_IN_TX, HSW_IN_TX_CHECKPOINTED)
         * from the reserved mask when the guest is given HLE/RTM.
         */
        entry = kvm_find_cpuid_entry(vcpu, 7, 0);
        if (entry &&
            (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
            (cpuid_entry_has(entry, X86_FEATURE_HLE) ||
             cpuid_entry_has(entry, X86_FEATURE_RTM)))
                pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;

        bitmap_set(pmu->all_valid_pmc_idx,
                0, pmu->nr_arch_gp_counters);
        bitmap_set(pmu->all_valid_pmc_idx,
                INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

        nested_vmx_pmu_entry_exit_ctls_update(vcpu);

        if (intel_pmu_lbr_is_compatible(vcpu))
                x86_perf_get_lbr(&lbr_desc->records);
        else
                lbr_desc->records.nr = 0;

        if (lbr_desc->records.nr)
                bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
}

static void intel_pmu_init(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
                pmu->gp_counters[i].type = KVM_PMC_GP;
                pmu->gp_counters[i].vcpu = vcpu;
                pmu->gp_counters[i].idx = i;
                pmu->gp_counters[i].current_config = 0;
        }

        for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
                pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                pmu->fixed_counters[i].vcpu = vcpu;
                pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
                pmu->fixed_counters[i].current_config = 0;
        }

        vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();
        lbr_desc->records.nr = 0;
        lbr_desc->event = NULL;
        lbr_desc->msr_passthrough = false;
}

static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc = NULL;
        int i;

        for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
                pmc = &pmu->gp_counters[i];

                pmc_stop_counter(pmc);
                pmc->counter = pmc->eventsel = 0;
        }

        for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
                pmc = &pmu->fixed_counters[i];

                pmc_stop_counter(pmc);
                pmc->counter = 0;
        }

        pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
                pmu->global_ovf_ctrl = 0;

        intel_pmu_release_guest_lbr_event(vcpu);
}

/*
 * Emulate the Freeze_LBR_On_PMI behavior for 1 < pmu.version < 4.
 *
 * If IA32_DEBUGCTL.FREEZE_LBRS_ON_PMI = 1, the LBR stack is frozen on a
 * PMI; KVM emulates this by clearing the LBR enable bit (bit 0) in
 * IA32_DEBUGCTL.
 *
 * The guest needs to re-enable LBR to resume recording branches.
 */
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
        u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);

        if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
                data &= ~DEBUGCTLMSR_LBR;
                vmcs_write64(GUEST_IA32_DEBUGCTL, data);
        }
}

static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
        u8 version = vcpu_to_pmu(vcpu)->version;

        if (!intel_pmu_lbr_is_enabled(vcpu))
                return;

        if (version > 1 && version < 4)
                intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu);
}

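/*
 * Toggle MSR-bitmap interception for the whole guest LBR stack: every
 * from/to (and, when present, info) record MSR plus MSR_LBR_SELECT and
 * MSR_LBR_TOS.
 */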
static void vmx_update_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set)
{
        struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
        int i;

        for (i = 0; i < lbr->nr; i++) {
                vmx_set_intercept_for_msr(vcpu, lbr->from + i, MSR_TYPE_RW, set);
                vmx_set_intercept_for_msr(vcpu, lbr->to + i, MSR_TYPE_RW, set);
                if (lbr->info)
                        vmx_set_intercept_for_msr(vcpu, lbr->info + i, MSR_TYPE_RW, set);
        }

        vmx_set_intercept_for_msr(vcpu, MSR_LBR_SELECT, MSR_TYPE_RW, set);
        vmx_set_intercept_for_msr(vcpu, MSR_LBR_TOS, MSR_TYPE_RW, set);
}

static inline void vmx_disable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (!lbr_desc->msr_passthrough)
                return;

        vmx_update_intercept_for_lbr_msrs(vcpu, true);
        lbr_desc->msr_passthrough = false;
}

static inline void vmx_enable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (lbr_desc->msr_passthrough)
                return;

        vmx_update_intercept_for_lbr_msrs(vcpu, false);
        lbr_desc->msr_passthrough = true;
}

/*
 * Higher priority host perf events (e.g. CPU pinned) could reclaim the
 * PMU resources (e.g. LBR) that were assigned to the guest. This is
 * usually done via IPI calls (see perf_install_in_context() for details).
 *
 * Before entering non-root mode (with IRQs disabled here), double check
 * that the PMU features enabled for the guest have not been reclaimed by
 * higher priority host events. Otherwise, disallow vCPU access to the
 * reclaimed features.
 */
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (!lbr_desc->event) {
                vmx_disable_lbr_msrs_passthrough(vcpu);
                if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
                        goto warn;
                if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
                        goto warn;
                return;
        }

        if (lbr_desc->event->state < PERF_EVENT_STATE_ACTIVE) {
                vmx_disable_lbr_msrs_passthrough(vcpu);
                __clear_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
                goto warn;
        } else {
                vmx_enable_lbr_msrs_passthrough(vcpu);
        }

        return;

warn:
        pr_warn_ratelimited("kvm: vcpu-%d: fail to passthrough LBR.\n",
                vcpu->vcpu_id);
}

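/*
 * Called via the vPMU cleanup hook when vPMCs have gone unused for a full
 * time slice: if the guest has also disabled DEBUGCTL.LBR, release the
 * vLBR perf event so the host can reclaim the LBR hardware.
 */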
static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
        if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
                intel_pmu_release_guest_lbr_event(vcpu);
}

struct kvm_pmu_ops intel_pmu_ops = {
        .find_arch_event = intel_find_arch_event,
        .find_fixed_event = intel_find_fixed_event,
        .pmc_is_enabled = intel_pmc_is_enabled,
        .pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
        .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
        .msr_idx_to_pmc = intel_msr_idx_to_pmc,
        .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
        .is_valid_msr = intel_is_valid_msr,
        .get_msr = intel_pmu_get_msr,
        .set_msr = intel_pmu_set_msr,
        .refresh = intel_pmu_refresh,
        .init = intel_pmu_init,
        .reset = intel_pmu_reset,
        .deliver_pmi = intel_pmu_deliver_pmi,
        .cleanup = intel_pmu_cleanup,
};