linux/arch/arm64/kvm/pmu-emul.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2015 Linaro Ltd.
   4 * Author: Shannon Zhao <shannon.zhao@linaro.org>
   5 */
   6
   7#include <linux/cpu.h>
   8#include <linux/kvm.h>
   9#include <linux/kvm_host.h>
  10#include <linux/perf_event.h>
  11#include <linux/perf/arm_pmu.h>
  12#include <linux/uaccess.h>
  13#include <asm/kvm_emulate.h>
  14#include <kvm/arm_pmu.h>
  15#include <kvm/arm_vgic.h>
  16
  17static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
  18static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
  19static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
  20
  21#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
  22
  23static u32 kvm_pmu_event_mask(struct kvm *kvm)
  24{
  25        switch (kvm->arch.pmuver) {
  26        case ID_AA64DFR0_PMUVER_8_0:
  27                return GENMASK(9, 0);
  28        case ID_AA64DFR0_PMUVER_8_1:
  29        case ID_AA64DFR0_PMUVER_8_4:
  30        case ID_AA64DFR0_PMUVER_8_5:
  31                return GENMASK(15, 0);
  32        default:                /* Shouldn't be here, just for sanity */
  33                WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
  34                return 0;
  35        }
  36}
  37
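/*
 * Sketch (illustrative, not part of the file): the mask above is the width of
 * the event number a guest can program, i.e. GENMASK(9, 0) == 0x3ff for the
 * 10-bit event space of ARMv8.0 and GENMASK(15, 0) == 0xffff for the 16-bit
 * event space of ARMv8.1 and later. Decoding a programmed event therefore
 * boils down to:
 *
 *	u64 evtreg = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
 *	u16 event  = evtreg & kvm_pmu_event_mask(vcpu->kvm);
 */
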
  38/**
  39 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
  40 * @vcpu: The vcpu pointer
  41 * @select_idx: The counter index
  42 */
  43static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
  44{
  45        return (select_idx == ARMV8_PMU_CYCLE_IDX &&
  46                __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
  47}
  48
  49static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
  50{
  51        struct kvm_pmu *pmu;
  52        struct kvm_vcpu_arch *vcpu_arch;
  53
  54        pmc -= pmc->idx;
  55        pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
  56        vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
  57        return container_of(vcpu_arch, struct kvm_vcpu, arch);
  58}
  59
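/*
 * Sketch of the container_of() walk above, using hypothetical toy types (not
 * from this file): the element pointer is first rewound to slot 0 of its
 * array, then converted to the enclosing structure.
 *
 *	struct child  { int idx; };
 *	struct parent { struct child kids[4]; };
 *
 *	static struct parent *child_to_parent(struct child *c)
 *	{
 *		c -= c->idx;		// back to kids[0]
 *		return container_of(c, struct parent, kids[0]);
 *	}
 *
 * kvm_pmc_to_vcpu() applies the same idea in two steps:
 * pmc[] -> kvm_pmu -> kvm_vcpu_arch -> kvm_vcpu.
 */
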
  60/**
  61 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
  62 * @pmc: The PMU counter pointer
  63 */
  64static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
  65{
  66        struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
  67
  68        return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
  69}
  70
  71/**
  72 * kvm_pmu_idx_is_high_counter - determine if select_idx is the high counter of a pair
  73 * @select_idx: The counter index
  74 */
  75static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
  76{
  77        return select_idx & 0x1;
  78}
  79
  80/**
  81 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
  82 * @pmc: The PMU counter pointer
  83 *
  84 * When a pair of PMCs are chained together we use the low counter (canonical)
  85 * to hold the underlying perf event.
  86 */
  87static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
  88{
  89        if (kvm_pmu_pmc_is_chained(pmc) &&
  90            kvm_pmu_idx_is_high_counter(pmc->idx))
  91                return pmc - 1;
  92
  93        return pmc;
  94}
  95static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
  96{
  97        if (kvm_pmu_idx_is_high_counter(pmc->idx))
  98                return pmc - 1;
  99        else
 100                return pmc + 1;
 101}
 102
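/*
 * Worked example (not from this file): if counters 2 and 3 are chained, bit 1
 * of the 'chained' bitmap is set (3 >> 1 == 1). pmc[2] is the canonical
 * counter and owns the perf_event; pmc[3] only shadows the high 32 bits, so
 * kvm_pmu_get_canonical_pmc(&pmc[3]) returns &pmc[2]. The alternate mapping
 * simply pairs 2 <-> 3 (and any even index with the odd index above it),
 * independently of whether the pair is currently chained.
 */
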
 103/**
 104 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 105 * @vcpu: The vcpu pointer
 106 * @select_idx: The counter index
 107 */
 108static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
 109{
 110        u64 eventsel, reg;
 111
 112        select_idx |= 0x1;
 113
 114        if (select_idx == ARMV8_PMU_CYCLE_IDX)
 115                return false;
 116
 117        reg = PMEVTYPER0_EL0 + select_idx;
 118        eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
 119
 120        return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
 121}
 122
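/*
 * Note (illustrative): "select_idx |= 0x1" redirects the check to the odd
 * counter of the pair, because it is the odd/high counter that has to be
 * programmed with ARMV8_PMUV3_PERFCTR_CHAIN for the pair to be chainable.
 * The cycle counter (index 31) is odd, so indices 30 and 31 always fail the
 * test and can never form a chained pair.
 */
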
 123/**
 124 * kvm_pmu_get_pair_counter_value - get PMU counter value
 125 * @vcpu: The vcpu pointer
 126 * @pmc: The PMU counter pointer
 127 */
 128static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
 129                                          struct kvm_pmc *pmc)
 130{
 131        u64 counter, counter_high, reg, enabled, running;
 132
 133        if (kvm_pmu_pmc_is_chained(pmc)) {
 134                pmc = kvm_pmu_get_canonical_pmc(pmc);
 135                reg = PMEVCNTR0_EL0 + pmc->idx;
 136
 137                counter = __vcpu_sys_reg(vcpu, reg);
 138                counter_high = __vcpu_sys_reg(vcpu, reg + 1);
 139
 140                counter = lower_32_bits(counter) | (counter_high << 32);
 141        } else {
 142                reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
 143                      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
 144                counter = __vcpu_sys_reg(vcpu, reg);
 145        }
 146
 147        /*
 148         * The real counter value is equal to the value of the counter register
 149         * plus the value that the perf event has counted.
 150         */
 151        if (pmc->perf_event)
 152                counter += perf_event_read_value(pmc->perf_event, &enabled,
 153                                                 &running);
 154
 155        return counter;
 156}
 157
 158/**
 159 * kvm_pmu_get_counter_value - get PMU counter value
 160 * @vcpu: The vcpu pointer
 161 * @select_idx: The counter index
 162 */
 163u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
 164{
 165        u64 counter;
 166        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 167        struct kvm_pmc *pmc = &pmu->pmc[select_idx];
 168
 169        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
 170
 171        if (kvm_pmu_pmc_is_chained(pmc) &&
 172            kvm_pmu_idx_is_high_counter(select_idx))
 173                counter = upper_32_bits(counter);
 174        else if (select_idx != ARMV8_PMU_CYCLE_IDX)
 175                counter = lower_32_bits(counter);
 176
 177        return counter;
 178}
 179
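/*
 * Worked example (not from this file): assume counters 2/3 are chained and
 * the combined pair value computed above is 0x0000000280000001. Then:
 *
 *	kvm_pmu_get_counter_value(vcpu, 2) == 0x80000001	// low half
 *	kvm_pmu_get_counter_value(vcpu, 3) == 0x00000002	// high half
 *
 * The cycle counter is the only index whose read returns a full 64-bit value
 * here; every other unchained counter is truncated to 32 bits.
 */
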
 180/**
 181 * kvm_pmu_set_counter_value - set PMU counter value
 182 * @vcpu: The vcpu pointer
 183 * @select_idx: The counter index
 184 * @val: The counter value
 185 */
 186void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
 187{
 188        u64 reg;
 189
 190        reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
 191              ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
 192        __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
 193
 194        /* Recreate the perf event to reflect the updated sample_period */
 195        kvm_pmu_create_perf_event(vcpu, select_idx);
 196}
 197
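/*
 * Arithmetic sketch (not from this file): the guest-visible value is
 * "shadow register + live perf count", so the write above stores the signed
 * difference rather than the raw value. With a shadow of 100 and a perf
 * event that has already counted 40, a guest write of 0 turns the shadow
 * into -40 (mod 2^64), and a subsequent read yields -40 + 40 == 0 as
 * expected. Recreating the perf event then folds the live count back into
 * the shadow and reprograms the sample period to match the new value.
 */
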
 198/**
 199 * kvm_pmu_release_perf_event - remove the perf event
 200 * @pmc: The PMU counter pointer
 201 */
 202static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
 203{
 204        pmc = kvm_pmu_get_canonical_pmc(pmc);
 205        if (pmc->perf_event) {
 206                perf_event_disable(pmc->perf_event);
 207                perf_event_release_kernel(pmc->perf_event);
 208                pmc->perf_event = NULL;
 209        }
 210}
 211
 212/**
 213 * kvm_pmu_stop_counter - stop PMU counter
 214 * @pmc: The PMU counter pointer
 215 *
 216 * If this counter has been configured to monitor some event, release it here.
 217 */
 218static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
 219{
 220        u64 counter, reg, val;
 221
 222        pmc = kvm_pmu_get_canonical_pmc(pmc);
 223        if (!pmc->perf_event)
 224                return;
 225
 226        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
 227
 228        if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
 229                reg = PMCCNTR_EL0;
 230                val = counter;
 231        } else {
 232                reg = PMEVCNTR0_EL0 + pmc->idx;
 233                val = lower_32_bits(counter);
 234        }
 235
 236        __vcpu_sys_reg(vcpu, reg) = val;
 237
 238        if (kvm_pmu_pmc_is_chained(pmc))
 239                __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
 240
 241        kvm_pmu_release_perf_event(pmc);
 242}
 243
 244/**
 245 * kvm_pmu_vcpu_init - assign pmu counter indices for this vcpu
 246 * @vcpu: The vcpu pointer
 247 *
 248 */
 249void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
 250{
 251        int i;
 252        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 253
 254        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
 255                pmu->pmc[i].idx = i;
 256}
 257
 258/**
 259 * kvm_pmu_vcpu_reset - reset pmu state for this vcpu
 260 * @vcpu: The vcpu pointer
 261 *
 262 */
 263void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
 264{
 265        unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
 266        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 267        int i;
 268
 269        for_each_set_bit(i, &mask, 32)
 270                kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
 271
 272        bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
 273}
 274
 275/**
 276 * kvm_pmu_vcpu_destroy - free the PMU perf events for this vcpu
 277 * @vcpu: The vcpu pointer
 278 *
 279 */
 280void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
 281{
 282        int i;
 283        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 284
 285        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
 286                kvm_pmu_release_perf_event(&pmu->pmc[i]);
 287        irq_work_sync(&vcpu->arch.pmu.overflow_work);
 288}
 289
 290u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
 291{
 292        u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
 293
 294        val &= ARMV8_PMU_PMCR_N_MASK;
 295        if (val == 0)
 296                return BIT(ARMV8_PMU_CYCLE_IDX);
 297        else
 298                return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
 299}
 300
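/*
 * Worked example (not from this file): with PMCR_EL0.N == 6 the valid mask
 * is GENMASK(5, 0) | BIT(31) == 0x8000003f, i.e. event counters 0-5 plus the
 * cycle counter at ARMV8_PMU_CYCLE_IDX (31). N == 0 means the implementation
 * has no event counters, leaving only the cycle counter bit.
 */
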
 301/**
 302 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 303 * @vcpu: The vcpu pointer
 304 * @val: the value guest writes to PMCNTENSET register
 305 *
 306 * Call perf_event_enable to start counting the perf event
 307 */
 308void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 309{
 310        int i;
 311        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 312        struct kvm_pmc *pmc;
 313
 314        if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
 315                return;
 316
 317        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
 318                if (!(val & BIT(i)))
 319                        continue;
 320
 321                pmc = &pmu->pmc[i];
 322
 323                /* A change in the enable state may affect the chain state */
 324                kvm_pmu_update_pmc_chained(vcpu, i);
 325                kvm_pmu_create_perf_event(vcpu, i);
 326
 327                /* At this point, pmc must be the canonical */
 328                if (pmc->perf_event) {
 329                        perf_event_enable(pmc->perf_event);
 330                        if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
 331                                kvm_debug("fail to enable perf event\n");
 332                }
 333        }
 334}
 335
 336/**
 337 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 338 * @vcpu: The vcpu pointer
 339 * @val: the value guest writes to PMCNTENCLR register
 340 *
 341 * Call perf_event_disable to stop counting the perf event
 342 */
 343void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 344{
 345        int i;
 346        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 347        struct kvm_pmc *pmc;
 348
 349        if (!val)
 350                return;
 351
 352        for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
 353                if (!(val & BIT(i)))
 354                        continue;
 355
 356                pmc = &pmu->pmc[i];
 357
 358                /* A change in the enable state may affect the chain state */
 359                kvm_pmu_update_pmc_chained(vcpu, i);
 360                kvm_pmu_create_perf_event(vcpu, i);
 361
 362                /* At this point, pmc must be the canonical */
 363                if (pmc->perf_event)
 364                        perf_event_disable(pmc->perf_event);
 365        }
 366}
 367
 368static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
 369{
 370        u64 reg = 0;
 371
 372        if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
 373                reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
 374                reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 375                reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
 376        }
 377
 378        return reg;
 379}
 380
 381static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
 382{
 383        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 384        bool overflow;
 385
 386        if (!kvm_vcpu_has_pmu(vcpu))
 387                return;
 388
 389        overflow = !!kvm_pmu_overflow_status(vcpu);
 390        if (pmu->irq_level == overflow)
 391                return;
 392
 393        pmu->irq_level = overflow;
 394
 395        if (likely(irqchip_in_kernel(vcpu->kvm))) {
 396                int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
 397                                              pmu->irq_num, overflow, pmu);
 398                WARN_ON(ret);
 399        }
 400}
 401
 402bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
 403{
 404        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 405        struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
 406        bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
 407
 408        if (likely(irqchip_in_kernel(vcpu->kvm)))
 409                return false;
 410
 411        return pmu->irq_level != run_level;
 412}
 413
 414/*
 415 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 416 */
 417void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
 418{
 419        struct kvm_sync_regs *regs = &vcpu->run->s.regs;
 420
 421        /* Populate the timer bitmap for user space */
 422        regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
 423        if (vcpu->arch.pmu.irq_level)
 424                regs->device_irq_level |= KVM_ARM_DEV_PMU;
 425}
 426
 427/**
 428 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 429 * @vcpu: The vcpu pointer
 430 *
 431 * Check if the PMU has overflowed while we were running in the host, and inject
 432 * an interrupt if that was the case.
 433 */
 434void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
 435{
 436        kvm_pmu_update_state(vcpu);
 437}
 438
 439/**
 440 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 441 * @vcpu: The vcpu pointer
 442 *
 443 * Check if the PMU has overflowed while we were running in the guest, and
 444 * inject an interrupt if that was the case.
 445 */
 446void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
 447{
 448        kvm_pmu_update_state(vcpu);
 449}
 450
 451/*
 452 * When the perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 453 * to the event.
 454 * This is why we need a callback to do it once outside of the NMI context.
 455 */
 456static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
 457{
 458        struct kvm_vcpu *vcpu;
 459        struct kvm_pmu *pmu;
 460
 461        pmu = container_of(work, struct kvm_pmu, overflow_work);
 462        vcpu = kvm_pmc_to_vcpu(pmu->pmc);
 463
 464        kvm_vcpu_kick(vcpu);
 465}
 466
 467/*
 468 * When the perf event overflows, set the overflow status and inform the vcpu.
 469 */
 470static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 471                                  struct perf_sample_data *data,
 472                                  struct pt_regs *regs)
 473{
 474        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 475        struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
 476        struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
 477        int idx = pmc->idx;
 478        u64 period;
 479
 480        cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
 481
 482        /*
 483         * Reset the sample period to the architectural limit,
 484         * i.e. the point where the counter overflows.
 485         */
 486        period = -(local64_read(&perf_event->count));
 487
 488        if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
 489                period &= GENMASK(31, 0);
 490
 491        local64_set(&perf_event->hw.period_left, 0);
 492        perf_event->attr.sample_period = period;
 493        perf_event->hw.sample_period = period;
 494
 495        __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
 496
 497        if (kvm_pmu_overflow_status(vcpu)) {
 498                kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 499
 500                if (!in_nmi())
 501                        kvm_vcpu_kick(vcpu);
 502                else
 503                        irq_work_queue(&vcpu->arch.pmu.overflow_work);
 504        }
 505
 506        cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
 507}
 508
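/*
 * Sketch (not from this file): perf calls kvm_pmu_perf_overflow() after
 * sample_period events, so the period is reprogrammed to the two's
 * complement of the current count; the next callback then fires exactly when
 * the emulated counter would wrap. For a 32-bit counter sitting at
 * 0xffffff00:
 *
 *	period = (-(u64)0xffffff00) & GENMASK(31, 0);	// 0x100 events left
 */
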
 509/**
 510 * kvm_pmu_software_increment - do software increment
 511 * @vcpu: The vcpu pointer
 512 * @val: the value guest writes to PMSWINC register
 513 */
 514void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
 515{
 516        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 517        int i;
 518
 519        if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
 520                return;
 521
 522        /* Weed out disabled counters */
 523        val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 524
 525        for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
 526                u64 type, reg;
 527
 528                if (!(val & BIT(i)))
 529                        continue;
 530
 531                /* PMSWINC only applies to ... SW_INC! */
 532                type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
 533                type &= kvm_pmu_event_mask(vcpu->kvm);
 534                if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
 535                        continue;
 536
 537                /* increment this even SW_INC counter */
 538                reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
 539                reg = lower_32_bits(reg);
 540                __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
 541
 542                if (reg) /* no overflow on the low part */
 543                        continue;
 544
 545                if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
 546                        /* increment the high counter */
 547                        reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
 548                        reg = lower_32_bits(reg);
 549                        __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
 550                        if (!reg) /* mark overflow on the high counter */
 551                                __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
 552                } else {
 553                        /* mark overflow on low counter */
 554                        __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
 555                }
 556        }
 557}
 558
 559/**
 560 * kvm_pmu_handle_pmcr - handle PMCR register
 561 * @vcpu: The vcpu pointer
 562 * @val: the value guest writes to PMCR register
 563 */
 564void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 565{
 566        int i;
 567
 568        if (val & ARMV8_PMU_PMCR_E) {
 569                kvm_pmu_enable_counter_mask(vcpu,
 570                       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
 571        } else {
 572                kvm_pmu_disable_counter_mask(vcpu,
 573                       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
 574        }
 575
 576        if (val & ARMV8_PMU_PMCR_C)
 577                kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
 578
 579        if (val & ARMV8_PMU_PMCR_P) {
 580                unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
 581                mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
 582                for_each_set_bit(i, &mask, 32)
 583                        kvm_pmu_set_counter_value(vcpu, i, 0);
 584        }
 585}
 586
 587static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
 588{
 589        return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
 590               (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
 591}
 592
 593/**
 594 * kvm_pmu_create_perf_event - create a perf event for a counter
 595 * @vcpu: The vcpu pointer
 596 * @select_idx: The number of selected counter
 597 */
 598static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 599{
 600        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 601        struct kvm_pmc *pmc;
 602        struct perf_event *event;
 603        struct perf_event_attr attr;
 604        u64 eventsel, counter, reg, data;
 605
 606        /*
 607         * For chained counters the event type and filtering attributes are
 608         * obtained from the low/even counter. We also use this counter to
 609         * determine if the event is enabled/disabled.
 610         */
 611        pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
 612
 613        reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
 614              ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
 615        data = __vcpu_sys_reg(vcpu, reg);
 616
 617        kvm_pmu_stop_counter(vcpu, pmc);
 618        if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
 619                eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
 620        else
 621                eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
 622
 623        /* Software increment event doesn't need to be backed by a perf event */
 624        if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
 625                return;
 626
 627        /*
 628         * If we have a filter in place and the event isn't allowed, do
 629         * not install a perf event either.
 630         */
 631        if (vcpu->kvm->arch.pmu_filter &&
 632            !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
 633                return;
 634
 635        memset(&attr, 0, sizeof(struct perf_event_attr));
 636        attr.type = PERF_TYPE_RAW;
 637        attr.size = sizeof(attr);
 638        attr.pinned = 1;
 639        attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
 640        attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
 641        attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
 642        attr.exclude_hv = 1; /* Don't count EL2 events */
 643        attr.exclude_host = 1; /* Don't count host events */
 644        attr.config = eventsel;
 645
 646        counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
 647
 648        if (kvm_pmu_pmc_is_chained(pmc)) {
 649                /*
 650                 * The initial sample period (overflow count) of an event. For
 651                 * chained counters we only support overflow interrupts on the
 652                 * high counter.
 653                 */
 654                attr.sample_period = (-counter) & GENMASK(63, 0);
 655                attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
 656
 657                event = perf_event_create_kernel_counter(&attr, -1, current,
 658                                                         kvm_pmu_perf_overflow,
 659                                                         pmc + 1);
 660        } else {
 661                /* The initial sample period (overflow count) of an event. */
 662                if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
 663                        attr.sample_period = (-counter) & GENMASK(63, 0);
 664                else
 665                        attr.sample_period = (-counter) & GENMASK(31, 0);
 666
 667                event = perf_event_create_kernel_counter(&attr, -1, current,
 668                                                 kvm_pmu_perf_overflow, pmc);
 669        }
 670
 671        if (IS_ERR(event)) {
 672                pr_err_once("kvm: pmu event creation failed %ld\n",
 673                            PTR_ERR(event));
 674                return;
 675        }
 676
 677        pmc->perf_event = event;
 678}
 679
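/*
 * Sketch of the resulting attribute for a hypothetical unchained counter
 * programmed with CPU_CYCLES (event 0x11); not part of this file:
 *
 *	attr.type          = PERF_TYPE_RAW;
 *	attr.config        = 0x11;
 *	attr.pinned        = 1;
 *	attr.exclude_host  = 1;			// host work is not counted
 *	attr.sample_period = (-counter) & GENMASK(31, 0);
 *
 * For a chained pair, setting bit 0 of config1 (PERF_ATTR_CFG1_KVM_PMU_CHAINED)
 * asks the host PMU driver for a 64-bit (chained) hardware event, and the
 * overflow handler context is the high counter (pmc + 1), matching the comment
 * above that overflow interrupts are only supported on the high counter.
 */
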
 680/**
 681 * kvm_pmu_update_pmc_chained - update chained bitmap
 682 * @vcpu: The vcpu pointer
 683 * @select_idx: The number of selected counter
 684 *
 685 * Update the chained bitmap based on the event type written in the
 686 * typer register and the enable state of the odd register.
 687 */
 688static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
 689{
 690        struct kvm_pmu *pmu = &vcpu->arch.pmu;
 691        struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
 692        bool new_state, old_state;
 693
 694        old_state = kvm_pmu_pmc_is_chained(pmc);
 695        new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
 696                    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
 697
 698        if (old_state == new_state)
 699                return;
 700
 701        canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
 702        kvm_pmu_stop_counter(vcpu, canonical_pmc);
 703        if (new_state) {
 704                /*
 705                 * During promotion from !chained to chained we must ensure
 706                 * the adjacent counter is stopped and its event destroyed
 707                 */
 708                kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
 709                set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
 710                return;
 711        }
 712        clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
 713}
 714
 715/**
 716 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 717 * @vcpu: The vcpu pointer
 718 * @data: The data guest writes to PMXEVTYPER_EL0
 719 * @select_idx: The number of selected counter
 720 *
 721 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 722 * event with given hardware event number. Here we call perf_event API to
 723 * emulate this action and create a kernel perf event for it.
 724 */
 725void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 726                                    u64 select_idx)
 727{
 728        u64 reg, mask;
 729
 730        mask  =  ARMV8_PMU_EVTYPE_MASK;
 731        mask &= ~ARMV8_PMU_EVTYPE_EVENT;
 732        mask |= kvm_pmu_event_mask(vcpu->kvm);
 733
 734        reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
 735              ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
 736
 737        __vcpu_sys_reg(vcpu, reg) = data & mask;
 738
 739        kvm_pmu_update_pmc_chained(vcpu, select_idx);
 740        kvm_pmu_create_perf_event(vcpu, select_idx);
 741}
 742
 743void kvm_host_pmu_init(struct arm_pmu *pmu)
 744{
 745        if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
 746            !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
 747                static_branch_enable(&kvm_arm_pmu_available);
 748}
 749
 750static int kvm_pmu_probe_pmuver(void)
 751{
 752        struct perf_event_attr attr = { };
 753        struct perf_event *event;
 754        struct arm_pmu *pmu;
 755        int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;
 756
 757        /*
 758         * Create a dummy event that only counts user cycles. As we'll never
 759         * leave this function with the event being live, it will never
 760         * count anything. But it allows us to probe some of the PMU
 761         * details. Yes, this is terrible.
 762         */
 763        attr.type = PERF_TYPE_RAW;
 764        attr.size = sizeof(attr);
 765        attr.pinned = 1;
 766        attr.disabled = 0;
 767        attr.exclude_user = 0;
 768        attr.exclude_kernel = 1;
 769        attr.exclude_hv = 1;
 770        attr.exclude_host = 1;
 771        attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
 772        attr.sample_period = GENMASK(63, 0);
 773
 774        event = perf_event_create_kernel_counter(&attr, -1, current,
 775                                                 kvm_pmu_perf_overflow, &attr);
 776
 777        if (IS_ERR(event)) {
 778                pr_err_once("kvm: pmu event creation failed %ld\n",
 779                            PTR_ERR(event));
 780                return ID_AA64DFR0_PMUVER_IMP_DEF;
 781        }
 782
 783        if (event->pmu) {
 784                pmu = to_arm_pmu(event->pmu);
 785                if (pmu->pmuver)
 786                        pmuver = pmu->pmuver;
 787        }
 788
 789        perf_event_disable(event);
 790        perf_event_release_kernel(event);
 791
 792        return pmuver;
 793}
 794
 795u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
 796{
 797        unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
 798        u64 val, mask = 0;
 799        int base, i, nr_events;
 800
 801        if (!pmceid1) {
 802                val = read_sysreg(pmceid0_el0);
 803                base = 0;
 804        } else {
 805                val = read_sysreg(pmceid1_el0);
 806                /*
 807                 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
 808                 * as RAZ
 809                 */
 810                if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
 811                        val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
 812                base = 32;
 813        }
 814
 815        if (!bmap)
 816                return val;
 817
 818        nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
 819
 820        for (i = 0; i < 32; i += 8) {
 821                u64 byte;
 822
 823                byte = bitmap_get_value8(bmap, base + i);
 824                mask |= byte << i;
 825                if (nr_events >= (0x4000 + base + 32)) {
 826                        byte = bitmap_get_value8(bmap, 0x4000 + base + i);
 827                        mask |= byte << (32 + i);
 828                }
 829        }
 830
 831        return val & mask;
 832}
 833
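/*
 * Layout note (architectural background, not from this file): PMCEID0_EL0
 * advertises common events 0x0000-0x001f in bits [31:0] and the extended
 * range 0x4000-0x401f in bits [63:32]; PMCEID1_EL0 does the same for
 * 0x0020-0x003f and 0x4020-0x403f. The loop above folds the event filter
 * bitmap into that layout one byte at a time, so an event that the filter
 * denies also has its PMCEID bit cleared before the guest sees it.
 */
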
 834int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 835{
 836        if (!kvm_vcpu_has_pmu(vcpu))
 837                return 0;
 838
 839        if (!vcpu->arch.pmu.created)
 840                return -EINVAL;
 841
 842        /*
 843         * A valid interrupt configuration for the PMU is either to have a
 844         * properly configured interrupt number and using an in-kernel
 845         * irqchip, or to not have an in-kernel GIC and not set an IRQ.
 846         */
 847        if (irqchip_in_kernel(vcpu->kvm)) {
 848                int irq = vcpu->arch.pmu.irq_num;
 849                /*
 850                 * If we are using an in-kernel vgic, at this point we know
 851                 * the vgic will be initialized, so we can check the PMU irq
 852                 * number against the dimensions of the vgic and make sure
 853                 * it's valid.
 854                 */
 855                if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
 856                        return -EINVAL;
 857        } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
 858                   return -EINVAL;
 859        }
 860
 861        /* One-off reload of the PMU on first run */
 862        kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
 863
 864        return 0;
 865}
 866
 867static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
 868{
 869        if (irqchip_in_kernel(vcpu->kvm)) {
 870                int ret;
 871
 872                /*
 873                 * If using the PMU with an in-kernel virtual GIC
 874                 * implementation, we require the GIC to be already
 875                 * initialized when initializing the PMU.
 876                 */
 877                if (!vgic_initialized(vcpu->kvm))
 878                        return -ENODEV;
 879
 880                if (!kvm_arm_pmu_irq_initialized(vcpu))
 881                        return -ENXIO;
 882
 883                ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
 884                                         &vcpu->arch.pmu);
 885                if (ret)
 886                        return ret;
 887        }
 888
 889        init_irq_work(&vcpu->arch.pmu.overflow_work,
 890                      kvm_pmu_perf_overflow_notify_vcpu);
 891
 892        vcpu->arch.pmu.created = true;
 893        return 0;
 894}
 895
 896/*
 897 * For one VM the interrupt type must be the same for each vcpu.
 898 * As a PPI, the interrupt number is the same for all vcpus,
 899 * while as an SPI it must be a separate number per vcpu.
 900 */
 901static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
 902{
 903        int i;
 904        struct kvm_vcpu *vcpu;
 905
 906        kvm_for_each_vcpu(i, vcpu, kvm) {
 907                if (!kvm_arm_pmu_irq_initialized(vcpu))
 908                        continue;
 909
 910                if (irq_is_ppi(irq)) {
 911                        if (vcpu->arch.pmu.irq_num != irq)
 912                                return false;
 913                } else {
 914                        if (vcpu->arch.pmu.irq_num == irq)
 915                                return false;
 916                }
 917        }
 918
 919        return true;
 920}
 921
 922int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 923{
 924        if (!kvm_vcpu_has_pmu(vcpu))
 925                return -ENODEV;
 926
 927        if (vcpu->arch.pmu.created)
 928                return -EBUSY;
 929
 930        if (!vcpu->kvm->arch.pmuver)
 931                vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
 932
 933        if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
 934                return -ENODEV;
 935
 936        switch (attr->attr) {
 937        case KVM_ARM_VCPU_PMU_V3_IRQ: {
 938                int __user *uaddr = (int __user *)(long)attr->addr;
 939                int irq;
 940
 941                if (!irqchip_in_kernel(vcpu->kvm))
 942                        return -EINVAL;
 943
 944                if (get_user(irq, uaddr))
 945                        return -EFAULT;
 946
 947                /* The PMU overflow interrupt can be a PPI or a valid SPI. */
 948                if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
 949                        return -EINVAL;
 950
 951                if (!pmu_irq_is_valid(vcpu->kvm, irq))
 952                        return -EINVAL;
 953
 954                if (kvm_arm_pmu_irq_initialized(vcpu))
 955                        return -EBUSY;
 956
 957                kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
 958                vcpu->arch.pmu.irq_num = irq;
 959                return 0;
 960        }
 961        case KVM_ARM_VCPU_PMU_V3_FILTER: {
 962                struct kvm_pmu_event_filter __user *uaddr;
 963                struct kvm_pmu_event_filter filter;
 964                int nr_events;
 965
 966                nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
 967
 968                uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
 969
 970                if (copy_from_user(&filter, uaddr, sizeof(filter)))
 971                        return -EFAULT;
 972
 973                if (((u32)filter.base_event + filter.nevents) > nr_events ||
 974                    (filter.action != KVM_PMU_EVENT_ALLOW &&
 975                     filter.action != KVM_PMU_EVENT_DENY))
 976                        return -EINVAL;
 977
 978                mutex_lock(&vcpu->kvm->lock);
 979
 980                if (!vcpu->kvm->arch.pmu_filter) {
 981                        vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
 982                        if (!vcpu->kvm->arch.pmu_filter) {
 983                                mutex_unlock(&vcpu->kvm->lock);
 984                                return -ENOMEM;
 985                        }
 986
 987                        /*
 988                         * The default depends on the first applied filter.
 989                         * If it allows events, the default is to deny.
 990                         * Conversely, if the first filter denies a set of
 991                         * events, the default is to allow.
 992                         */
 993                        if (filter.action == KVM_PMU_EVENT_ALLOW)
 994                                bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
 995                        else
 996                                bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
 997                }
 998
 999                if (filter.action == KVM_PMU_EVENT_ALLOW)
1000                        bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
1001                else
1002                        bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
1003
1004                mutex_unlock(&vcpu->kvm->lock);
1005
1006                return 0;
1007        }
1008        case KVM_ARM_VCPU_PMU_V3_INIT:
1009                return kvm_arm_pmu_v3_init(vcpu);
1010        }
1011
1012        return -ENXIO;
1013}
1014
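/*
 * Userspace sketch (hypothetical fd and values, not part of this file): the
 * attributes above are reached via KVM_SET_DEVICE_ATTR on the vcpu file
 * descriptor with group KVM_ARM_VCPU_PMU_V3_CTRL. Setting the overflow PPI
 * and then finalizing the PMU could look like:
 *
 *	int irq = 23;					// PPI 23, for example
 *	struct kvm_device_attr attr = {
 *		.group = KVM_ARM_VCPU_PMU_V3_CTRL,
 *		.attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
 *		.addr  = (__u64)(unsigned long)&irq,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);	// KVM_ARM_VCPU_PMU_V3_IRQ
 *
 *	attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
 *	attr.addr = 0;
 *	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);	// KVM_ARM_VCPU_PMU_V3_INIT
 */
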
1015int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1016{
1017        switch (attr->attr) {
1018        case KVM_ARM_VCPU_PMU_V3_IRQ: {
1019                int __user *uaddr = (int __user *)(long)attr->addr;
1020                int irq;
1021
1022                if (!irqchip_in_kernel(vcpu->kvm))
1023                        return -EINVAL;
1024
1025                if (!kvm_vcpu_has_pmu(vcpu))
1026                        return -ENODEV;
1027
1028                if (!kvm_arm_pmu_irq_initialized(vcpu))
1029                        return -ENXIO;
1030
1031                irq = vcpu->arch.pmu.irq_num;
1032                return put_user(irq, uaddr);
1033        }
1034        }
1035
1036        return -ENXIO;
1037}
1038
1039int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1040{
1041        switch (attr->attr) {
1042        case KVM_ARM_VCPU_PMU_V3_IRQ:
1043        case KVM_ARM_VCPU_PMU_V3_INIT:
1044        case KVM_ARM_VCPU_PMU_V3_FILTER:
1045                if (kvm_vcpu_has_pmu(vcpu))
1046                        return 0;
1047        }
1048
1049        return -ENXIO;
1050}
1051