linux/arch/x86/xen/pmu.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"

#define XENPMU_IRQ_PROCESSING    1
struct xenpmu {
        /* Shared page between hypervisor and domain */
        struct xen_pmu_data *xenpmu_data;

        uint8_t flags;
};
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()    (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()   (this_cpu_ptr(&xenpmu_shared)->flags)

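/*
 * The counter/control banks in the Xen PMU context structures are stored
 * as byte offsets relative to the start of the context rather than as
 * pointers; field_offset() turns such an offset into a usable address
 * within the shared page.
 */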
/* Macro for computing address of a PMU MSR bank */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
                                            (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS   6
#define F10H_NUM_COUNTERS   4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER            0
#define MSR_TYPE_CTRL               1
#define MSR_TYPE_GLOBAL             2
#define MSR_TYPE_ARCH_COUNTER       3
#define MSR_TYPE_ARCH_CTRL          4

/* Number of general pmu registers (CPUID leaf 0xa, EAX bits 8..15) */
#define PMU_GENERAL_NR_SHIFT        8
#define PMU_GENERAL_NR_BITS         8
#define PMU_GENERAL_NR_MASK         (((1 << PMU_GENERAL_NR_BITS) - 1) \
                                     << PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID leaf 0xa, EDX bits 0..4) */
#define PMU_FIXED_NR_SHIFT          0
#define PMU_FIXED_NR_BITS           5
#define PMU_FIXED_NR_MASK           (((1 << PMU_FIXED_NR_BITS) - 1) \
                                     << PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK          (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT        30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;

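/*
 * Probe the PMU layout once at boot: select the counter/control MSR
 * bases for AMD and Hygon by CPU family, or read the number of
 * architectural and fixed counters from CPUID leaf 0xa on Intel.
 */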
static void xen_pmu_arch_init(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {

                switch (boot_cpu_data.x86) {
                case 0x15:
                        amd_num_counters = F15H_NUM_COUNTERS;
                        amd_counters_base = MSR_F15H_PERF_CTR;
                        amd_ctrls_base = MSR_F15H_PERF_CTL;
                        amd_msr_step = 2;
                        k7_counters_mirrored = 1;
                        break;
                case 0x10:
                case 0x12:
                case 0x14:
                case 0x16:
                default:
                        amd_num_counters = F10H_NUM_COUNTERS;
                        amd_counters_base = MSR_K7_PERFCTR0;
                        amd_ctrls_base = MSR_K7_EVNTSEL0;
                        amd_msr_step = 1;
                        k7_counters_mirrored = 0;
                        break;
                }
        } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
                amd_num_counters = F10H_NUM_COUNTERS;
                amd_counters_base = MSR_K7_PERFCTR0;
                amd_ctrls_base = MSR_K7_EVNTSEL0;
                amd_msr_step = 1;
                k7_counters_mirrored = 0;
        } else {
                uint32_t eax, ebx, ecx, edx;

                cpuid(0xa, &eax, &ebx, &ecx, &edx);

                intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
                        PMU_GENERAL_NR_SHIFT;
                intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
                        PMU_FIXED_NR_SHIFT;
        }
}

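/*
 * On AMD family 15h the legacy K7 counter/event-select MSRs are mirrored
 * by the F15H ones; translate a K7 address to its F15H equivalent.
 */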
static inline uint32_t get_fam15h_addr(u32 addr)
{
        switch (addr) {
        case MSR_K7_PERFCTR0:
        case MSR_K7_PERFCTR1:
        case MSR_K7_PERFCTR2:
        case MSR_K7_PERFCTR3:
                return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
        case MSR_K7_EVNTSEL0:
        case MSR_K7_EVNTSEL1:
        case MSR_K7_EVNTSEL2:
        case MSR_K7_EVNTSEL3:
                return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
        default:
                break;
        }

        return addr;
}

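/*
 * The F15H control/counter MSRs are interleaved (CTL0, CTR0, CTL1, ...)
 * and the K7 event selects immediately precede the K7 counters, so one
 * range check per bank suffices.
 */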
static inline bool is_amd_pmu_msr(unsigned int msr)
{
        if ((msr >= MSR_F15H_PERF_CTL &&
             msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
            (msr >= MSR_K7_EVNTSEL0 &&
             msr < MSR_K7_PERFCTR0 + amd_num_counters))
                return true;

        return false;
}

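/*
 * Classify an Intel PMU MSR: on a match, *type identifies the register
 * class and, for counters and event selects, *index gives the register's
 * position within its bank.
 */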
static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
        u32 msr_index_pmc;

        switch (msr_index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_IA32_DS_AREA:
        case MSR_IA32_PEBS_ENABLE:
                *type = MSR_TYPE_CTRL;
                return true;

        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                *type = MSR_TYPE_GLOBAL;
                return true;

        default:

                if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
                    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
                                 intel_num_fixed_counters)) {
                        *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
                        *type = MSR_TYPE_COUNTER;
                        return true;
                }

                if ((msr_index >= MSR_P6_EVNTSEL0) &&
                    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
                        *index = msr_index - MSR_P6_EVNTSEL0;
                        *type = MSR_TYPE_ARCH_CTRL;
                        return true;
                }

                msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
                if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
                    (msr_index_pmc < MSR_IA32_PERFCTR0 +
                                     intel_num_arch_counters)) {
                        *type = MSR_TYPE_ARCH_COUNTER;
                        *index = msr_index_pmc - MSR_IA32_PERFCTR0;
                        return true;
                }
                return false;
        }
}

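/*
 * While a PMU interrupt is being processed, MSR accesses are emulated
 * against the cached context in the shared page rather than the
 * hardware; the cached context is written back via XENPMU_flush when
 * interrupt handling completes.
 */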
static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
                                  int index, bool is_read)
{
        uint64_t *reg = NULL;
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fix_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        ctxt = &xenpmu_data->pmu.c.intel;

        switch (msr) {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                reg = &ctxt->global_ovf_ctrl;
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                reg = &ctxt->global_status;
                break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                reg = &ctxt->global_ctrl;
                break;
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                reg = &ctxt->fixed_ctrl;
                break;
        default:
                switch (type) {
                case MSR_TYPE_COUNTER:
                        fix_counters = field_offset(ctxt, fixed_counters);
                        reg = &fix_counters[index];
                        break;
                case MSR_TYPE_ARCH_COUNTER:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].counter;
                        break;
                case MSR_TYPE_ARCH_CTRL:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].control;
                        break;
                default:
                        return false;
                }
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else {
                        *reg = *val;

                        if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
                                ctxt->global_status &= (~(*val));
                }
                return true;
        }

        return false;
}

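/*
 * AMD counterpart of the above: locate the control or counter slot that
 * backs @msr in the shared-page context and emulate the access there.
 */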
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
        uint64_t *reg = NULL;
        int i, off = 0;
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs, *ctrl_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        if (k7_counters_mirrored &&
            ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
                msr = get_fam15h_addr(msr);

        ctxt = &xenpmu_data->pmu.c.amd;
        for (i = 0; i < amd_num_counters; i++) {
                if (msr == amd_ctrls_base + off) {
                        ctrl_regs = field_offset(ctxt, ctrls);
                        reg = &ctrl_regs[i];
                        break;
                } else if (msr == amd_counters_base + off) {
                        counter_regs = field_offset(ctxt, counters);
                        reg = &counter_regs[i];
                        break;
                }
                off += amd_msr_step;
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else
                        *reg = *val;

                return true;
        }
        return false;
}

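/*
 * Entry points for the PV MSR accessors: return true if @msr belongs to
 * the PMU (and was emulated or passed through safely), false to let the
 * caller handle the MSR itself.
 */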
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, val, 1))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        }

        return false;
}

bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
        uint64_t val = ((uint64_t)high << 32) | low;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, &val, 0))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        }

        return false;
}

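/*
 * rdpmc emulation: outside of PMU-interrupt processing read the real
 * counter MSR; inside it, return the value cached in the shared page.
 */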
static unsigned long long xen_amd_read_pmc(int counter)
{
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                msr = amd_counters_base + (counter * amd_msr_step);
                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.amd;
        counter_regs = field_offset(ctxt, counters);
        return counter_regs[counter];
}

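/*
 * Intel rdpmc encoding: bit 30 of @counter (INTEL_PMC_TYPE_SHIFT)
 * selects fixed vs. general-purpose counters, and the low bits index
 * into the selected bank.
 */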
static unsigned long long xen_intel_read_pmc(int counter)
{
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fixed_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
                        msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
                else
                        msr = MSR_IA32_PERFCTR0 + counter;

                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.intel;
        if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
                fixed_counters = field_offset(ctxt, fixed_counters);
                return fixed_counters[counter & 0xffff];
        }

        arch_cntr_pair = field_offset(ctxt, arch_counters);
        return arch_cntr_pair[counter].counter;
}

unsigned long long xen_read_pmc(int counter)
{
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return xen_amd_read_pmc(counter);
        else
                return xen_intel_read_pmc(counter);
}

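/*
 * Cache the LVTPC value in the shared page; the hypercall is needed
 * only outside of PMU-interrupt processing, since the cached context
 * (including the LVTPC) is written back when the interrupt completes.
 */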
int pmu_apic_update(uint32_t val)
{
        int ret;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return -EINVAL;
        }

        xenpmu_data->pmu.l.lapic_lvtpc = val;

        if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
                return 0;

        ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

        return ret;
}

/* perf callbacks */
static int xen_is_in_guest(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
                return 0;

        return 1;
}

static int xen_is_user_mode(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
                return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
        else
                return !!(xenpmu_data->pmu.r.regs.cpl & 3);
}

static unsigned long xen_get_guest_ip(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
        .is_in_guest            = xen_is_in_guest,
        .is_user_mode           = xen_is_user_mode,
        .get_guest_ip           = xen_get_guest_ip,
};

/*
 * Convert registers from Xen's format to Linux's. perf derives the
 * sample's user/kernel mode from the low bits of regs->cs, so the
 * sampled privilege level is encoded there.
 */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
                             struct pt_regs *regs, uint64_t pmu_flags)
{
        regs->ip = xen_regs->ip;
        regs->cs = xen_regs->cs;
        regs->sp = xen_regs->sp;

        if (pmu_flags & PMU_SAMPLE_PV) {
                if (pmu_flags & PMU_SAMPLE_USER)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        } else {
                if (xen_regs->cpl)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        }
}

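/*
 * PMU interrupt handler: flag the vCPU as processing a PMU interrupt so
 * that MSR and counter accesses hit the cached context, hand the
 * converted sample registers to the generic x86 handler, then flush the
 * cached context back to the hardware.
 */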
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
        int err, ret = IRQ_NONE;
        struct pt_regs regs = {0};
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return ret;
        }

        this_cpu_ptr(&xenpmu_shared)->flags =
                xenpmu_flags | XENPMU_IRQ_PROCESSING;
        xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
                         xenpmu_data->pmu.pmu_flags);
        if (x86_pmu.handle_irq(&regs))
                ret = IRQ_HANDLED;

        /* Write out cached context to HW */
        err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
        this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
        if (err) {
                pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
                return IRQ_NONE;
        }

        return ret;
}

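/*
 * Note: the cpu argument is unused; this reports whether VPMU
 * initialization succeeded on the CPU it is called on.
 */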
bool is_xen_pmu(int cpu)
{
        return (get_xenpmu_data() != NULL);
}

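/*
 * Allocate a zeroed shared page for this vCPU and register it with the
 * hypervisor via XENPMU_init; the boot CPU additionally registers the
 * perf guest callbacks and probes the PMU layout.
 */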
void xen_pmu_init(int cpu)
{
        int err;
        struct xen_pmu_params xp;
        unsigned long pfn;
        struct xen_pmu_data *xenpmu_data;

        BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

        if (xen_hvm_domain())
                return;

        xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
        if (!xenpmu_data) {
                pr_err("VPMU init: No memory\n");
                return;
        }
        pfn = virt_to_pfn(xenpmu_data);

        xp.val = pfn_to_mfn(pfn);
        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;
        err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
        if (err)
                goto fail;

        per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
        per_cpu(xenpmu_shared, cpu).flags = 0;

        if (cpu == 0) {
                perf_register_guest_info_callbacks(&xen_guest_cbs);
                xen_pmu_arch_init();
        }

        return;

fail:
        if (err == -EOPNOTSUPP || err == -ENOSYS)
                pr_info_once("VPMU disabled by hypervisor.\n");
        else
                pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
                        cpu, err);
        free_pages((unsigned long)xenpmu_data, 0);
}

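/*
 * Unregister this vCPU's shared page via XENPMU_finish and free it.
 */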
void xen_pmu_finish(int cpu)
{
        struct xen_pmu_params xp;

        if (xen_hvm_domain())
                return;

        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;

        (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

        free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
        per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}