linux/arch/x86/xen/pmu.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"

#define XENPMU_IRQ_PROCESSING    1
struct xenpmu {
        /* Shared page between hypervisor and domain */
        struct xen_pmu_data *xenpmu_data;

        uint8_t flags;
};
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()    (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()   (this_cpu_ptr(&xenpmu_shared)->flags)

/* Macro for computing address of a PMU MSR bank */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
                                            (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS   6
#define F10H_NUM_COUNTERS   4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER            0
#define MSR_TYPE_CTRL               1
#define MSR_TYPE_GLOBAL             2
#define MSR_TYPE_ARCH_COUNTER       3
#define MSR_TYPE_ARCH_CTRL          4

/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
#define PMU_GENERAL_NR_SHIFT        8
#define PMU_GENERAL_NR_BITS         8
#define PMU_GENERAL_NR_MASK         (((1 << PMU_GENERAL_NR_BITS) - 1) \
                                     << PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
#define PMU_FIXED_NR_SHIFT          0
#define PMU_FIXED_NR_BITS           5
#define PMU_FIXED_NR_MASK           (((1 << PMU_FIXED_NR_BITS) - 1) \
                                     << PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK          (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT        30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;

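/*
 * Detect the vendor-specific PMU layout: counter MSR bases, stride and
 * count for AMD, or the architectural/fixed counter counts reported by
 * CPUID leaf 0xa for Intel.
 */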
static void xen_pmu_arch_init(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {

                switch (boot_cpu_data.x86) {
                case 0x15:
                        amd_num_counters = F15H_NUM_COUNTERS;
                        amd_counters_base = MSR_F15H_PERF_CTR;
                        amd_ctrls_base = MSR_F15H_PERF_CTL;
                        amd_msr_step = 2;
                        k7_counters_mirrored = 1;
                        break;
                case 0x10:
                case 0x12:
                case 0x14:
                case 0x16:
                default:
                        amd_num_counters = F10H_NUM_COUNTERS;
                        amd_counters_base = MSR_K7_PERFCTR0;
                        amd_ctrls_base = MSR_K7_EVNTSEL0;
                        amd_msr_step = 1;
                        k7_counters_mirrored = 0;
                        break;
                }
        } else {
                uint32_t eax, ebx, ecx, edx;

                cpuid(0xa, &eax, &ebx, &ecx, &edx);

                intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
                        PMU_GENERAL_NR_SHIFT;
                intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
                        PMU_FIXED_NR_SHIFT;
        }
}

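/*
 * On Family 15h the legacy K7 counter/event-select MSRs are mirrored onto
 * the F15H PERF_CTR/PERF_CTL ranges; translate a K7 address accordingly.
 */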
static inline uint32_t get_fam15h_addr(u32 addr)
{
        switch (addr) {
        case MSR_K7_PERFCTR0:
        case MSR_K7_PERFCTR1:
        case MSR_K7_PERFCTR2:
        case MSR_K7_PERFCTR3:
                return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
        case MSR_K7_EVNTSEL0:
        case MSR_K7_EVNTSEL1:
        case MSR_K7_EVNTSEL2:
        case MSR_K7_EVNTSEL3:
                return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
        default:
                break;
        }

        return addr;
}

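/* Does the MSR fall into one of the AMD PMU counter/control ranges? */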
static inline bool is_amd_pmu_msr(unsigned int msr)
{
        if ((msr >= MSR_F15H_PERF_CTL &&
             msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
            (msr >= MSR_K7_EVNTSEL0 &&
             msr < MSR_K7_PERFCTR0 + amd_num_counters))
                return true;

        return false;
}

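/*
 * Classify an Intel PMU MSR: return its type (control, global, fixed or
 * architectural counter/control) and, where applicable, the counter index.
 */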
static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
        u32 msr_index_pmc;

        switch (msr_index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_IA32_DS_AREA:
        case MSR_IA32_PEBS_ENABLE:
                *type = MSR_TYPE_CTRL;
                return true;

        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                *type = MSR_TYPE_GLOBAL;
                return true;

        default:

                if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
                    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
                                 intel_num_fixed_counters)) {
                        *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
                        *type = MSR_TYPE_COUNTER;
                        return true;
                }

                if ((msr_index >= MSR_P6_EVNTSEL0) &&
                    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
                        *index = msr_index - MSR_P6_EVNTSEL0;
                        *type = MSR_TYPE_ARCH_CTRL;
                        return true;
                }

                msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
                if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
                    (msr_index_pmc < MSR_IA32_PERFCTR0 +
                                     intel_num_arch_counters)) {
                        *type = MSR_TYPE_ARCH_COUNTER;
                        *index = msr_index_pmc - MSR_IA32_PERFCTR0;
                        return true;
                }
                return false;
        }
}

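/*
 * Emulate an Intel PMU MSR access against the context cached in the shared
 * page.  Only valid while a PMU interrupt is being processed; returns false
 * if the access must go to the real MSR instead.
 */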
static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
                                  int index, bool is_read)
{
        uint64_t *reg = NULL;
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fix_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        ctxt = &xenpmu_data->pmu.c.intel;

        switch (msr) {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                reg = &ctxt->global_ovf_ctrl;
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                reg = &ctxt->global_status;
                break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                reg = &ctxt->global_ctrl;
                break;
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                reg = &ctxt->fixed_ctrl;
                break;
        default:
                switch (type) {
                case MSR_TYPE_COUNTER:
                        fix_counters = field_offset(ctxt, fixed_counters);
                        reg = &fix_counters[index];
                        break;
                case MSR_TYPE_ARCH_COUNTER:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].counter;
                        break;
                case MSR_TYPE_ARCH_CTRL:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].control;
                        break;
                default:
                        return false;
                }
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else {
                        *reg = *val;

                        if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
                                ctxt->global_status &= (~(*val));
                }
                return true;
        }

        return false;
}

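/*
 * Emulate an AMD PMU MSR access against the context cached in the shared
 * page while a PMU interrupt is being processed.
 */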
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
        uint64_t *reg = NULL;
        int i, off = 0;
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs, *ctrl_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        if (k7_counters_mirrored &&
            ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
                msr = get_fam15h_addr(msr);

        ctxt = &xenpmu_data->pmu.c.amd;
        for (i = 0; i < amd_num_counters; i++) {
                if (msr == amd_ctrls_base + off) {
                        ctrl_regs = field_offset(ctxt, ctrls);
                        reg = &ctrl_regs[i];
                        break;
                } else if (msr == amd_counters_base + off) {
                        counter_regs = field_offset(ctxt, counters);
                        reg = &counter_regs[i];
                        break;
                }
                off += amd_msr_step;
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else
                        *reg = *val;

                return true;
        }
        return false;
}

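/*
 * Handle a PMU MSR read: emulate it from the shared page when possible,
 * otherwise fall back to a safe read of the real MSR.  Returns true if the
 * MSR belongs to the PMU and was handled here.
 */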
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, val, true))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, val, type, index, true))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        }

        return false;
}

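/*
 * Handle a PMU MSR write: emulate it into the shared page when possible,
 * otherwise fall back to a safe write of the real MSR.  Returns true if the
 * MSR belongs to the PMU and was handled here.
 */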
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
        uint64_t val = ((uint64_t)high << 32) | low;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, &val, false))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, &val, type, index, false))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        }

        return false;
}

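/*
 * Read an AMD performance counter, either from the hardware MSR or, while
 * a PMU interrupt is being processed, from the cached context.
 */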
static unsigned long long xen_amd_read_pmc(int counter)
{
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                msr = amd_counters_base + (counter * amd_msr_step);
                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.amd;
        counter_regs = field_offset(ctxt, counters);
        return counter_regs[counter];
}

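/*
 * Read an Intel performance counter.  Bit INTEL_PMC_TYPE_SHIFT of @counter
 * selects fixed vs. architectural counters; the low bits give the index.
 */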
static unsigned long long xen_intel_read_pmc(int counter)
{
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fixed_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
                        msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
                else
                        msr = MSR_IA32_PERFCTR0 + counter;

                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.intel;
        if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
                fixed_counters = field_offset(ctxt, fixed_counters);
                return fixed_counters[counter & 0xffff];
        }

        arch_cntr_pair = field_offset(ctxt, arch_counters);
        return arch_cntr_pair[counter].counter;
}

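/* Vendor dispatch for RDPMC emulation. */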
unsigned long long xen_read_pmc(int counter)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return xen_amd_read_pmc(counter);
        else
                return xen_intel_read_pmc(counter);
}

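/*
 * Cache the new LVTPC value and, unless a PMU interrupt is currently being
 * processed, forward it to the hypervisor.
 */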
int pmu_apic_update(uint32_t val)
{
        int ret;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return -EINVAL;
        }

        xenpmu_data->pmu.l.lapic_lvtpc = val;

        if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
                return 0;

        ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

        return ret;
}

/* perf callbacks */
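
/*
 * A sample is attributed to a guest only when running as the initial
 * domain (dom0) and the hypervisor reports it came from another domain.
 */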
static int xen_is_in_guest(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
                return 0;

        return 1;
}

static int xen_is_user_mode(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
                return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
        else
                return !!(xenpmu_data->pmu.r.regs.cpl & 3);
}

static unsigned long xen_get_guest_ip(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
        .is_in_guest            = xen_is_in_guest,
        .is_user_mode           = xen_is_user_mode,
        .get_guest_ip           = xen_get_guest_ip,
};

/* Convert registers from Xen's format to Linux' */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
                             struct pt_regs *regs, uint64_t pmu_flags)
{
        regs->ip = xen_regs->ip;
        regs->cs = xen_regs->cs;
        regs->sp = xen_regs->sp;

        if (pmu_flags & PMU_SAMPLE_PV) {
                if (pmu_flags & PMU_SAMPLE_USER)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        } else {
                if (xen_regs->cpl)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        }
}

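/*
 * PMU interrupt handler: feed the register snapshot from the shared page to
 * the core x86 PMU code, then flush the cached PMU context back to the
 * hardware with the XENPMU_flush hypercall.
 */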
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
        int err, ret = IRQ_NONE;
        struct pt_regs regs;
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return ret;
        }

        this_cpu_ptr(&xenpmu_shared)->flags =
                xenpmu_flags | XENPMU_IRQ_PROCESSING;
        xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
                         xenpmu_data->pmu.pmu_flags);
        if (x86_pmu.handle_irq(&regs))
                ret = IRQ_HANDLED;

        /* Write out cached context to HW */
        err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
        this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
        if (err) {
                pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
                return IRQ_NONE;
        }

        return ret;
}

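/* True once the shared PMU page has been set up on the current CPU. */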
bool is_xen_pmu(int cpu)
{
        return (get_xenpmu_data() != NULL);
}

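/*
 * Allocate and register the page shared with the hypervisor for this CPU's
 * PMU data.  For CPU 0 also register the perf guest callbacks and detect
 * the PMU layout.
 */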
void xen_pmu_init(int cpu)
{
        int err;
        struct xen_pmu_params xp;
        unsigned long pfn;
        struct xen_pmu_data *xenpmu_data;

        BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

        if (xen_hvm_domain())
                return;

        xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
        if (!xenpmu_data) {
                pr_err("VPMU init: No memory\n");
                return;
        }
        pfn = virt_to_pfn(xenpmu_data);

        xp.val = pfn_to_mfn(pfn);
        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;
        err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
        if (err)
                goto fail;

        per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
        per_cpu(xenpmu_shared, cpu).flags = 0;

        if (cpu == 0) {
                perf_register_guest_info_callbacks(&xen_guest_cbs);
                xen_pmu_arch_init();
        }

        return;

fail:
        if (err == -EOPNOTSUPP || err == -ENOSYS)
                pr_info_once("VPMU disabled by hypervisor.\n");
        else
                pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
                        cpu, err);
        free_pages((unsigned long)xenpmu_data, 0);
}

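/* Tear down the VPMU for a CPU and free its shared page. */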
void xen_pmu_finish(int cpu)
{
        struct xen_pmu_params xp;

        if (xen_hvm_domain())
                return;

        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;

        (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

        free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
        per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}