linux/arch/x86/xen/pmu.c
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"

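/*
 * XENPMU_IRQ_PROCESSING is set in a CPU's flags while xen_pmu_irq_handler()
 * runs on it.  The MSR and RDPMC emulation below only touches the
 * shared-page context while this flag is set, and falls back to the real
 * (safe) MSR accessors otherwise.
 */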
#define XENPMU_IRQ_PROCESSING    1
struct xenpmu {
        /* Shared page between hypervisor and domain */
        struct xen_pmu_data *xenpmu_data;

        uint8_t flags;
};
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()    (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()   (this_cpu_ptr(&xenpmu_shared)->flags)

/*
 * Macro for computing the address of a PMU MSR bank: ctxt->field holds
 * the bank's byte offset from the start of the context structure.
 */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
                                            (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS   6
#define F10H_NUM_COUNTERS   4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER            0
#define MSR_TYPE_CTRL               1
#define MSR_TYPE_GLOBAL             2
#define MSR_TYPE_ARCH_COUNTER       3
#define MSR_TYPE_ARCH_CTRL          4

/* Number of general pmu registers (CPUID leaf 0xa, EAX[8..15]) */
#define PMU_GENERAL_NR_SHIFT        8
#define PMU_GENERAL_NR_BITS         8
#define PMU_GENERAL_NR_MASK         (((1 << PMU_GENERAL_NR_BITS) - 1) \
                                     << PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID leaf 0xa, EDX[0..4]) */
#define PMU_FIXED_NR_SHIFT          0
#define PMU_FIXED_NR_BITS           5
#define PMU_FIXED_NR_MASK           (((1 << PMU_FIXED_NR_BITS) - 1) \
                                     << PMU_FIXED_NR_SHIFT)

/*
 * Alias registers (0x4c1) for full-width writes to PMCs: masking an
 * MSR_IA32_PMC* alias with MSR_PMC_ALIAS_MASK yields the corresponding
 * MSR_IA32_PERFCTR* address.
 */
#define MSR_PMC_ALIAS_MASK          (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT        30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;

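/*
 * Probe the vendor PMU layout once at boot: the number of counters and
 * the MSR bases for AMD, or the counter counts advertised by CPUID
 * leaf 0xa for Intel.
 */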
static void xen_pmu_arch_init(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                switch (boot_cpu_data.x86) {
                case 0x15:
                        amd_num_counters = F15H_NUM_COUNTERS;
                        amd_counters_base = MSR_F15H_PERF_CTR;
                        amd_ctrls_base = MSR_F15H_PERF_CTL;
                        amd_msr_step = 2;
                        k7_counters_mirrored = 1;
                        break;
                case 0x10:
                case 0x12:
                case 0x14:
                case 0x16:
                default:
                        amd_num_counters = F10H_NUM_COUNTERS;
                        amd_counters_base = MSR_K7_PERFCTR0;
                        amd_ctrls_base = MSR_K7_EVNTSEL0;
                        amd_msr_step = 1;
                        k7_counters_mirrored = 0;
                        break;
                }
        } else {
                uint32_t eax, ebx, ecx, edx;

                cpuid(0xa, &eax, &ebx, &ecx, &edx);

                intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
                        PMU_GENERAL_NR_SHIFT;
                intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
                        PMU_FIXED_NR_SHIFT;
        }
}

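/*
 * On family 15h the legacy K7 counter/control MSRs are mirrored onto
 * the first four F15h MSR pairs; translate a K7 address to its F15h
 * equivalent.
 */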
static inline uint32_t get_fam15h_addr(u32 addr)
{
        switch (addr) {
        case MSR_K7_PERFCTR0:
        case MSR_K7_PERFCTR1:
        case MSR_K7_PERFCTR2:
        case MSR_K7_PERFCTR3:
                return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
        case MSR_K7_EVNTSEL0:
        case MSR_K7_EVNTSEL1:
        case MSR_K7_EVNTSEL2:
        case MSR_K7_EVNTSEL3:
                return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
        default:
                break;
        }

        return addr;
}

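/* Does this MSR fall within the K7 or the F15h PMU MSR ranges? */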
static inline bool is_amd_pmu_msr(unsigned int msr)
{
        if ((msr >= MSR_F15H_PERF_CTL &&
             msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
            (msr >= MSR_K7_EVNTSEL0 &&
             msr < MSR_K7_PERFCTR0 + amd_num_counters))
                return true;

        return false;
}

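/*
 * Classify an Intel PMU MSR: returns true and sets *type (and, for the
 * counter and control banks, *index) if the MSR belongs to the PMU.
 */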
static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
        u32 msr_index_pmc;

        switch (msr_index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_IA32_DS_AREA:
        case MSR_IA32_PEBS_ENABLE:
                *type = MSR_TYPE_CTRL;
                return true;

        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                *type = MSR_TYPE_GLOBAL;
                return true;

        default:
                if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
                    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
                                 intel_num_fixed_counters)) {
                        *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
                        *type = MSR_TYPE_COUNTER;
                        return true;
                }

                if ((msr_index >= MSR_P6_EVNTSEL0) &&
                    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
                        *index = msr_index - MSR_P6_EVNTSEL0;
                        *type = MSR_TYPE_ARCH_CTRL;
                        return true;
                }

                msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
                if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
                    (msr_index_pmc < MSR_IA32_PERFCTR0 +
                                     intel_num_arch_counters)) {
                        *type = MSR_TYPE_ARCH_COUNTER;
                        *index = msr_index_pmc - MSR_IA32_PERFCTR0;
                        return true;
                }
                return false;
        }
}

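/*
 * Emulate an Intel PMU MSR access against the shared-page context.
 * Only valid while a PMU interrupt is being processed; returns false
 * otherwise so the caller falls back to the real MSR.
 */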
static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
                                  int index, bool is_read)
{
        uint64_t *reg = NULL;
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fix_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        ctxt = &xenpmu_data->pmu.c.intel;

        switch (msr) {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                reg = &ctxt->global_ovf_ctrl;
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                reg = &ctxt->global_status;
                break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                reg = &ctxt->global_ctrl;
                break;
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                reg = &ctxt->fixed_ctrl;
                break;
        default:
                switch (type) {
                case MSR_TYPE_COUNTER:
                        fix_counters = field_offset(ctxt, fixed_counters);
                        reg = &fix_counters[index];
                        break;
                case MSR_TYPE_ARCH_COUNTER:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].counter;
                        break;
                case MSR_TYPE_ARCH_CTRL:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].control;
                        break;
                default:
                        return false;
                }
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else {
                        *reg = *val;

                        /*
                         * A write to the overflow control MSR clears the
                         * corresponding bits in the global status.
                         */
                        if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
                                ctxt->global_status &= (~(*val));
                }
                return true;
        }

        return false;
}

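/*
 * Emulate an AMD PMU MSR access against the shared-page context.
 * Walks the counter/control banks looking for a matching MSR; as with
 * the Intel variant, only valid while a PMU interrupt is in flight.
 */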
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
        uint64_t *reg = NULL;
        int i, off = 0;
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs, *ctrl_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        if (k7_counters_mirrored &&
            ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
                msr = get_fam15h_addr(msr);

        ctxt = &xenpmu_data->pmu.c.amd;
        for (i = 0; i < amd_num_counters; i++) {
                if (msr == amd_ctrls_base + off) {
                        ctrl_regs = field_offset(ctxt, ctrls);
                        reg = &ctrl_regs[i];
                        break;
                } else if (msr == amd_counters_base + off) {
                        counter_regs = field_offset(ctxt, counters);
                        reg = &counter_regs[i];
                        break;
                }
                off += amd_msr_step;
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else
                        *reg = *val;

                return true;
        }
        return false;
}

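/*
 * Intercept a PMU MSR read.  Returns true if the MSR belongs to the
 * PMU (in which case *val holds the emulated or native value), false
 * if the caller should handle the MSR itself.
 */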
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, val, true))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, val, type, index, true))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        }

        return false;
}

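/*
 * Intercept a PMU MSR write, mirroring pmu_msr_read() above: emulate
 * against the shared page when possible, otherwise write the real MSR.
 */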
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
        uint64_t val = ((uint64_t)high << 32) | low;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, &val, false))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, &val, type, index, false))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        }

        return false;
}

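/*
 * RDPMC emulation for AMD: outside of PMU interrupt processing read
 * the real counter MSR; during processing return the cached value
 * from the shared page.
 */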
static unsigned long long xen_amd_read_pmc(int counter)
{
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                msr = amd_counters_base + (counter * amd_msr_step);
                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.amd;
        counter_regs = field_offset(ctxt, counters);
        return counter_regs[counter];
}

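/*
 * RDPMC emulation for Intel.  Bit 30 of the counter index
 * (INTEL_PMC_TYPE_SHIFT) selects the fixed-counter space, matching the
 * native RDPMC encoding.
 */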
static unsigned long long xen_intel_read_pmc(int counter)
{
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fixed_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
                        msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
                else
                        msr = MSR_IA32_PERFCTR0 + counter;

                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.intel;
        if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
                fixed_counters = field_offset(ctxt, fixed_counters);
                return fixed_counters[counter & 0xffff];
        }

        arch_cntr_pair = field_offset(ctxt, arch_counters);
        return arch_cntr_pair[counter].counter;
}

unsigned long long xen_read_pmc(int counter)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return xen_amd_read_pmc(counter);
        else
                return xen_intel_read_pmc(counter);
}

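/*
 * Mirror an LVTPC APIC write into the shared page.  Outside of
 * interrupt processing the value must also be pushed to the hypervisor
 * via XENPMU_lvtpc_set.
 */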
int pmu_apic_update(uint32_t val)
{
        int ret;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return -EINVAL;
        }

        xenpmu_data->pmu.l.lapic_lvtpc = val;

        if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
                return 0;

        ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

        return ret;
}

/* perf callbacks */
static int xen_is_in_guest(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
                return 0;

        return 1;
}

static int xen_is_user_mode(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
                return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
        else
                return !!(xenpmu_data->pmu.r.regs.cpl & 3);
}

static unsigned long xen_get_guest_ip(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
        .is_in_guest            = xen_is_in_guest,
        .is_user_mode           = xen_is_user_mode,
        .get_guest_ip           = xen_get_guest_ip,
};

/* Convert registers from Xen's format to Linux's. */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
                             struct pt_regs *regs, uint64_t pmu_flags)
{
        regs->ip = xen_regs->ip;
        regs->cs = xen_regs->cs;
        regs->sp = xen_regs->sp;

        /*
         * Encode the sample's privilege level in the RPL bits of CS so
         * that perf attributes it to user or kernel mode.
         */
        if (pmu_flags & PMU_SAMPLE_PV) {
                if (pmu_flags & PMU_SAMPLE_USER)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        } else {
                if (xen_regs->cpl)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        }
}

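/*
 * PMU interrupt entry point: mark this CPU as processing, hand the
 * converted registers to the core x86 PMU handler, then flush the
 * cached context back to the hardware via XENPMU_flush.
 */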
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
        int err, ret = IRQ_NONE;
        struct pt_regs regs;
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return ret;
        }

        this_cpu_ptr(&xenpmu_shared)->flags =
                xenpmu_flags | XENPMU_IRQ_PROCESSING;
        xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
                         xenpmu_data->pmu.pmu_flags);
        if (x86_pmu.handle_irq(&regs))
                ret = IRQ_HANDLED;

        /* Write out cached context to HW */
        err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
        this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
        if (err) {
                pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
                return IRQ_NONE;
        }

        return ret;
}

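/* Has xen_pmu_init() set up the shared page for the given CPU? */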
bool is_xen_pmu(int cpu)
{
        /*
         * Check the requested CPU's slot rather than the caller's: the
         * original get_xenpmu_data() resolved through this_cpu_ptr()
         * and ignored the cpu argument.
         */
        return (per_cpu(xenpmu_shared, cpu).xenpmu_data != NULL);
}

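/*
 * Allocate the per-CPU shared page and register it with the hypervisor
 * (XENPMU_init); on the boot CPU, also hook up the perf guest callbacks
 * and probe the PMU layout.  Nothing to do for HVM domains here.
 */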
void xen_pmu_init(int cpu)
{
        int err;
        struct xen_pmu_params xp;
        unsigned long pfn;
        struct xen_pmu_data *xenpmu_data;

        BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

        if (xen_hvm_domain())
                return;

        xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
        if (!xenpmu_data) {
                pr_err("VPMU init: No memory\n");
                return;
        }
        pfn = virt_to_pfn(xenpmu_data);

        xp.val = pfn_to_mfn(pfn);
        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;
        err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
        if (err)
                goto fail;

        per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
        per_cpu(xenpmu_shared, cpu).flags = 0;

        if (cpu == 0) {
                perf_register_guest_info_callbacks(&xen_guest_cbs);
                xen_pmu_arch_init();
        }

        return;

fail:
        pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n",
                     cpu, err);
        free_pages((unsigned long)xenpmu_data, 0);
}

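/*
 * Tear down a CPU's VPMU: tell the hypervisor (XENPMU_finish) and free
 * the shared page.
 */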
void xen_pmu_finish(int cpu)
{
        struct xen_pmu_params xp;

        if (xen_hvm_domain())
                return;

        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;

        (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

        free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
        per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}