linux/arch/x86/events/msr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/perf_event.h>
   3#include <linux/sysfs.h>
   4#include <linux/nospec.h>
   5#include <asm/intel-family.h>
   6#include "probe.h"
   7
   8enum perf_msr_id {
   9        PERF_MSR_TSC                    = 0,
  10        PERF_MSR_APERF                  = 1,
  11        PERF_MSR_MPERF                  = 2,
  12        PERF_MSR_PPERF                  = 3,
  13        PERF_MSR_SMI                    = 4,
  14        PERF_MSR_PTSC                   = 5,
  15        PERF_MSR_IRPERF                 = 6,
  16        PERF_MSR_THERM                  = 7,
  17        PERF_MSR_EVENT_MAX,
  18};
  19
  20static bool test_aperfmperf(int idx, void *data)
  21{
  22        return boot_cpu_has(X86_FEATURE_APERFMPERF);
  23}
  24
  25static bool test_ptsc(int idx, void *data)
  26{
  27        return boot_cpu_has(X86_FEATURE_PTSC);
  28}
  29
  30static bool test_irperf(int idx, void *data)
  31{
  32        return boot_cpu_has(X86_FEATURE_IRPERF);
  33}
  34
  35static bool test_therm_status(int idx, void *data)
  36{
  37        return boot_cpu_has(X86_FEATURE_DTHERM);
  38}
  39
  40static bool test_intel(int idx, void *data)
  41{
  42        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
  43            boot_cpu_data.x86 != 6)
  44                return false;
  45
  46        switch (boot_cpu_data.x86_model) {
  47        case INTEL_FAM6_NEHALEM:
  48        case INTEL_FAM6_NEHALEM_G:
  49        case INTEL_FAM6_NEHALEM_EP:
  50        case INTEL_FAM6_NEHALEM_EX:
  51
  52        case INTEL_FAM6_WESTMERE:
  53        case INTEL_FAM6_WESTMERE_EP:
  54        case INTEL_FAM6_WESTMERE_EX:
  55
  56        case INTEL_FAM6_SANDYBRIDGE:
  57        case INTEL_FAM6_SANDYBRIDGE_X:
  58
  59        case INTEL_FAM6_IVYBRIDGE:
  60        case INTEL_FAM6_IVYBRIDGE_X:
  61
  62        case INTEL_FAM6_HASWELL:
  63        case INTEL_FAM6_HASWELL_X:
  64        case INTEL_FAM6_HASWELL_L:
  65        case INTEL_FAM6_HASWELL_G:
  66
  67        case INTEL_FAM6_BROADWELL:
  68        case INTEL_FAM6_BROADWELL_D:
  69        case INTEL_FAM6_BROADWELL_G:
  70        case INTEL_FAM6_BROADWELL_X:
  71
  72        case INTEL_FAM6_ATOM_SILVERMONT:
  73        case INTEL_FAM6_ATOM_SILVERMONT_D:
  74        case INTEL_FAM6_ATOM_AIRMONT:
  75
  76        case INTEL_FAM6_ATOM_GOLDMONT:
  77        case INTEL_FAM6_ATOM_GOLDMONT_D:
  78
  79        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
  80
  81        case INTEL_FAM6_XEON_PHI_KNL:
  82        case INTEL_FAM6_XEON_PHI_KNM:
  83                if (idx == PERF_MSR_SMI)
  84                        return true;
  85                break;
  86
  87        case INTEL_FAM6_SKYLAKE_L:
  88        case INTEL_FAM6_SKYLAKE:
  89        case INTEL_FAM6_SKYLAKE_X:
  90        case INTEL_FAM6_KABYLAKE_L:
  91        case INTEL_FAM6_KABYLAKE:
  92        case INTEL_FAM6_COMETLAKE_L:
  93        case INTEL_FAM6_COMETLAKE:
  94        case INTEL_FAM6_ICELAKE_L:
  95        case INTEL_FAM6_ICELAKE:
  96        case INTEL_FAM6_ICELAKE_X:
  97        case INTEL_FAM6_ICELAKE_D:
  98        case INTEL_FAM6_TIGERLAKE_L:
  99        case INTEL_FAM6_TIGERLAKE:
 100                if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
 101                        return true;
 102                break;
 103        }
 104
 105        return false;
 106}
 107
 108PMU_EVENT_ATTR_STRING(tsc,                              attr_tsc,               "event=0x00"    );
 109PMU_EVENT_ATTR_STRING(aperf,                            attr_aperf,             "event=0x01"    );
 110PMU_EVENT_ATTR_STRING(mperf,                            attr_mperf,             "event=0x02"    );
 111PMU_EVENT_ATTR_STRING(pperf,                            attr_pperf,             "event=0x03"    );
 112PMU_EVENT_ATTR_STRING(smi,                              attr_smi,               "event=0x04"    );
 113PMU_EVENT_ATTR_STRING(ptsc,                             attr_ptsc,              "event=0x05"    );
 114PMU_EVENT_ATTR_STRING(irperf,                           attr_irperf,            "event=0x06"    );
 115PMU_EVENT_ATTR_STRING(cpu_thermal_margin,               attr_therm,             "event=0x07"    );
 116PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot,      attr_therm_snap,        "1"             );
 117PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit,          attr_therm_unit,        "C"             );
 118
 119static unsigned long msr_mask;
 120
 121PMU_EVENT_GROUP(events, aperf);
 122PMU_EVENT_GROUP(events, mperf);
 123PMU_EVENT_GROUP(events, pperf);
 124PMU_EVENT_GROUP(events, smi);
 125PMU_EVENT_GROUP(events, ptsc);
 126PMU_EVENT_GROUP(events, irperf);
 127
 128static struct attribute *attrs_therm[] = {
 129        &attr_therm.attr.attr,
 130        &attr_therm_snap.attr.attr,
 131        &attr_therm_unit.attr.attr,
 132        NULL,
 133};
 134
 135static struct attribute_group group_therm = {
 136        .name  = "events",
 137        .attrs = attrs_therm,
 138};
 139
 140static struct perf_msr msr[] = {
 141        [PERF_MSR_TSC]          = { .no_check = true,                                                           },
 142        [PERF_MSR_APERF]        = { MSR_IA32_APERF,             &group_aperf,           test_aperfmperf,        },
 143        [PERF_MSR_MPERF]        = { MSR_IA32_MPERF,             &group_mperf,           test_aperfmperf,        },
 144        [PERF_MSR_PPERF]        = { MSR_PPERF,                  &group_pperf,           test_intel,             },
 145        [PERF_MSR_SMI]          = { MSR_SMI_COUNT,              &group_smi,             test_intel,             },
 146        [PERF_MSR_PTSC]         = { MSR_F15H_PTSC,              &group_ptsc,            test_ptsc,              },
 147        [PERF_MSR_IRPERF]       = { MSR_F17H_IRPERF,            &group_irperf,          test_irperf,            },
 148        [PERF_MSR_THERM]        = { MSR_IA32_THERM_STATUS,      &group_therm,           test_therm_status,      },
 149};
 150
 151static struct attribute *events_attrs[] = {
 152        &attr_tsc.attr.attr,
 153        NULL,
 154};
 155
 156static struct attribute_group events_attr_group = {
 157        .name = "events",
 158        .attrs = events_attrs,
 159};
 160
 161PMU_FORMAT_ATTR(event, "config:0-63");
 162static struct attribute *format_attrs[] = {
 163        &format_attr_event.attr,
 164        NULL,
 165};
 166static struct attribute_group format_attr_group = {
 167        .name = "format",
 168        .attrs = format_attrs,
 169};
 170
 171static const struct attribute_group *attr_groups[] = {
 172        &events_attr_group,
 173        &format_attr_group,
 174        NULL,
 175};
 176
 177static const struct attribute_group *attr_update[] = {
 178        &group_aperf,
 179        &group_mperf,
 180        &group_pperf,
 181        &group_smi,
 182        &group_ptsc,
 183        &group_irperf,
 184        &group_therm,
 185        NULL,
 186};
 187
 188static int msr_event_init(struct perf_event *event)
 189{
 190        u64 cfg = event->attr.config;
 191
 192        if (event->attr.type != event->pmu->type)
 193                return -ENOENT;
 194
 195        /* unsupported modes and filters */
 196        if (event->attr.sample_period) /* no sampling */
 197                return -EINVAL;
 198
 199        if (cfg >= PERF_MSR_EVENT_MAX)
 200                return -EINVAL;
 201
 202        cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
 203
 204        if (!(msr_mask & (1 << cfg)))
 205                return -EINVAL;
 206
 207        event->hw.idx           = -1;
 208        event->hw.event_base    = msr[cfg].msr;
 209        event->hw.config        = cfg;
 210
 211        return 0;
 212}
 213
 214static inline u64 msr_read_counter(struct perf_event *event)
 215{
 216        u64 now;
 217
 218        if (event->hw.event_base)
 219                rdmsrl(event->hw.event_base, now);
 220        else
 221                now = rdtsc_ordered();
 222
 223        return now;
 224}
 225
 226static void msr_event_update(struct perf_event *event)
 227{
 228        u64 prev, now;
 229        s64 delta;
 230
 231        /* Careful, an NMI might modify the previous event value: */
 232again:
 233        prev = local64_read(&event->hw.prev_count);
 234        now = msr_read_counter(event);
 235
 236        if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
 237                goto again;
 238
 239        delta = now - prev;
 240        if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
 241                delta = sign_extend64(delta, 31);
 242                local64_add(delta, &event->count);
 243        } else if (unlikely(event->hw.event_base == MSR_IA32_THERM_STATUS)) {
 244                /* If valid, extract digital readout, otherwise set to -1: */
 245                now = now & (1ULL << 31) ? (now >> 16) & 0x3f :  -1;
 246                local64_set(&event->count, now);
 247        } else {
 248                local64_add(delta, &event->count);
 249        }
 250}
 251
 252static void msr_event_start(struct perf_event *event, int flags)
 253{
 254        u64 now = msr_read_counter(event);
 255
 256        local64_set(&event->hw.prev_count, now);
 257}
 258
 259static void msr_event_stop(struct perf_event *event, int flags)
 260{
 261        msr_event_update(event);
 262}
 263
 264static void msr_event_del(struct perf_event *event, int flags)
 265{
 266        msr_event_stop(event, PERF_EF_UPDATE);
 267}
 268
 269static int msr_event_add(struct perf_event *event, int flags)
 270{
 271        if (flags & PERF_EF_START)
 272                msr_event_start(event, flags);
 273
 274        return 0;
 275}
 276
 277static struct pmu pmu_msr = {
 278        .task_ctx_nr    = perf_sw_context,
 279        .attr_groups    = attr_groups,
 280        .event_init     = msr_event_init,
 281        .add            = msr_event_add,
 282        .del            = msr_event_del,
 283        .start          = msr_event_start,
 284        .stop           = msr_event_stop,
 285        .read           = msr_event_update,
 286        .capabilities   = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
 287        .attr_update    = attr_update,
 288};
 289
 290static int __init msr_init(void)
 291{
 292        if (!boot_cpu_has(X86_FEATURE_TSC)) {
 293                pr_cont("no MSR PMU driver.\n");
 294                return 0;
 295        }
 296
 297        msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL);
 298
 299        perf_pmu_register(&pmu_msr, "msr", -1);
 300
 301        return 0;
 302}
 303device_initcall(msr_init);
 304