linux/arch/x86/events/msr.c
// SPDX-License-Identifier: GPL-2.0
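/*
 * The "msr" PMU: a software-context perf PMU that exposes a small set of
 * free-running, read-only x86 MSR counters (plus the TSC) as counting-only
 * events; sampling is not supported.
 *
 * Illustrative usage (event availability depends on the CPU):
 *
 *	perf stat -e msr/tsc/,msr/aperf/ -a -- sleep 1
 */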
#include <linux/perf_event.h>
#include <linux/sysfs.h>
#include <linux/nospec.h>
#include <asm/intel-family.h>
#include "probe.h"

enum perf_msr_id {
	PERF_MSR_TSC			= 0,
	PERF_MSR_APERF			= 1,
	PERF_MSR_MPERF			= 2,
	PERF_MSR_PPERF			= 3,
	PERF_MSR_SMI			= 4,
	PERF_MSR_PTSC			= 5,
	PERF_MSR_IRPERF			= 6,
	PERF_MSR_THERM			= 7,
	PERF_MSR_EVENT_MAX,
};

static bool test_aperfmperf(int idx, void *data)
{
	return boot_cpu_has(X86_FEATURE_APERFMPERF);
}

static bool test_ptsc(int idx, void *data)
{
	return boot_cpu_has(X86_FEATURE_PTSC);
}

static bool test_irperf(int idx, void *data)
{
	return boot_cpu_has(X86_FEATURE_IRPERF);
}

static bool test_therm_status(int idx, void *data)
{
	return boot_cpu_has(X86_FEATURE_DTHERM);
}

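/*
 * No CPUID bit is checked for MSR_SMI_COUNT or MSR_PPERF; availability is
 * keyed off the Intel family 6 model list below.  The first group of models
 * provides the SMI count only, the second (Skylake and later) also provides
 * PPERF.
 */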
static bool test_intel(int idx, void *data)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
	    boot_cpu_data.x86 != 6)
		return false;

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_NEHALEM:
	case INTEL_FAM6_NEHALEM_G:
	case INTEL_FAM6_NEHALEM_EP:
	case INTEL_FAM6_NEHALEM_EX:

	case INTEL_FAM6_WESTMERE:
	case INTEL_FAM6_WESTMERE_EP:
	case INTEL_FAM6_WESTMERE_EX:

	case INTEL_FAM6_SANDYBRIDGE:
	case INTEL_FAM6_SANDYBRIDGE_X:

	case INTEL_FAM6_IVYBRIDGE:
	case INTEL_FAM6_IVYBRIDGE_X:

	case INTEL_FAM6_HASWELL:
	case INTEL_FAM6_HASWELL_X:
	case INTEL_FAM6_HASWELL_L:
	case INTEL_FAM6_HASWELL_G:

	case INTEL_FAM6_BROADWELL:
	case INTEL_FAM6_BROADWELL_D:
	case INTEL_FAM6_BROADWELL_G:
	case INTEL_FAM6_BROADWELL_X:
	case INTEL_FAM6_SAPPHIRERAPIDS_X:

	case INTEL_FAM6_ATOM_SILVERMONT:
	case INTEL_FAM6_ATOM_SILVERMONT_D:
	case INTEL_FAM6_ATOM_AIRMONT:

	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_D:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
	case INTEL_FAM6_ATOM_TREMONT_D:
	case INTEL_FAM6_ATOM_TREMONT:
	case INTEL_FAM6_ATOM_TREMONT_L:

	case INTEL_FAM6_XEON_PHI_KNL:
	case INTEL_FAM6_XEON_PHI_KNM:
		if (idx == PERF_MSR_SMI)
			return true;
		break;

	case INTEL_FAM6_SKYLAKE_L:
	case INTEL_FAM6_SKYLAKE:
	case INTEL_FAM6_SKYLAKE_X:
	case INTEL_FAM6_KABYLAKE_L:
	case INTEL_FAM6_KABYLAKE:
	case INTEL_FAM6_COMETLAKE_L:
	case INTEL_FAM6_COMETLAKE:
	case INTEL_FAM6_ICELAKE_L:
	case INTEL_FAM6_ICELAKE:
	case INTEL_FAM6_ICELAKE_X:
	case INTEL_FAM6_ICELAKE_D:
	case INTEL_FAM6_TIGERLAKE_L:
	case INTEL_FAM6_TIGERLAKE:
	case INTEL_FAM6_ROCKETLAKE:
	case INTEL_FAM6_ALDERLAKE:
	case INTEL_FAM6_ALDERLAKE_L:
		if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
			return true;
		break;
	}

	return false;
}

PMU_EVENT_ATTR_STRING(tsc,				attr_tsc,		"event=0x00"	);
PMU_EVENT_ATTR_STRING(aperf,				attr_aperf,		"event=0x01"	);
PMU_EVENT_ATTR_STRING(mperf,				attr_mperf,		"event=0x02"	);
PMU_EVENT_ATTR_STRING(pperf,				attr_pperf,		"event=0x03"	);
PMU_EVENT_ATTR_STRING(smi,				attr_smi,		"event=0x04"	);
PMU_EVENT_ATTR_STRING(ptsc,				attr_ptsc,		"event=0x05"	);
PMU_EVENT_ATTR_STRING(irperf,				attr_irperf,		"event=0x06"	);
PMU_EVENT_ATTR_STRING(cpu_thermal_margin,		attr_therm,		"event=0x07"	);
PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot,	attr_therm_snap,	"1"		);
PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit,		attr_therm_unit,	"C"		);

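/*
 * Bitmask of the msr[] entries that perf_msr_probe() found usable on this
 * system; msr_event_init() rejects any event whose bit is clear.
 */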
static unsigned long msr_mask;

PMU_EVENT_GROUP(events, aperf);
PMU_EVENT_GROUP(events, mperf);
PMU_EVENT_GROUP(events, pperf);
PMU_EVENT_GROUP(events, smi);
PMU_EVENT_GROUP(events, ptsc);
PMU_EVENT_GROUP(events, irperf);

static struct attribute *attrs_therm[] = {
	&attr_therm.attr.attr,
	&attr_therm_snap.attr.attr,
	&attr_therm_unit.attr.attr,
	NULL,
};

static struct attribute_group group_therm = {
	.name  = "events",
	.attrs = attrs_therm,
};

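/*
 * One entry per perf_msr_id: the MSR to read, the sysfs "events" group to
 * expose when available, and the probe function that decides availability.
 * TSC has no MSR or group here; it is read via RDTSC and marked .no_check,
 * so the probe accepts it without a test read (msr_init() already requires
 * X86_FEATURE_TSC).
 */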
static struct perf_msr msr[] = {
	[PERF_MSR_TSC]		= { .no_check = true,								},
	[PERF_MSR_APERF]	= { MSR_IA32_APERF,		&group_aperf,		test_aperfmperf,	},
	[PERF_MSR_MPERF]	= { MSR_IA32_MPERF,		&group_mperf,		test_aperfmperf,	},
	[PERF_MSR_PPERF]	= { MSR_PPERF,			&group_pperf,		test_intel,		},
	[PERF_MSR_SMI]		= { MSR_SMI_COUNT,		&group_smi,		test_intel,		},
	[PERF_MSR_PTSC]		= { MSR_F15H_PTSC,		&group_ptsc,		test_ptsc,		},
	[PERF_MSR_IRPERF]	= { MSR_F17H_IRPERF,		&group_irperf,		test_irperf,		},
	[PERF_MSR_THERM]	= { MSR_IA32_THERM_STATUS,	&group_therm,		test_therm_status,	},
};

static struct attribute *events_attrs[] = {
	&attr_tsc.attr.attr,
	NULL,
};

static struct attribute_group events_attr_group = {
	.name = "events",
	.attrs = events_attrs,
};

PMU_FORMAT_ATTR(event, "config:0-63");
static struct attribute *format_attrs[] = {
	&format_attr_event.attr,
	NULL,
};
static struct attribute_group format_attr_group = {
	.name = "format",
	.attrs = format_attrs,
};

static const struct attribute_group *attr_groups[] = {
	&events_attr_group,
	&format_attr_group,
	NULL,
};

static const struct attribute_group *attr_update[] = {
	&group_aperf,
	&group_mperf,
	&group_pperf,
	&group_smi,
	&group_ptsc,
	&group_irperf,
	&group_therm,
	NULL,
};

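/*
 * Validate and set up a new event: it must target this PMU, sampling is not
 * supported (these are free-running counters without interrupts), and the
 * config value must select one of the probed MSRs.  array_index_nospec()
 * clamps the index under speculation before it is used to index msr[].
 */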
static int msr_event_init(struct perf_event *event)
{
	u64 cfg = event->attr.config;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (cfg >= PERF_MSR_EVENT_MAX)
		return -EINVAL;

	cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);

	if (!(msr_mask & (1 << cfg)))
		return -EINVAL;

	event->hw.idx		= -1;
	event->hw.event_base	= msr[cfg].msr;
	event->hw.config	= cfg;

	return 0;
}

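/* A zero event_base means the TSC event; everything else is a real MSR. */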
static inline u64 msr_read_counter(struct perf_event *event)
{
	u64 now;

	if (event->hw.event_base)
		rdmsrl(event->hw.event_base, now);
	else
		now = rdtsc_ordered();

	return now;
}

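/*
 * Fold the change since the last read into event->count.  The SMI count is a
 * 32-bit counter, so its delta is sign-extended from bit 31 to survive a
 * wrap.  The thermal margin is a snapshot rather than an accumulating count:
 * the current readout (or -1 if the valid bit is clear) replaces the value.
 */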
static void msr_event_update(struct perf_event *event)
{
	u64 prev, now;
	s64 delta;

	/* Careful, an NMI might modify the previous event value: */
again:
	prev = local64_read(&event->hw.prev_count);
	now = msr_read_counter(event);

	if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
		goto again;

	delta = now - prev;
	if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
		delta = sign_extend64(delta, 31);
		local64_add(delta, &event->count);
	} else if (unlikely(event->hw.event_base == MSR_IA32_THERM_STATUS)) {
		/* If valid, extract digital readout, otherwise set to -1: */
		now = now & (1ULL << 31) ? (now >> 16) & 0x3f :  -1;
		local64_set(&event->count, now);
	} else {
		local64_add(delta, &event->count);
	}
}

static void msr_event_start(struct perf_event *event, int flags)
{
	u64 now = msr_read_counter(event);

	local64_set(&event->hw.prev_count, now);
}

static void msr_event_stop(struct perf_event *event, int flags)
{
	msr_event_update(event);
}

static void msr_event_del(struct perf_event *event, int flags)
{
	msr_event_stop(event, PERF_EF_UPDATE);
}

static int msr_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		msr_event_start(event, flags);

	return 0;
}

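/*
 * Software-context PMU, so these events can be scheduled alongside hardware
 * events without consuming a hardware counter.  It cannot generate
 * interrupts (counting only) and does not support the exclude_* filters.
 */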
static struct pmu pmu_msr = {
	.task_ctx_nr	= perf_sw_context,
	.attr_groups	= attr_groups,
	.event_init	= msr_event_init,
	.add		= msr_event_add,
	.del		= msr_event_del,
	.start		= msr_event_start,
	.stop		= msr_event_stop,
	.read		= msr_event_update,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	.attr_update	= attr_update,
};

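/*
 * Nothing here works without a TSC, so bail out early in that case.
 * Otherwise probe which of the optional MSRs are present, record the result
 * in msr_mask, and register the PMU as "msr".
 */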
static int __init msr_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC)) {
		pr_cont("no MSR PMU driver.\n");
		return 0;
	}

	msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL);

	perf_pmu_register(&pmu_msr, "msr", -1);

	return 0;
}
device_initcall(msr_init);