1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88#include <linux/module.h>
89#include <linux/slab.h>
90#include <linux/perf_event.h>
91#include <asm/cpu_device_id.h>
92#include <asm/intel-family.h>
93#include "../perf_event.h"
94
95MODULE_LICENSE("GPL");
96
/*
 * DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)
 *
 * Emits a read-only (0444) kobj_attribute named format_attr_<_var>, exposed
 * under the name <_name>, together with its show routine.  The show routine
 * prints <_format> (used as the sprintf() format string) followed by a
 * newline into the output page and returns the sprintf() result.
 *
 * The BUILD_BUG_ON() rejects, at compile time, a format string whose size
 * is not smaller than PAGE_SIZE.
 */
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\
				struct kobj_attribute *attr,	\
				char *page)			\
{								\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\
	return sprintf(page, _format "\n");			\
}								\
static struct kobj_attribute format_attr_##_var =		\
	__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
107
108static ssize_t cstate_get_attr_cpumask(struct device *dev,
109 struct device_attribute *attr,
110 char *buf);
111
112
/*
 * Per-CPU-model description of which counters to probe.
 *
 * @core_events: bitmask with one bit per enum perf_cstate_core_events value
 * @pkg_events:  bitmask with one bit per enum perf_cstate_pkg_events value
 * @quirks:      bitmask of model quirk flags (e.g. SLM_PKG_C6_USE_C7_MSR)
 */
struct cstate_model {
	unsigned long		core_events;
	unsigned long		pkg_events;
	unsigned long		quirks;
};
118
119
/*
 * Quirk flag for struct cstate_model::quirks: when set, the package C6
 * event is read from MSR_PKG_C7_RESIDENCY instead of its default MSR.
 */
#define SLM_PKG_C6_USE_C7_MSR	(1UL << 0)
121
122struct perf_cstate_msr {
123 u64 msr;
124 struct perf_pmu_events_attr *attr;
125};
126
127
128
129static struct pmu cstate_core_pmu;
130static bool has_cstate_core;
131
/*
 * Event ids of the core PMU.  Each value is the index of the matching
 * entry in core_msr[] and the bit number used in cstate_model::core_events.
 */
enum perf_cstate_core_events {
	PERF_CSTATE_CORE_C1_RES = 0,	/* c1-residency */
	PERF_CSTATE_CORE_C3_RES,	/* c3-residency */
	PERF_CSTATE_CORE_C6_RES,	/* c6-residency */
	PERF_CSTATE_CORE_C7_RES,	/* c7-residency */

	PERF_CSTATE_CORE_EVENT_MAX,	/* number of core events */
};
140
141PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
142PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
143PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
144PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
145
146static struct perf_cstate_msr core_msr[] = {
147 [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 },
148 [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 },
149 [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 },
150 [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 },
151};
152
153static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
154 NULL,
155};
156
157static struct attribute_group core_events_attr_group = {
158 .name = "events",
159 .attrs = core_events_attrs,
160};
161
162DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
163static struct attribute *core_format_attrs[] = {
164 &format_attr_core_event.attr,
165 NULL,
166};
167
168static struct attribute_group core_format_attr_group = {
169 .name = "format",
170 .attrs = core_format_attrs,
171};
172
173static cpumask_t cstate_core_cpu_mask;
174static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
175
176static struct attribute *cstate_cpumask_attrs[] = {
177 &dev_attr_cpumask.attr,
178 NULL,
179};
180
181static struct attribute_group cpumask_attr_group = {
182 .attrs = cstate_cpumask_attrs,
183};
184
185static const struct attribute_group *core_attr_groups[] = {
186 &core_events_attr_group,
187 &core_format_attr_group,
188 &cpumask_attr_group,
189 NULL,
190};
191
192
193static struct pmu cstate_pkg_pmu;
194static bool has_cstate_pkg;
195
/*
 * Event ids of the package PMU.  Each value is the index of the matching
 * entry in pkg_msr[] and the bit number used in cstate_model::pkg_events.
 */
enum perf_cstate_pkg_events {
	PERF_CSTATE_PKG_C2_RES = 0,	/* c2-residency */
	PERF_CSTATE_PKG_C3_RES,		/* c3-residency */
	PERF_CSTATE_PKG_C6_RES,		/* c6-residency */
	PERF_CSTATE_PKG_C7_RES,		/* c7-residency */
	PERF_CSTATE_PKG_C8_RES,		/* c8-residency */
	PERF_CSTATE_PKG_C9_RES,		/* c9-residency */
	PERF_CSTATE_PKG_C10_RES,	/* c10-residency */

	PERF_CSTATE_PKG_EVENT_MAX,	/* number of package events */
};
207
208PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
209PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
210PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
211PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
212PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
213PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
214PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
215
216static struct perf_cstate_msr pkg_msr[] = {
217 [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 },
218 [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 },
219 [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 },
220 [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 },
221 [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 },
222 [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 },
223 [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 },
224};
225
226static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
227 NULL,
228};
229
230static struct attribute_group pkg_events_attr_group = {
231 .name = "events",
232 .attrs = pkg_events_attrs,
233};
234
235DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
236static struct attribute *pkg_format_attrs[] = {
237 &format_attr_pkg_event.attr,
238 NULL,
239};
240static struct attribute_group pkg_format_attr_group = {
241 .name = "format",
242 .attrs = pkg_format_attrs,
243};
244
245static cpumask_t cstate_pkg_cpu_mask;
246
247static const struct attribute_group *pkg_attr_groups[] = {
248 &pkg_events_attr_group,
249 &pkg_format_attr_group,
250 &cpumask_attr_group,
251 NULL,
252};
253
254static ssize_t cstate_get_attr_cpumask(struct device *dev,
255 struct device_attribute *attr,
256 char *buf)
257{
258 struct pmu *pmu = dev_get_drvdata(dev);
259
260 if (pmu == &cstate_core_pmu)
261 return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
262 else if (pmu == &cstate_pkg_pmu)
263 return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
264 else
265 return 0;
266}
267
268static int cstate_pmu_event_init(struct perf_event *event)
269{
270 u64 cfg = event->attr.config;
271 int cpu;
272
273 if (event->attr.type != event->pmu->type)
274 return -ENOENT;
275
276
277 if (event->attr.exclude_user ||
278 event->attr.exclude_kernel ||
279 event->attr.exclude_hv ||
280 event->attr.exclude_idle ||
281 event->attr.exclude_host ||
282 event->attr.exclude_guest ||
283 event->attr.sample_period)
284 return -EINVAL;
285
286 if (event->cpu < 0)
287 return -EINVAL;
288
289 if (event->pmu == &cstate_core_pmu) {
290 if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
291 return -EINVAL;
292 if (!core_msr[cfg].attr)
293 return -EINVAL;
294 event->hw.event_base = core_msr[cfg].msr;
295 cpu = cpumask_any_and(&cstate_core_cpu_mask,
296 topology_sibling_cpumask(event->cpu));
297 } else if (event->pmu == &cstate_pkg_pmu) {
298 if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
299 return -EINVAL;
300 if (!pkg_msr[cfg].attr)
301 return -EINVAL;
302 event->hw.event_base = pkg_msr[cfg].msr;
303 cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
304 topology_core_cpumask(event->cpu));
305 } else {
306 return -ENOENT;
307 }
308
309 if (cpu >= nr_cpu_ids)
310 return -ENODEV;
311
312 event->cpu = cpu;
313 event->hw.config = cfg;
314 event->hw.idx = -1;
315 return 0;
316}
317
318static inline u64 cstate_pmu_read_counter(struct perf_event *event)
319{
320 u64 val;
321
322 rdmsrl(event->hw.event_base, val);
323 return val;
324}
325
326static void cstate_pmu_event_update(struct perf_event *event)
327{
328 struct hw_perf_event *hwc = &event->hw;
329 u64 prev_raw_count, new_raw_count;
330
331again:
332 prev_raw_count = local64_read(&hwc->prev_count);
333 new_raw_count = cstate_pmu_read_counter(event);
334
335 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
336 new_raw_count) != prev_raw_count)
337 goto again;
338
339 local64_add(new_raw_count - prev_raw_count, &event->count);
340}
341
342static void cstate_pmu_event_start(struct perf_event *event, int mode)
343{
344 local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
345}
346
/*
 * pmu::stop callback: account everything counted up to now.
 * @mode is not used; the count is always updated.
 */
static void cstate_pmu_event_stop(struct perf_event *event, int mode)
{
	cstate_pmu_event_update(event);
}
351
352static void cstate_pmu_event_del(struct perf_event *event, int mode)
353{
354 cstate_pmu_event_stop(event, PERF_EF_UPDATE);
355}
356
357static int cstate_pmu_event_add(struct perf_event *event, int mode)
358{
359 if (mode & PERF_EF_START)
360 cstate_pmu_event_start(event, mode);
361
362 return 0;
363}
364
365
366
367
368
369static int cstate_cpu_exit(unsigned int cpu)
370{
371 unsigned int target;
372
373 if (has_cstate_core &&
374 cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
375
376 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
377
378 if (target < nr_cpu_ids) {
379 cpumask_set_cpu(target, &cstate_core_cpu_mask);
380 perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
381 }
382 }
383
384 if (has_cstate_pkg &&
385 cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
386
387 target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
388
389 if (target < nr_cpu_ids) {
390 cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
391 perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
392 }
393 }
394 return 0;
395}
396
397static int cstate_cpu_init(unsigned int cpu)
398{
399 unsigned int target;
400
401
402
403
404
405 target = cpumask_any_and(&cstate_core_cpu_mask,
406 topology_sibling_cpumask(cpu));
407
408 if (has_cstate_core && target >= nr_cpu_ids)
409 cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
410
411
412
413
414
415 target = cpumask_any_and(&cstate_pkg_cpu_mask,
416 topology_core_cpumask(cpu));
417 if (has_cstate_pkg && target >= nr_cpu_ids)
418 cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
419
420 return 0;
421}
422
423static struct pmu cstate_core_pmu = {
424 .attr_groups = core_attr_groups,
425 .name = "cstate_core",
426 .task_ctx_nr = perf_invalid_context,
427 .event_init = cstate_pmu_event_init,
428 .add = cstate_pmu_event_add,
429 .del = cstate_pmu_event_del,
430 .start = cstate_pmu_event_start,
431 .stop = cstate_pmu_event_stop,
432 .read = cstate_pmu_event_update,
433 .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
434};
435
436static struct pmu cstate_pkg_pmu = {
437 .attr_groups = pkg_attr_groups,
438 .name = "cstate_pkg",
439 .task_ctx_nr = perf_invalid_context,
440 .event_init = cstate_pmu_event_init,
441 .add = cstate_pmu_event_add,
442 .del = cstate_pmu_event_del,
443 .start = cstate_pmu_event_start,
444 .stop = cstate_pmu_event_stop,
445 .read = cstate_pmu_event_update,
446 .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
447};
448
449static const struct cstate_model nhm_cstates __initconst = {
450 .core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
451 BIT(PERF_CSTATE_CORE_C6_RES),
452
453 .pkg_events = BIT(PERF_CSTATE_PKG_C3_RES) |
454 BIT(PERF_CSTATE_PKG_C6_RES) |
455 BIT(PERF_CSTATE_PKG_C7_RES),
456};
457
458static const struct cstate_model snb_cstates __initconst = {
459 .core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
460 BIT(PERF_CSTATE_CORE_C6_RES) |
461 BIT(PERF_CSTATE_CORE_C7_RES),
462
463 .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
464 BIT(PERF_CSTATE_PKG_C3_RES) |
465 BIT(PERF_CSTATE_PKG_C6_RES) |
466 BIT(PERF_CSTATE_PKG_C7_RES),
467};
468
469static const struct cstate_model hswult_cstates __initconst = {
470 .core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
471 BIT(PERF_CSTATE_CORE_C6_RES) |
472 BIT(PERF_CSTATE_CORE_C7_RES),
473
474 .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
475 BIT(PERF_CSTATE_PKG_C3_RES) |
476 BIT(PERF_CSTATE_PKG_C6_RES) |
477 BIT(PERF_CSTATE_PKG_C7_RES) |
478 BIT(PERF_CSTATE_PKG_C8_RES) |
479 BIT(PERF_CSTATE_PKG_C9_RES) |
480 BIT(PERF_CSTATE_PKG_C10_RES),
481};
482
483static const struct cstate_model slm_cstates __initconst = {
484 .core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
485 BIT(PERF_CSTATE_CORE_C6_RES),
486
487 .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES),
488 .quirks = SLM_PKG_C6_USE_C7_MSR,
489};
490
/*
 * Build one x86_cpu_id entry for an Intel family 6 @model with any feature,
 * carrying the address of the model descriptor @states as driver data.
 */
#define X86_CSTATES_MODEL(model, states)				\
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
493
494static const struct x86_cpu_id intel_cstates_match[] __initconst = {
495 X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates),
496 X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
497 X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
498
499 X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates),
500 X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
501 X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
502
503 X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates),
504 X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
505
506 X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates),
507 X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
508
509 X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
510 X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
511 X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
512
513 X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
514
515 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
516 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
517 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
518
519 X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
520 X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
521 X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates),
522 X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
523
524 X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
525 X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
526 { },
527};
528MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
529
530
531
532
533
534static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
535 struct perf_cstate_msr *msr,
536 struct attribute **attrs)
537{
538 bool found = false;
539 unsigned int bit;
540 u64 val;
541
542 for (bit = 0; bit < max; bit++) {
543 if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
544 *attrs++ = &msr[bit].attr->attr.attr;
545 found = true;
546 } else {
547 msr[bit].attr = NULL;
548 }
549 }
550 *attrs = NULL;
551
552 return found;
553}
554
555static int __init cstate_probe(const struct cstate_model *cm)
556{
557
558 if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
559 pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
560
561 has_cstate_core = cstate_probe_msr(cm->core_events,
562 PERF_CSTATE_CORE_EVENT_MAX,
563 core_msr, core_events_attrs);
564
565 has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
566 PERF_CSTATE_PKG_EVENT_MAX,
567 pkg_msr, pkg_events_attrs);
568
569 return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
570}
571
572static inline void cstate_cleanup(void)
573{
574 if (has_cstate_core)
575 perf_pmu_unregister(&cstate_core_pmu);
576
577 if (has_cstate_pkg)
578 perf_pmu_unregister(&cstate_pkg_pmu);
579}
580
581static int __init cstate_init(void)
582{
583 int err;
584
585 cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
586 "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init,
587 NULL);
588 cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
589 "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit);
590
591 if (has_cstate_core) {
592 err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
593 if (err) {
594 has_cstate_core = false;
595 pr_info("Failed to register cstate core pmu\n");
596 return err;
597 }
598 }
599
600 if (has_cstate_pkg) {
601 err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
602 if (err) {
603 has_cstate_pkg = false;
604 pr_info("Failed to register cstate pkg pmu\n");
605 cstate_cleanup();
606 return err;
607 }
608 }
609
610 return err;
611}
612
613static int __init cstate_pmu_init(void)
614{
615 const struct x86_cpu_id *id;
616 int err;
617
618 if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
619 return -ENODEV;
620
621 id = x86_match_cpu(intel_cstates_match);
622 if (!id)
623 return -ENODEV;
624
625 err = cstate_probe((const struct cstate_model *) id->driver_data);
626 if (err)
627 return err;
628
629 return cstate_init();
630}
631module_init(cstate_pmu_init);
632
633static void __exit cstate_pmu_exit(void)
634{
635 cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
636 cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
637 cstate_cleanup();
638}
639module_exit(cstate_pmu_exit);
640