// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2015 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *   Wei Huang    <wei@redhat.com>
 */
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"

/* This is enough to filter the vast majority of currently defined events. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300

/* NOTE:
 * - Each perf counter is defined as "struct kvm_pmc";
 * - There are two types of perf counters: general purpose (gp) and fixed.
 *   gp counters are stored in gp_counters[] and fixed counters are stored
 *   in fixed_counters[] respectively.  Both of them are part of "struct
 *   kvm_pmu";
 * - pmu.c understands the difference between gp counters and fixed counters.
 *   However AMD doesn't support fixed-counters;
 * - There are three types of index to access perf counters (PMC):
 *   1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD
 *      has MSR_K7_PERFCTRn.
 *   2. MSR Index (named idx): This normally is used by the RDPMC
 *      instruction.  For instance AMD RDPMC uses 0000_0003h in ECX to
 *      access MSR_K7_PERFCTR3.  Intel has a similar mechanism, except it
 *      doesn't pass the CPL and faults if CPL > 0.
 *   3. Global PMC Index (named pmc): pmc is an index specific to PMU
 *      code.  Each pmc, stored in the kvm_pmc.idx field, is unique across
 *      all perf counters (both gp and fixed).  The mapping is:
 *      * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
 *               [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
 *      * AMD:   [0 .. AMD64_NUM_COUNTERS-1] <=> gp counters
 */

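/*
 * Deferred PMI delivery: runs from irq_work context so that a PMI raised
 * while the CPU is still in NMI context can be injected once it is safe to
 * do so (see __kvm_perf_overflow() below).
 */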
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

	kvm_pmu_deliver_pmi(vcpu);
}

static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	/* Ignore counters that have already been marked for reprogramming. */
	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
		return;

	__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);

	if (!pmc->intr)
		return;

	/*
	 * Inject PMI.  If the vCPU was in guest mode during the NMI, the PMI
	 * can be injected on the next guest re-entry.  Otherwise the vCPU may
	 * have been halted at the time of the VM-exit and won't re-enter
	 * guest mode until it is woken up, which can't be done from NMI
	 * context, so defer the wakeup to irq_work.
	 */
	if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
		irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
	else
		kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}

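/*
 * perf overflow callback installed by pmc_reprogram_counter(); invoked in
 * PMI (NMI) context, hence in_pmi == true.
 */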
static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;

	__kvm_perf_overflow(pmc, true);
}

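/*
 * Create (or re-create) the host perf_event that backs the given vPMC,
 * translating the guest's eventsel/fixed-counter configuration into a
 * perf_event_attr.  The event counts only while the guest is running
 * (exclude_host) and reports overflow via kvm_perf_overflow().
 */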
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
				  u64 config, bool exclude_user,
				  bool exclude_kernel, bool intr,
				  bool in_tx, bool in_tx_cp)
{
	struct perf_event *event;
	struct perf_event_attr attr = {
		.type = type,
		.size = sizeof(attr),
		.pinned = true,
		.exclude_idle = true,
		.exclude_host = 1,
		.exclude_user = exclude_user,
		.exclude_kernel = exclude_kernel,
		.config = config,
	};

	if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
		return;

	attr.sample_period = get_sample_period(pmc, pmc->counter);

	if (in_tx)
		attr.config |= HSW_IN_TX;
	if (in_tx_cp) {
		/*
		 * HSW_IN_TX_CHECKPOINTED is not supported with a nonzero
		 * sample period, so request a period of 0 for this event.
		 */
		attr.sample_period = 0;
		attr.config |= HSW_IN_TX_CHECKPOINTED;
	}

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_perf_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
				     PTR_ERR(event), pmc->idx);
		return;
	}

	pmc->perf_event = event;
	pmc_to_pmu(pmc)->event_count++;
	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
	pmc->is_paused = false;
	pmc->intr = intr;
}

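/*
 * Pause the backing perf_event and fold its accumulated count into the vPMC
 * so the guest-visible counter value stays accurate while the event is
 * being reconfigured.
 */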
static void pmc_pause_counter(struct kvm_pmc *pmc)
{
	u64 counter = pmc->counter;

	if (!pmc->perf_event || pmc->is_paused)
		return;

	/* update counter, reset event value to avoid redundant accumulation */
	counter += perf_event_pause(pmc->perf_event, true);
	pmc->counter = counter & pmc_bitmask(pmc);
	pmc->is_paused = true;
}

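/*
 * Try to resume an existing, paused perf_event with a freshly computed
 * sample period.  Returns false if there is no event to reuse or perf
 * rejects the new period, in which case the caller must recreate the event.
 */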
static bool pmc_resume_counter(struct kvm_pmc *pmc)
{
	if (!pmc->perf_event)
		return false;

	/* recalibrate sample period and check if it's accepted by perf core */
	if (perf_event_period(pmc->perf_event,
			      get_sample_period(pmc, pmc->counter)))
		return false;

	/* reuse the perf_event as pmc_reprogram_counter() would have done */
	perf_event_enable(pmc->perf_event);
	pmc->is_paused = false;

	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
	return true;
}

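/*
 * Comparison helper for sort()/bsearch() over the filter's u64 event list.
 * Compare instead of subtracting: truncating a u64 difference to the int
 * return value can yield the wrong sign.
 */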
static int cmp_u64(const void *pa, const void *pb)
{
	u64 a = *(u64 *)pa;
	u64 b = *(u64 *)pb;

	return (a > b) - (a < b);
}

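/*
 * Program a general-purpose vPMC from its guest PERF_EVENTSEL value: pause
 * the current backing event, apply the VM's PMU event filter, then either
 * resume the existing perf_event or create a new one via
 * pmc_reprogram_counter().
 */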
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
	u64 config;
	u32 type = PERF_TYPE_RAW;
	struct kvm *kvm = pmc->vcpu->kvm;
	struct kvm_pmu_event_filter *filter;
	bool allow_event = true;

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

	pmc->eventsel = eventsel;

	pmc_pause_counter(pmc);

	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		__u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;

		if (bsearch(&key, filter->events, filter->nevents,
			    sizeof(__u64), cmp_u64))
			allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
		else
			allow_event = filter->action == KVM_PMU_EVENT_DENY;
	}
	if (!allow_event)
		return;

	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
			  ARCH_PERFMON_EVENTSEL_INV |
			  ARCH_PERFMON_EVENTSEL_CMASK |
			  HSW_IN_TX |
			  HSW_IN_TX_CHECKPOINTED))) {
		config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
		if (config != PERF_COUNT_HW_MAX)
			type = PERF_TYPE_HARDWARE;
	}

	if (type == PERF_TYPE_RAW)
		config = eventsel & AMD64_RAW_EVENT_MASK;

	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = eventsel;
	pmc_reprogram_counter(pmc, type, config,
			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
			      eventsel & ARCH_PERFMON_EVENTSEL_INT,
			      (eventsel & HSW_IN_TX),
			      (eventsel & HSW_IN_TX_CHECKPOINTED));
}
EXPORT_SYMBOL_GPL(reprogram_gp_counter);

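/*
 * Program a fixed vPMC from its 4-bit field in the fixed counter control
 * MSR: bits 0-1 select the CPL to count (en_field) and bit 3 enables the
 * PMI.  The filter's fixed_counter_bitmap is honored here.
 */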
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
	unsigned en_field = ctrl & 0x3;
	bool pmi = ctrl & 0x8;
	struct kvm_pmu_event_filter *filter;
	struct kvm *kvm = pmc->vcpu->kvm;

	pmc_pause_counter(pmc);

	if (!en_field || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
	}

	if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = (u64)ctrl;
	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
			      kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
			      !(en_field & 0x2),
			      !(en_field & 0x1),
			      pmi, false, false);
}
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);

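/*
 * Reprogram a vPMC by its global index, dispatching to the gp or fixed
 * variant based on the counter type.
 */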
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);

	if (!pmc)
		return;

	if (pmc_is_gp(pmc))
		reprogram_gp_counter(pmc, pmc->eventsel);
	else {
		int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
		u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);

		reprogram_fixed_counter(pmc, ctrl, idx);
	}
}
EXPORT_SYMBOL_GPL(reprogram_counter);

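/*
 * Process a pending KVM_REQ_PMU: reprogram every counter flagged in
 * reprogram_pmi and, if requested, release perf_events for vPMCs that went
 * unused during the last vCPU time slice.
 */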
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int bit;

	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);

		if (unlikely(!pmc || !pmc->perf_event)) {
			clear_bit(bit, pmu->reprogram_pmi);
			continue;
		}

		reprogram_counter(pmu, bit);
	}

	/*
	 * Unused perf_events are only released if the corresponding MSRs
	 * weren't accessed during the last vCPU time slice.  kvm_arch_sched_in()
	 * sets need_cleanup and raises KVM_REQ_PMU when cleanup is needed.
	 */
	if (unlikely(pmu->need_cleanup))
		kvm_pmu_cleanup(vcpu);
}

/* check if idx is a valid index to access PMU */
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}

bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
	switch (pmc_idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		return true;
	}
	return false;
}

static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	u64 ctr_val;

	switch (idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
		ctr_val = rdtsc();
		break;
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
		ctr_val = ktime_get_boottime_ns();
		break;
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		ctr_val = ktime_get_boottime_ns() +
			vcpu->kvm->arch.kvmclock_offset;
		break;
	default:
		return 1;
	}

	*data = ctr_val;
	return 0;
}

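/*
 * Emulate RDPMC: returns 0 on success with *data holding the counter value
 * (truncated to 32 bits in "fast mode"), or 1 to tell the caller to inject
 * a #GP.
 */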
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	bool fast_mode = idx & (1u << 31);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 mask = fast_mode ? ~0u : ~0ull;

	if (!pmu->version)
		return 1;

	if (is_vmware_backdoor_pmc(idx))
		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

	pmc = kvm_x86_ops.pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
	if (!pmc)
		return 1;

	if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
	    (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
	    (kvm_read_cr0(vcpu) & X86_CR0_PE))
		return 1;

	*data = pmc_read_counter(pmc) & mask;
	return 0;
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu)) {
		if (kvm_x86_ops.pmu_ops->deliver_pmi)
			kvm_x86_ops.pmu_ops->deliver_pmi(vcpu);
		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
	}
}

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	return kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
		kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, msr);
}

static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr);

	if (pmc)
		__set_bit(pmc->idx, pmu->pmc_in_use);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr_info);
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
	return kvm_x86_ops.pmu_ops->set_msr(vcpu, msr_info);
}

/*
 * Refresh the PMU configuration for the vCPU.  This is generally called
 * when the underlying settings change (such as the guest's PMU CPUID),
 * which should happen rarely.
 */
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops.pmu_ops->refresh(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	irq_work_sync(&pmu->irq_work);
	kvm_x86_ops.pmu_ops->reset(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	memset(pmu, 0, sizeof(*pmu));
	kvm_x86_ops.pmu_ops->init(vcpu);
	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
	pmu->event_count = 0;
	pmu->need_cleanup = false;
	kvm_pmu_refresh(vcpu);
}

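/*
 * Returns true if the counter is enabled from the guest's point of view,
 * i.e. the guest has programmed an enabled configuration, even if no
 * perf_event currently backs it.
 */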
static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (pmc_is_fixed(pmc))
		return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
					pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;

	return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
}

/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
	int i;

	pmu->need_cleanup = false;

	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
		      pmu->pmc_in_use, X86_PMC_IDX_MAX);

	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);

		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
			pmc_stop_counter(pmc);
	}

	if (kvm_x86_ops.pmu_ops->cleanup)
		kvm_x86_ops.pmu_ops->cleanup(vcpu);

	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}

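/*
 * Increment a vPMC by one on behalf of emulated execution and raise an
 * overflow event if the counter wraps.
 */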
static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	u64 prev_count;

	prev_count = pmc->counter;
	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);

	reprogram_counter(pmu, pmc->idx);
	if (pmc->counter < prev_count)
		__kvm_perf_overflow(pmc, false);
}

static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
					     unsigned int perf_hw_id)
{
	u64 old_eventsel = pmc->eventsel;
	unsigned int config;

	pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK);
	config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
	pmc->eventsel = old_eventsel;
	return config == perf_hw_id;
}

static inline bool cpl_is_matched(struct kvm_pmc *pmc)
{
	bool select_os, select_user;
	u64 config = pmc->current_config;

	if (pmc_is_gp(pmc)) {
		select_os = config & ARCH_PERFMON_EVENTSEL_OS;
		select_user = config & ARCH_PERFMON_EVENTSEL_USR;
	} else {
		select_os = config & 0x1;
		select_user = config & 0x2;
	}

	return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
}

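/*
 * Called when KVM emulates something that architectural PMU events would
 * count (e.g. retired instructions or branches): bump every enabled vPMC
 * whose programmed event and CPL filter match perf_hw_id.
 */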
void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);

		if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
			continue;

		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
		if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
			kvm_pmu_incr_counter(pmc);
	}
}
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);

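/*
 * Handle KVM_SET_PMU_EVENT_FILTER: copy the filter from userspace, validate
 * it, sort its event list for bsearch(), and publish it via RCU so that
 * reprogram_{gp,fixed}_counter() can consult it under SRCU without taking
 * kvm->lock.
 */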
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
	struct kvm_pmu_event_filter tmp, *filter;
	size_t size;
	int r;

	if (copy_from_user(&tmp, argp, sizeof(tmp)))
		return -EFAULT;

	if (tmp.action != KVM_PMU_EVENT_ALLOW &&
	    tmp.action != KVM_PMU_EVENT_DENY)
		return -EINVAL;

	if (tmp.flags != 0)
		return -EINVAL;

	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
		return -E2BIG;

	size = struct_size(filter, events, tmp.nevents);
	filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
	if (!filter)
		return -ENOMEM;

	r = -EFAULT;
	if (copy_from_user(filter, argp, size))
		goto cleanup;

	/* Ensure nevents can't be changed between the user copies. */
	*filter = tmp;

	/*
	 * Sort the in-kernel list so that it can be searched with bsearch()
	 * when counters are reprogrammed.
	 */
	sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);

	mutex_lock(&kvm->lock);
	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
				     mutex_is_locked(&kvm->lock));
	mutex_unlock(&kvm->lock);

	synchronize_srcu_expedited(&kvm->srcu);
	r = 0;
cleanup:
	kfree(filter);
	return r;
}