// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine (KVM) -- x86 Performance Monitoring Unit (PMU)
 * support: emulates guest PMU counters by programming host perf_events.
 */

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"

/* Upper bound on the number of events in a single guest PMU event filter. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
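/*
 * NOTE:
 * - Each virtual counter is a "struct kvm_pmc"; general purpose (gp) and
 *   fixed counters are both backed by host perf_events created on demand.
 * - Reprogramming is deferred: overflow handlers and MSR emulation only set
 *   a bit in reprogram_pmi and raise KVM_REQ_PMU; kvm_pmu_handle_event()
 *   then (re)creates or resumes the backing perf_event.
 */

/*
 * Deliver a deferred PMI from process context; queued via irq_work when the
 * overflow NMI could not wake the vCPU directly.
 */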
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

	kvm_pmu_deliver_pmi(vcpu);
}

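/*
 * Overflow callback for counters programmed without a PMI: latch the
 * overflow in pmu->global_status and ask kvm_pmu_handle_event() to
 * reprogram the counter.
 */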
static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
	}
}

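/*
 * Overflow callback for counters programmed with a PMI: additionally inject
 * a PMI into the guest, deferring through irq_work when needed.
 */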
static void kvm_perf_overflow_intr(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);

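		/*
		 * Inject the PMI.  If the overflow NMI did not interrupt the
		 * guest, the vCPU may be halted and must be woken, which is
		 * impossible from NMI context, so hand the wakeup off to
		 * irq_work.  Otherwise simply request a PMI, which will be
		 * injected on the next guest entry.
		 */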
		if (!kvm_is_in_guest())
			irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
		else
			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
	}
}

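/*
 * Create (or recreate) the host perf_event that backs a guest counter,
 * translating the guest's configuration into a perf_event_attr.
 */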
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
				  unsigned config, bool exclude_user,
				  bool exclude_kernel, bool intr,
				  bool in_tx, bool in_tx_cp)
{
	struct perf_event *event;
	struct perf_event_attr attr = {
		.type = type,
		.size = sizeof(attr),
		.pinned = true,
		.exclude_idle = true,
		.exclude_host = 1,
		.exclude_user = exclude_user,
		.exclude_kernel = exclude_kernel,
		.config = config,
	};

	attr.sample_period = get_sample_period(pmc, pmc->counter);

	if (in_tx)
		attr.config |= HSW_IN_TX;
	if (in_tx_cp) {
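		/*
		 * HSW_IN_TX_CHECKPOINTED is not supported with a nonzero
		 * sample period; zero the period so that creating the
		 * perf_event does not fail.
		 */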
		attr.sample_period = 0;
		attr.config |= HSW_IN_TX_CHECKPOINTED;
	}

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 intr ? kvm_perf_overflow_intr :
						 kvm_perf_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
				     PTR_ERR(event), pmc->idx);
		return;
	}

	pmc->perf_event = event;
	pmc_to_pmu(pmc)->event_count++;
	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
}

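/*
 * Pause the perf_event and fold its accumulated count into pmc->counter so
 * the counter can later be resumed or reprogrammed with an up-to-date value.
 */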
static void pmc_pause_counter(struct kvm_pmc *pmc)
{
	u64 counter = pmc->counter;

	if (!pmc->perf_event)
		return;

	/* update counter, reset accumulated value */
	counter += perf_event_pause(pmc->perf_event, true);
	pmc->counter = counter & pmc_bitmask(pmc);
}

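/*
 * Try to reuse the existing perf_event: recompute the sample period and
 * re-enable the event.  Returns false if the event cannot be reused and
 * must be recreated.
 */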
static bool pmc_resume_counter(struct kvm_pmc *pmc)
{
	if (!pmc->perf_event)
		return false;

	/* recalibrate sample period and check if it's accepted by perf core */
	if (perf_event_period(pmc->perf_event,
			      get_sample_period(pmc, pmc->counter)))
		return false;

	/* reuse the perf_event instead of recreating it */
	perf_event_enable(pmc->perf_event);

	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
	return true;
}

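/*
 * Program a general purpose counter from its guest EVENTSEL value, honoring
 * the VM's PMU event filter and mapping known event/umask pairs to generic
 * perf hardware events where possible.
 */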
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
	unsigned config, type = PERF_TYPE_RAW;
	u8 event_select, unit_mask;
	struct kvm *kvm = pmc->vcpu->kvm;
	struct kvm_pmu_event_filter *filter;
	int i;
	bool allow_event = true;

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

	pmc->eventsel = eventsel;

	pmc_pause_counter(pmc);

	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		for (i = 0; i < filter->nevents; i++)
			if (filter->events[i] ==
			    (eventsel & AMD64_RAW_EVENT_MASK_NB))
				break;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    i == filter->nevents)
			allow_event = false;
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    i < filter->nevents)
			allow_event = false;
	}
	if (!allow_event)
		return;

	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;

	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
			  ARCH_PERFMON_EVENTSEL_INV |
			  ARCH_PERFMON_EVENTSEL_CMASK |
			  HSW_IN_TX |
			  HSW_IN_TX_CHECKPOINTED))) {
		config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
							      event_select,
							      unit_mask);
		if (config != PERF_COUNT_HW_MAX)
			type = PERF_TYPE_HARDWARE;
	}

	if (type == PERF_TYPE_RAW)
		config = eventsel & X86_RAW_EVENT_MASK;

	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = eventsel;
	pmc_reprogram_counter(pmc, type, config,
			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
			      eventsel & ARCH_PERFMON_EVENTSEL_INT,
			      (eventsel & HSW_IN_TX),
			      (eventsel & HSW_IN_TX_CHECKPOINTED));
}
EXPORT_SYMBOL_GPL(reprogram_gp_counter);

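/*
 * Program a fixed counter from its 4-bit control field (enable bits and PMI
 * bit), honoring the VM's PMU event filter.
 */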
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
	unsigned en_field = ctrl & 0x3;
	bool pmi = ctrl & 0x8;
	struct kvm_pmu_event_filter *filter;
	struct kvm *kvm = pmc->vcpu->kvm;

	pmc_pause_counter(pmc);

	if (!en_field || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
	}

	if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = (u64)ctrl;
	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
			      kvm_x86_ops.pmu_ops->find_fixed_event(idx),
			      !(en_field & 0x2), /* exclude user */
			      !(en_field & 0x1), /* exclude kernel */
			      pmi, false, false);
}
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);

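/* Reprogram a counter by its global index, dispatching to gp or fixed. */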
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);

	if (!pmc)
		return;

	if (pmc_is_gp(pmc))
		reprogram_gp_counter(pmc, pmc->eventsel);
	else {
		int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
		u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);

		reprogram_fixed_counter(pmc, ctrl, idx);
	}
}
EXPORT_SYMBOL_GPL(reprogram_counter);

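/*
 * Called in response to KVM_REQ_PMU: reprogram every counter flagged in
 * reprogram_pmi and perform deferred cleanup if requested.
 */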
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int bit;

	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);

		if (unlikely(!pmc || !pmc->perf_event)) {
			clear_bit(bit, pmu->reprogram_pmi);
			continue;
		}

		reprogram_counter(pmu, bit);
	}

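	/*
	 * Release unused perf_events if the corresponding guest MSRs were
	 * not accessed during the last vCPU time slice (see kvm_pmu_cleanup).
	 */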
	if (unlikely(pmu->need_cleanup))
		kvm_pmu_cleanup(vcpu);
}

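/* check if idx is a valid index to access PMU */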
int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}

bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
	switch (pmc_idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		return true;
	}
	return false;
}

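/* Handle RDPMC reads of the VMware backdoor pseudo-counters. */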
static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	u64 ctr_val;

	switch (idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
		ctr_val = rdtsc();
		break;
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
		ctr_val = ktime_get_boottime_ns();
		break;
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		ctr_val = ktime_get_boottime_ns() +
			vcpu->kvm->arch.kvmclock_offset;
		break;
	default:
		return 1;
	}

	*data = ctr_val;
	return 0;
}

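/* Emulate RDPMC: read the counter selected by the guest's ECX into *data. */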
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	bool fast_mode = idx & (1u << 31);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 mask = fast_mode ? ~0u : ~0ull;

	if (!pmu->version)
		return 1;

	if (is_vmware_backdoor_pmc(idx))
		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

	pmc = kvm_x86_ops.pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
	if (!pmc)
		return 1;

	if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
	    (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
	    (kvm_read_cr0(vcpu) & X86_CR0_PE))
		return 1;

	*data = pmc_read_counter(pmc) & mask;
	return 0;
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu)) {
		if (kvm_x86_ops.pmu_ops->deliver_pmi)
			kvm_x86_ops.pmu_ops->deliver_pmi(vcpu);
		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
	}
}

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	return kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
		kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, msr);
}

static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr);

	if (pmc)
		__set_bit(pmc->idx, pmu->pmc_in_use);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr_info);
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
	return kvm_x86_ops.pmu_ops->set_msr(vcpu, msr_info);
}

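/*
 * Refresh the PMU configuration.  Called when the underlying settings change
 * (e.g. the guest's PMU-related CPUID is updated), which should happen
 * rarely.
 */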
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops.pmu_ops->refresh(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	irq_work_sync(&pmu->irq_work);
	kvm_x86_ops.pmu_ops->reset(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	memset(pmu, 0, sizeof(*pmu));
	kvm_x86_ops.pmu_ops->init(vcpu);
	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
	pmu->event_count = 0;
	pmu->need_cleanup = false;
	kvm_pmu_refresh(vcpu);
}

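/*
 * Returns true if the counter looks enabled from the guest's point of view,
 * even if no perf_event has been created for it yet.
 */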
static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (pmc_is_fixed(pmc))
		return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
					pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;

	return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
}

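/* Release perf_events for vPMCs that have been unused for a full time slice. */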
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
	int i;

	pmu->need_cleanup = false;

	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
		      pmu->pmc_in_use, X86_PMC_IDX_MAX);

	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);

		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
			pmc_stop_counter(pmc);
	}

	if (kvm_x86_ops.pmu_ops->cleanup)
		kvm_x86_ops.pmu_ops->cleanup(vcpu);

	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}

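/* Install a new PMU event filter for the VM (KVM_SET_PMU_EVENT_FILTER ioctl). */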
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
	struct kvm_pmu_event_filter tmp, *filter;
	size_t size;
	int r;

	if (copy_from_user(&tmp, argp, sizeof(tmp)))
		return -EFAULT;

	if (tmp.action != KVM_PMU_EVENT_ALLOW &&
	    tmp.action != KVM_PMU_EVENT_DENY)
		return -EINVAL;

	if (tmp.flags != 0)
		return -EINVAL;

	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
		return -E2BIG;

	size = struct_size(filter, events, tmp.nevents);
	filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
	if (!filter)
		return -ENOMEM;

	r = -EFAULT;
	if (copy_from_user(filter, argp, size))
		goto cleanup;

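	/* Ensure nevents can't be changed between the two user copies. */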
	*filter = tmp;

	mutex_lock(&kvm->lock);
	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
				     mutex_is_locked(&kvm->lock));
	mutex_unlock(&kvm->lock);

	synchronize_srcu_expedited(&kvm->srcu);
	r = 0;
cleanup:
	kfree(filter);
	return r;
}