// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2015 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *   Wei Huang    <wei@redhat.com>
 */
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"

/* This is enough to filter the vast majority of currently defined events. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300

/* NOTE:
 * - Each perf counter is defined as "struct kvm_pmc";
 * - There are two types of perf counters: general purpose (gp) and fixed.
 *   gp counters are stored in gp_counters[] and fixed counters are stored
 *   in fixed_counters[] respectively.  Both of them are part of "struct
 *   kvm_pmu";
 * - pmu.c understands the difference between gp counters and fixed counters.
 *   However AMD doesn't support fixed counters;
 * - There are three types of index to access perf counters (PMC):
 *     1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD
 *        has MSR_K7_PERFCTRn.
 *     2. MSR Index (named idx): This normally is used by the RDPMC
 *        instruction.  For instance AMD RDPMC uses 0000_0003h in ECX to
 *        access C001_0007h (MSR_K7_PERFCTR3).  Intel has a similar
 *        mechanism, except that it also supports fixed counters.  idx can
 *        be used as an index into the gp and fixed counters.
 *     3. Global PMC Index (named pmc): pmc is an index specific to PMU
 *        code.  Each pmc, stored in the kvm_pmc.idx field, is unique across
 *        all perf counters (both gp and fixed).  The mapping between pmc
 *        and perf counters is as follows:
 *        * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
 *                 [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
 *        * AMD:   [0 .. AMD64_NUM_COUNTERS-1] <=> gp counters
 */

static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

	kvm_pmu_deliver_pmi(vcpu);
}

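/*
 * Overflow callback for counters programmed without PMI delivery.  This runs
 * from perf's overflow handling (NMI-like context), so it only latches the
 * overflow in reprogram_pmi and global_status and defers the real work to
 * kvm_pmu_handle_event() via KVM_REQ_PMU.
 */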
static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
	}
}

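/*
 * Overflow callback for counters programmed with PMI delivery (the guest set
 * ARCH_PERFMON_EVENTSEL_INT).  Same bookkeeping as kvm_perf_overflow(), plus
 * a request to inject a PMI into the guest.
 */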
static void kvm_perf_overflow_intr(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);

		/*
		 * Inject the PMI.  If the vCPU was in guest mode when the NMI
		 * arrived, the PMI can be delivered on the next guest entry.
		 * Otherwise the vCPU may be halted and will not re-enter guest
		 * mode until it is woken up, which cannot be done from NMI
		 * context, so defer the kick to irq_work.
		 */
		if (!kvm_is_in_guest())
			irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
		else
			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
	}
}

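/*
 * Create the host perf_event that backs a guest PMC.  The event is pinned,
 * excludes the host (it only counts while this task runs the guest), and its
 * sample period is derived from the current counter value so that a host
 * overflow corresponds to the guest counter wrapping.
 */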
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
				  unsigned config, bool exclude_user,
				  bool exclude_kernel, bool intr,
				  bool in_tx, bool in_tx_cp)
{
	struct perf_event *event;
	struct perf_event_attr attr = {
		.type = type,
		.size = sizeof(attr),
		.pinned = true,
		.exclude_idle = true,
		.exclude_host = 1,
		.exclude_user = exclude_user,
		.exclude_kernel = exclude_kernel,
		.config = config,
	};

	attr.sample_period = get_sample_period(pmc, pmc->counter);

	if (in_tx)
		attr.config |= HSW_IN_TX;
	if (in_tx_cp) {
		/*
		 * HSW_IN_TX_CHECKPOINTED is not supported with nonzero
		 * period.  Just clear the sample period so at least
		 * allocating the counter doesn't fail.
		 */
		attr.sample_period = 0;
		attr.config |= HSW_IN_TX_CHECKPOINTED;
	}

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 intr ? kvm_perf_overflow_intr :
						 kvm_perf_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
				     PTR_ERR(event), pmc->idx);
		return;
	}

	pmc->perf_event = event;
	pmc_to_pmu(pmc)->event_count++;
	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
	pmc->is_paused = false;
}

static void pmc_pause_counter(struct kvm_pmc *pmc)
{
	u64 counter = pmc->counter;

	if (!pmc->perf_event || pmc->is_paused)
		return;

	/* Update the counter, resetting the event value to avoid redundant accumulation. */
	counter += perf_event_pause(pmc->perf_event, true);
	pmc->counter = counter & pmc_bitmask(pmc);
	pmc->is_paused = true;
}

static bool pmc_resume_counter(struct kvm_pmc *pmc)
{
	if (!pmc->perf_event)
		return false;

	/* Recalibrate the sample period and check that perf core accepts it. */
	if (perf_event_period(pmc->perf_event,
			      get_sample_period(pmc, pmc->counter)))
		return false;

	/* Reuse the existing perf_event, as pmc_reprogram_counter() would. */
	perf_event_enable(pmc->perf_event);
	pmc->is_paused = false;

	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
	return true;
}

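/*
 * (Re)program a general purpose counter from its guest eventsel.  The event
 * is not programmed if the VM's PMU event filter disallows it; otherwise it
 * is mapped to a generic perf hardware event when possible and programmed as
 * a raw event when not.
 */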
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
	unsigned config, type = PERF_TYPE_RAW;
	u8 event_select, unit_mask;
	struct kvm *kvm = pmc->vcpu->kvm;
	struct kvm_pmu_event_filter *filter;
	int i;
	bool allow_event = true;

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

	pmc->eventsel = eventsel;

	pmc_pause_counter(pmc);

	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		for (i = 0; i < filter->nevents; i++)
			if (filter->events[i] ==
			    (eventsel & AMD64_RAW_EVENT_MASK_NB))
				break;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    i == filter->nevents)
			allow_event = false;
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    i < filter->nevents)
			allow_event = false;
	}
	if (!allow_event)
		return;

	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;

	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
			  ARCH_PERFMON_EVENTSEL_INV |
			  ARCH_PERFMON_EVENTSEL_CMASK |
			  HSW_IN_TX |
			  HSW_IN_TX_CHECKPOINTED))) {
		config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
							      event_select,
							      unit_mask);
		if (config != PERF_COUNT_HW_MAX)
			type = PERF_TYPE_HARDWARE;
	}

	if (type == PERF_TYPE_RAW)
		config = eventsel & X86_RAW_EVENT_MASK;

	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = eventsel;
	pmc_reprogram_counter(pmc, type, config,
			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
			      eventsel & ARCH_PERFMON_EVENTSEL_INT,
			      (eventsel & HSW_IN_TX),
			      (eventsel & HSW_IN_TX_CHECKPOINTED));
}
EXPORT_SYMBOL_GPL(reprogram_gp_counter);

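/*
 * (Re)program a fixed counter from its 4-bit control field: bits 0-1 select
 * the enabled ring levels (OS/user) and bit 3 requests a PMI on overflow.
 * The VM's event filter can veto individual fixed counters via
 * fixed_counter_bitmap.
 */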
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
	unsigned en_field = ctrl & 0x3;
	bool pmi = ctrl & 0x8;
	struct kvm_pmu_event_filter *filter;
	struct kvm *kvm = pmc->vcpu->kvm;

	pmc_pause_counter(pmc);

	if (!en_field || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
	}

	if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = (u64)ctrl;
	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
			      kvm_x86_ops.pmu_ops->find_fixed_event(idx),
			      !(en_field & 0x2), /* exclude_user */
			      !(en_field & 0x1), /* exclude_kernel */
			      pmi, false, false);
}
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);

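/* Reprogram a counter identified by its global PMC index (gp or fixed). */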
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);

	if (!pmc)
		return;

	if (pmc_is_gp(pmc))
		reprogram_gp_counter(pmc, pmc->eventsel);
	else {
		int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
		u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);

		reprogram_fixed_counter(pmc, ctrl, idx);
	}
}
EXPORT_SYMBOL_GPL(reprogram_counter);

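/*
 * Handle a deferred KVM_REQ_PMU: reprogram every counter whose bit is set in
 * reprogram_pmi, then run the cleanup pass if one was requested.
 */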
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int bit;

	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);

		if (unlikely(!pmc || !pmc->perf_event)) {
			clear_bit(bit, pmu->reprogram_pmi);
			continue;
		}

		reprogram_counter(pmu, bit);
	}

	/*
	 * Unused perf_events are only released if the corresponding MSRs
	 * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
	 * triggers KVM_REQ_PMU if cleanup is needed.
	 */
	if (unlikely(pmu->need_cleanup))
		kvm_pmu_cleanup(vcpu);
}

/* Check whether idx is a valid index for accessing the PMU. */
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}

bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
	switch (pmc_idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		return true;
	}
	return false;
}

static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	u64 ctr_val;

	switch (idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
		ctr_val = rdtsc();
		break;
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
		ctr_val = ktime_get_boottime_ns();
		break;
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		ctr_val = ktime_get_boottime_ns() +
			vcpu->kvm->arch.kvmclock_offset;
		break;
	default:
		return 1;
	}

	*data = ctr_val;
	return 0;
}

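/*
 * Emulate RDPMC.  ECX bit 31 selects "fast" mode, which truncates the result
 * to 32 bits.  The read fails (return 1) if the guest has no PMU, the index
 * does not map to a counter, or CR4.PCE is clear while executing in protected
 * mode outside ring 0.
 */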
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	bool fast_mode = idx & (1u << 31);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 mask = fast_mode ? ~0u : ~0ull;

	if (!pmu->version)
		return 1;

	if (is_vmware_backdoor_pmc(idx))
		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

	pmc = kvm_x86_ops.pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
	if (!pmc)
		return 1;

	if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
	    (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
	    (kvm_read_cr0(vcpu) & X86_CR0_PE))
		return 1;

	*data = pmc_read_counter(pmc) & mask;
	return 0;
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu)) {
		if (kvm_x86_ops.pmu_ops->deliver_pmi)
			kvm_x86_ops.pmu_ops->deliver_pmi(vcpu);
		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
	}
}

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	return kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
		kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, msr);
}

static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr);

	if (pmc)
		__set_bit(pmc->idx, pmu->pmc_in_use);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr_info);
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
	return kvm_x86_ops.pmu_ops->set_msr(vcpu, msr_info);
}

/* Refresh PMU settings. This function is generally called when underlying
 * settings change (such as changes of the PMU CPUID by the guest), which
 * should rarely happen.
 */
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops.pmu_ops->refresh(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	irq_work_sync(&pmu->irq_work);
	kvm_x86_ops.pmu_ops->reset(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	memset(pmu, 0, sizeof(*pmu));
	kvm_x86_ops.pmu_ops->init(vcpu);
	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
	pmu->event_count = 0;
	pmu->need_cleanup = false;
	kvm_pmu_refresh(vcpu);
}

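/*
 * A PMC is "speculatively" in use if the guest has enabled it in the relevant
 * control MSR, regardless of whether a host perf_event is currently attached.
 */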
static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (pmc_is_fixed(pmc))
		return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
					pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;

	return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
}

/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
	int i;

	pmu->need_cleanup = false;

	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
		      pmu->pmc_in_use, X86_PMC_IDX_MAX);

	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);

		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
			pmc_stop_counter(pmc);
	}

	if (kvm_x86_ops.pmu_ops->cleanup)
		kvm_x86_ops.pmu_ops->cleanup(vcpu);

	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}

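/*
 * KVM_SET_PMU_EVENT_FILTER ioctl: validate and copy the userspace filter,
 * publish it via RCU in kvm->arch.pmu_event_filter, and wait for readers of
 * the old filter (under kvm->srcu) before freeing it.
 */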
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
	struct kvm_pmu_event_filter tmp, *filter;
	size_t size;
	int r;

	if (copy_from_user(&tmp, argp, sizeof(tmp)))
		return -EFAULT;

	if (tmp.action != KVM_PMU_EVENT_ALLOW &&
	    tmp.action != KVM_PMU_EVENT_DENY)
		return -EINVAL;

	if (tmp.flags != 0)
		return -EINVAL;

	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
		return -E2BIG;

	size = struct_size(filter, events, tmp.nevents);
	filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
	if (!filter)
		return -ENOMEM;

	r = -EFAULT;
	if (copy_from_user(filter, argp, size))
		goto cleanup;

	/* Ensure nevents can't be changed between the user copies. */
	*filter = tmp;

	mutex_lock(&kvm->lock);
	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
				     mutex_is_locked(&kvm->lock));
	mutex_unlock(&kvm->lock);

	synchronize_srcu_expedited(&kvm->srcu);
	r = 0;
cleanup:
	kfree(filter);
	return r;
}