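/*
 * KVM x86 Performance Monitoring Unit (PMU) emulation: guest-programmed
 * counters are backed by host perf_events, and counter overflow is
 * forwarded to the guest as a PMI via the local APIC.
 */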
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"

/* Upper bound on the number of events in a single PMU event filter. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
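
/*
 * NOTE on counter bookkeeping:
 * - Each emulated counter is a "struct kvm_pmc", either general purpose (gp)
 *   or fixed; fixed counters only exist on Intel.
 * - A counter can be referenced by its MSR address, by the index passed to
 *   RDPMC in ECX, or by a global PMC index (kvm_pmc.idx) that is unique
 *   across gp and fixed counters and is resolved via
 *   kvm_x86_ops.pmu_ops->pmc_idx_to_pmc().
 * - When a counter must be (re)programmed or it overflows, its bit is set in
 *   pmu->reprogram_pmi and KVM_REQ_PMU is raised; kvm_pmu_handle_event()
 *   then reconfigures the backing perf_event.
 */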
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
        struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
        struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

        kvm_pmu_deliver_pmi(vcpu);
}

static void kvm_perf_overflow(struct perf_event *perf_event,
                              struct perf_sample_data *data,
                              struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);

        if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
                __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
        }
}

static void kvm_perf_overflow_intr(struct perf_event *perf_event,
                                   struct perf_sample_data *data,
                                   struct pt_regs *regs)
{
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);

        if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
                __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
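
                /*
                 * Inject the PMI. If the vCPU was in guest mode when the
                 * counter overflowed, requesting KVM_REQ_PMI is enough: the
                 * interrupt is injected on the next guest entry. Otherwise
                 * the vCPU may be halted and must be woken up, which cannot
                 * be done from NMI context, so defer delivery to irq_work.
                 */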
                if (!kvm_is_in_guest())
                        irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
                else
                        kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
        }
}

static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
                                  unsigned config, bool exclude_user,
                                  bool exclude_kernel, bool intr,
                                  bool in_tx, bool in_tx_cp)
{
        struct perf_event *event;
        struct perf_event_attr attr = {
                .type = type,
                .size = sizeof(attr),
                .pinned = true,
                .exclude_idle = true,
                .exclude_host = 1,
                .exclude_user = exclude_user,
                .exclude_kernel = exclude_kernel,
                .config = config,
        };

        attr.sample_period = get_sample_period(pmc, pmc->counter);

        if (in_tx)
                attr.config |= HSW_IN_TX;
        if (in_tx_cp) {
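                /*
                 * HSW_IN_TX_CHECKPOINTED is not supported with a nonzero
                 * sample period; clear the period so that creating the
                 * perf_event does not fail.
                 */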
                attr.sample_period = 0;
                attr.config |= HSW_IN_TX_CHECKPOINTED;
        }

        event = perf_event_create_kernel_counter(&attr, -1, current,
                                                 intr ? kvm_perf_overflow_intr :
                                                 kvm_perf_overflow, pmc);
        if (IS_ERR(event)) {
                pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
                                     PTR_ERR(event), pmc->idx);
                return;
        }

        pmc->perf_event = event;
        pmc_to_pmu(pmc)->event_count++;
        clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
}

static void pmc_pause_counter(struct kvm_pmc *pmc)
{
        u64 counter = pmc->counter;

        if (!pmc->perf_event)
                return;
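
        /* Fold the event's current count into pmc->counter and reset it to avoid double counting. */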
        counter += perf_event_pause(pmc->perf_event, true);
        pmc->counter = counter & pmc_bitmask(pmc);
}

static bool pmc_resume_counter(struct kvm_pmc *pmc)
{
        if (!pmc->perf_event)
                return false;
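
        /* Recalibrate the sample period and check that the perf core accepts it. */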
        if (perf_event_period(pmc->perf_event,
                              get_sample_period(pmc, pmc->counter)))
                return false;
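
        /* Reuse the existing perf_event rather than creating a new one. */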
        perf_event_enable(pmc->perf_event);

        clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
        return true;
}

void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
        unsigned config, type = PERF_TYPE_RAW;
        u8 event_select, unit_mask;
        struct kvm *kvm = pmc->vcpu->kvm;
        struct kvm_pmu_event_filter *filter;
        int i;
        bool allow_event = true;

        if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
                printk_once("kvm pmu: pin control bit is ignored\n");

        pmc->eventsel = eventsel;

        pmc_pause_counter(pmc);

        if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
                return;

        filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
        if (filter) {
                for (i = 0; i < filter->nevents; i++)
                        if (filter->events[i] ==
                            (eventsel & AMD64_RAW_EVENT_MASK_NB))
                                break;
                if (filter->action == KVM_PMU_EVENT_ALLOW &&
                    i == filter->nevents)
                        allow_event = false;
                if (filter->action == KVM_PMU_EVENT_DENY &&
                    i < filter->nevents)
                        allow_event = false;
        }
        if (!allow_event)
                return;

        event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
        unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;

        if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
                          ARCH_PERFMON_EVENTSEL_INV |
                          ARCH_PERFMON_EVENTSEL_CMASK |
                          HSW_IN_TX |
                          HSW_IN_TX_CHECKPOINTED))) {
                config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
                                                              event_select,
                                                              unit_mask);
                if (config != PERF_COUNT_HW_MAX)
                        type = PERF_TYPE_HARDWARE;
        }

        if (type == PERF_TYPE_RAW)
                config = eventsel & X86_RAW_EVENT_MASK;

        if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
                return;

        pmc_release_perf_event(pmc);

        pmc->current_config = eventsel;
        pmc_reprogram_counter(pmc, type, config,
                              !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
                              !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
                              eventsel & ARCH_PERFMON_EVENTSEL_INT,
                              (eventsel & HSW_IN_TX),
                              (eventsel & HSW_IN_TX_CHECKPOINTED));
}
EXPORT_SYMBOL_GPL(reprogram_gp_counter);

void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
        unsigned en_field = ctrl & 0x3;
        bool pmi = ctrl & 0x8;
        struct kvm_pmu_event_filter *filter;
        struct kvm *kvm = pmc->vcpu->kvm;

        pmc_pause_counter(pmc);

        if (!en_field || !pmc_is_enabled(pmc))
                return;

        filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
        if (filter) {
                if (filter->action == KVM_PMU_EVENT_DENY &&
                    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
                        return;
                if (filter->action == KVM_PMU_EVENT_ALLOW &&
                    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
                        return;
        }

        if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
                return;

        pmc_release_perf_event(pmc);

        pmc->current_config = (u64)ctrl;
        pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
                              kvm_x86_ops.pmu_ops->find_fixed_event(idx),
                              !(en_field & 0x2),
                              !(en_field & 0x1),
                              pmi, false, false);
}
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);

void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
        struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);

        if (!pmc)
                return;

        if (pmc_is_gp(pmc))
                reprogram_gp_counter(pmc, pmc->eventsel);
        else {
                int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
                u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);

                reprogram_fixed_counter(pmc, ctrl, idx);
        }
}
EXPORT_SYMBOL_GPL(reprogram_counter);

void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        int bit;

        for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
                struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);

                if (unlikely(!pmc || !pmc->perf_event)) {
                        clear_bit(bit, pmu->reprogram_pmi);
                        continue;
                }

                reprogram_counter(pmu, bit);
        }
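
        /*
         * Unused perf_events are released lazily: need_cleanup is set when a
         * counter's MSRs went untouched for an entire vCPU time slice, and
         * the actual release is done here from the KVM_REQ_PMU path.
         */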
        if (unlikely(pmu->need_cleanup))
                kvm_pmu_cleanup(vcpu);
}
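
/* Check whether @idx is a valid RDPMC index for this vCPU's PMU. */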
int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
        return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}

bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
        switch (pmc_idx) {
        case VMWARE_BACKDOOR_PMC_HOST_TSC:
        case VMWARE_BACKDOOR_PMC_REAL_TIME:
        case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
                return true;
        }
        return false;
}

static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
        u64 ctr_val;

        switch (idx) {
        case VMWARE_BACKDOOR_PMC_HOST_TSC:
                ctr_val = rdtsc();
                break;
        case VMWARE_BACKDOOR_PMC_REAL_TIME:
                ctr_val = ktime_get_boottime_ns();
                break;
        case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
                ctr_val = ktime_get_boottime_ns() +
                        vcpu->kvm->arch.kvmclock_offset;
                break;
        default:
                return 1;
        }

        *data = ctr_val;
        return 0;
}

int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
        bool fast_mode = idx & (1u << 31);
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u64 mask = fast_mode ? ~0u : ~0ull;

        if (!pmu->version)
                return 1;

        if (is_vmware_backdoor_pmc(idx))
                return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

        pmc = kvm_x86_ops.pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
        if (!pmc)
                return 1;

        *data = pmc_read_counter(pmc) & mask;
        return 0;
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
        if (lapic_in_kernel(vcpu))
                kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
}

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
        return kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
                kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, msr);
}

static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr);

        if (pmc)
                __set_bit(pmc->idx, pmu->pmc_in_use);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
        return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr, data);
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
        return kvm_x86_ops.pmu_ops->set_msr(vcpu, msr_info);
}
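
/*
 * Refresh the vendor-specific PMU configuration, typically after the guest's
 * PMU-related CPUID has changed.
 */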
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
        kvm_x86_ops.pmu_ops->refresh(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

        irq_work_sync(&pmu->irq_work);
        kvm_x86_ops.pmu_ops->reset(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

        memset(pmu, 0, sizeof(*pmu));
        kvm_x86_ops.pmu_ops->init(vcpu);
        init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
        pmu->event_count = 0;
        pmu->need_cleanup = false;
        kvm_pmu_refresh(vcpu);
}

static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);

        if (pmc_is_fixed(pmc))
                return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
                                        pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;

        return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
}
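
/* Release perf_events for counters the guest has stopped using. */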
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc = NULL;
        DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
        int i;

        pmu->need_cleanup = false;

        bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
                      pmu->pmc_in_use, X86_PMC_IDX_MAX);

        for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
                pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);

                if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
                        pmc_stop_counter(pmc);
        }

        bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
        kvm_pmu_reset(vcpu);
}

int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
        struct kvm_pmu_event_filter tmp, *filter;
        size_t size;
        int r;

        if (copy_from_user(&tmp, argp, sizeof(tmp)))
                return -EFAULT;

        if (tmp.action != KVM_PMU_EVENT_ALLOW &&
            tmp.action != KVM_PMU_EVENT_DENY)
                return -EINVAL;

        if (tmp.flags != 0)
                return -EINVAL;

        if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
                return -E2BIG;

        size = struct_size(filter, events, tmp.nevents);
        filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
        if (!filter)
                return -ENOMEM;

        r = -EFAULT;
        if (copy_from_user(filter, argp, size))
                goto cleanup;
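
        /*
         * Restore the previously validated header fields in case userspace
         * changed them between the two copies.
         */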
        *filter = tmp;

        mutex_lock(&kvm->lock);
        filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
                                     mutex_is_locked(&kvm->lock));
        mutex_unlock(&kvm->lock);

        synchronize_srcu_expedited(&kvm->srcu);
        r = 0;
cleanup:
        kfree(filter);
        return r;
}