// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 */

#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched/signal.h>
#include <linux/sched/stat.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>
#include <linux/miscdevice.h>
#include <linux/debugfs.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/of.h>

#include <asm/ftrace.h>
#include <asm/reg.h>
#include <asm/ppc-opcode.h>
#include <asm/asm-prototypes.h>
#include <asm/archrandom.h>
#include <asm/debug.h>
#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <asm/smp.h>
#include <asm/dbell.h>
#include <asm/hmi.h>
#include <asm/pnv-pci.h>
#include <asm/mmu.h>
#include <asm/opal.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/hw_breakpoint.h>

#include "book3s.h"

#define CREATE_TRACE_POINTS
#include "trace_hv.h"

/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)
/* Used to indicate that a guest passthrough interrupt needs to be handled */
#define RESUME_PASSTHROUGH	(RESUME_GUEST | RESUME_FLAG_ARCH2)

/* Used as a "null" value for timebase values */
#define TB_NIL	(~(u64)0)

static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
94
95static int dynamic_mt_modes = 6;
96module_param(dynamic_mt_modes, int, 0644);
97MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
98static int target_smt_mode;
99module_param(target_smt_mode, int, 0644);
100MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
101
102static bool indep_threads_mode = true;
103module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
104MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
105
106static bool one_vm_per_core;
107module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
108MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
109
110#ifdef CONFIG_KVM_XICS
111static struct kernel_param_ops module_param_ops = {
112 .set = param_set_int,
113 .get = param_get_int,
114};
115
116module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644);
117MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
118
119module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
120MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
121#endif
122
/* If set, guests are allowed to create and control nested guests */
124static bool nested = true;
125module_param(nested, bool, S_IRUGO | S_IWUSR);
126MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
127
128static inline bool nesting_enabled(struct kvm *kvm)
129{
130 return kvm->arch.nested_enable && kvm_is_radix(kvm);
131}
132
/* If set, HPT guests and radix guests may not be run on the same physical core */
134static bool no_mixing_hpt_and_radix;
135
136static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
137static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
138
/*
 * RWMR values for POWER8.  These control the rate at which PURR
 * and SPURR count and should be set according to the number of
 * online threads in the vcore being run.
 */
144#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
145#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
146#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
147#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
148#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
149#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
150#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
151#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
152
153static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
154 RWMR_RPA_P8_1THREAD,
155 RWMR_RPA_P8_1THREAD,
156 RWMR_RPA_P8_2THREAD,
157 RWMR_RPA_P8_3THREAD,
158 RWMR_RPA_P8_4THREAD,
159 RWMR_RPA_P8_5THREAD,
160 RWMR_RPA_P8_6THREAD,
161 RWMR_RPA_P8_7THREAD,
162 RWMR_RPA_P8_8THREAD,
163};
164
165static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
166 int *ip)
167{
168 int i = *ip;
169 struct kvm_vcpu *vcpu;
170
171 while (++i < MAX_SMT_THREADS) {
172 vcpu = READ_ONCE(vc->runnable_threads[i]);
173 if (vcpu) {
174 *ip = i;
175 return vcpu;
176 }
177 }
178 return NULL;
179}
180
/* Used to traverse the list of runnable threads for a vcore */
182#define for_each_runnable_thread(i, vcpu, vc) \
183 for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
184
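/*
 * Send an IPI to wake up the target hardware thread, using msgsnd where
 * possible and falling back to the XICS/OPAL IPI mechanism.  Returns true
 * if an IPI was sent, false if the caller should use smp_send_reschedule()
 * instead.
 */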
185static bool kvmppc_ipi_thread(int cpu)
186{
187 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
188
	/* If we're running as a nested hypervisor, we can't use msgsnd */
190 if (kvmhv_on_pseries())
191 return false;
192
	/* On POWER9 we can use msgsnd to IPI any cpu */
194 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
195 msg |= get_hard_smp_processor_id(cpu);
196 smp_mb();
197 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
198 return true;
199 }
200
	/* On POWER8 for IPIs to threads in the same core, use msgsnd */
202 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
203 preempt_disable();
204 if (cpu_first_thread_sibling(cpu) ==
205 cpu_first_thread_sibling(smp_processor_id())) {
206 msg |= cpu_thread_in_core(cpu);
207 smp_mb();
208 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
209 preempt_enable();
210 return true;
211 }
212 preempt_enable();
213 }
214
215#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
216 if (cpu >= 0 && cpu < nr_cpu_ids) {
217 if (paca_ptrs[cpu]->kvm_hstate.xics_phys) {
218 xics_wake_cpu(cpu);
219 return true;
220 }
221 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
222 return true;
223 }
224#endif
225
226 return false;
227}
228
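/*
 * Kick a vcpu out of the guest or wake it if it is sleeping: wake any
 * sleeper on the vcpu's wait queue, then IPI the physical thread the
 * vcpu last ran on, or reschedule its host CPU as a fallback.
 */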
229static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
230{
231 int cpu;
232 struct swait_queue_head *wqp;
233
234 wqp = kvm_arch_vcpu_wq(vcpu);
235 if (swq_has_sleeper(wqp)) {
236 swake_up_one(wqp);
237 ++vcpu->stat.halt_wakeup;
238 }
239
240 cpu = READ_ONCE(vcpu->arch.thread_cpu);
241 if (cpu >= 0 && kvmppc_ipi_thread(cpu))
242 return;
243
244
245 cpu = vcpu->cpu;
246 if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
247 smp_send_reschedule(cpu);
248}
249
/*
 * We use the vcpu_load/put functions to measure stolen time.
 * Stolen time is counted as time when either the vcpu or its vcore is
 * preempted.  A vcpu is "busy in host" when it is executing host code
 * on the guest's behalf (e.g. handling a hypercall or a page fault);
 * time when it is runnable but preempted in that state also counts as
 * stolen.
 *
 * Stolen time due to the whole vcore being preempted accumulates in
 * vc->stolen_tb, protected by vc->stoltb_lock, via
 * kvmppc_core_start_stolen()/kvmppc_core_end_stolen().  Per-vcpu
 * stolen time while busy in the host accumulates in
 * vcpu->arch.busy_stolen, protected by vcpu->arch.tbacct_lock, via the
 * vcpu load/put hooks below.  Both are folded into the dispatch trace
 * log entries written by kvmppc_create_dtl_entry().
 */
283static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
284{
285 unsigned long flags;
286
287 spin_lock_irqsave(&vc->stoltb_lock, flags);
288 vc->preempt_tb = mftb();
289 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
290}
291
292static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
293{
294 unsigned long flags;
295
296 spin_lock_irqsave(&vc->stoltb_lock, flags);
297 if (vc->preempt_tb != TB_NIL) {
298 vc->stolen_tb += mftb() - vc->preempt_tb;
299 vc->preempt_tb = TB_NIL;
300 }
301 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
302}
303
304static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
305{
306 struct kvmppc_vcore *vc = vcpu->arch.vcore;
307 unsigned long flags;
308
	/*
	 * We can test vc->runner without taking the vcore lock,
	 * because only this task ever sets vc->runner to this
	 * vcpu, and once it is set to this vcpu, only this task
	 * ever sets it to NULL.
	 */
315 if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
316 kvmppc_core_end_stolen(vc);
317
318 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
319 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
320 vcpu->arch.busy_preempt != TB_NIL) {
321 vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
322 vcpu->arch.busy_preempt = TB_NIL;
323 }
324 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
325}
326
327static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
328{
329 struct kvmppc_vcore *vc = vcpu->arch.vcore;
330 unsigned long flags;
331
332 if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
333 kvmppc_core_start_stolen(vc);
334
335 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
336 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
337 vcpu->arch.busy_preempt = mftb();
338 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
339}
340
341static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
342{
	/*
	 * Check for an illegal transactional state bit combination
	 * (which would otherwise cause a TM Bad Thing interrupt
	 * inside the guest) and if so, clear it.
	 */
347 if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
348 msr &= ~MSR_TS_MASK;
349 vcpu->arch.shregs.msr = msr;
350 kvmppc_end_cede(vcpu);
351}
352
353static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
354{
355 vcpu->arch.pvr = pvr;
356}
357
358
359#define PCR_ARCH_300 (PCR_ARCH_207 << 1)
360
361static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
362{
363 unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
364 struct kvmppc_vcore *vc = vcpu->arch.vcore;
365
366
367 if (cpu_has_feature(CPU_FTR_ARCH_300))
368 host_pcr_bit = PCR_ARCH_300;
369 else if (cpu_has_feature(CPU_FTR_ARCH_207S))
370 host_pcr_bit = PCR_ARCH_207;
371 else if (cpu_has_feature(CPU_FTR_ARCH_206))
372 host_pcr_bit = PCR_ARCH_206;
373 else
374 host_pcr_bit = PCR_ARCH_205;
375
376
377 guest_pcr_bit = host_pcr_bit;
378 if (arch_compat) {
379 switch (arch_compat) {
380 case PVR_ARCH_205:
381 guest_pcr_bit = PCR_ARCH_205;
382 break;
383 case PVR_ARCH_206:
384 case PVR_ARCH_206p:
385 guest_pcr_bit = PCR_ARCH_206;
386 break;
387 case PVR_ARCH_207:
388 guest_pcr_bit = PCR_ARCH_207;
389 break;
390 case PVR_ARCH_300:
391 guest_pcr_bit = PCR_ARCH_300;
392 break;
393 default:
394 return -EINVAL;
395 }
396 }
397
398
399 if (guest_pcr_bit > host_pcr_bit)
400 return -EINVAL;
401
402 spin_lock(&vc->lock);
403 vc->arch_compat = arch_compat;
	/*
	 * Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit.
	 * Also set all reserved PCR bits.
	 */
408 vc->pcr = (host_pcr_bit - guest_pcr_bit) | PCR_MASK;
409 spin_unlock(&vc->lock);
410
411 return 0;
412}
413
414static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
415{
416 int r;
417
418 pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
419 pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
420 vcpu->arch.regs.nip, vcpu->arch.shregs.msr, vcpu->arch.trap);
421 for (r = 0; r < 16; ++r)
422 pr_err("r%2d = %.16lx r%d = %.16lx\n",
423 r, kvmppc_get_gpr(vcpu, r),
424 r+16, kvmppc_get_gpr(vcpu, r+16));
425 pr_err("ctr = %.16lx lr = %.16lx\n",
426 vcpu->arch.regs.ctr, vcpu->arch.regs.link);
427 pr_err("srr0 = %.16llx srr1 = %.16llx\n",
428 vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
429 pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
430 vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
431 pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
432 vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
433 pr_err("cr = %.8lx xer = %.16lx dsisr = %.8x\n",
434 vcpu->arch.regs.ccr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
435 pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
436 pr_err("fault dar = %.16lx dsisr = %.8x\n",
437 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
438 pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
439 for (r = 0; r < vcpu->arch.slb_max; ++r)
440 pr_err(" ESID = %.16llx VSID = %.16llx\n",
441 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
442 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
443 vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
444 vcpu->arch.last_inst);
445}
446
447static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
448{
449 return kvm_get_vcpu_by_id(kvm, id);
450}
451
452static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
453{
454 vpa->__old_status |= LPPACA_OLD_SHARED_PROC;
455 vpa->yield_count = cpu_to_be32(1);
456}
457
458static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
459 unsigned long addr, unsigned long len)
460{
461
462 if (addr & (L1_CACHE_BYTES - 1))
463 return -EINVAL;
464 spin_lock(&vcpu->arch.vpa_update_lock);
465 if (v->next_gpa != addr || v->len != len) {
466 v->next_gpa = addr;
467 v->len = addr ? len : 0;
468 v->update_pending = 1;
469 }
470 spin_unlock(&vcpu->arch.vpa_update_lock);
471 return 0;
472}
473
/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
475struct reg_vpa {
476 u32 dummy;
477 union {
478 __be16 hword;
479 __be32 word;
480 } length;
481};
482
483static int vpa_is_registered(struct kvmppc_vpa *vpap)
484{
485 if (vpap->update_pending)
486 return vpap->next_gpa != 0;
487 return vpap->pinned_addr != NULL;
488}
489
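/*
 * Handle the H_REGISTER_VPA hcall: validate the sub-function and the
 * supplied address/length, then record the request so that the VPA,
 * SLB shadow buffer or dispatch trace log is (un)pinned the next time
 * the target vcpu's areas are updated.
 */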
490static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
491 unsigned long flags,
492 unsigned long vcpuid, unsigned long vpa)
493{
494 struct kvm *kvm = vcpu->kvm;
495 unsigned long len, nb;
496 void *va;
497 struct kvm_vcpu *tvcpu;
498 int err;
499 int subfunc;
500 struct kvmppc_vpa *vpap;
501
502 tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
503 if (!tvcpu)
504 return H_PARAMETER;
505
506 subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
507 if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
508 subfunc == H_VPA_REG_SLB) {
509
510 if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
511 return H_PARAMETER;
512
513
514 va = kvmppc_pin_guest_page(kvm, vpa, &nb);
515 if (va == NULL)
516 return H_PARAMETER;
517 if (subfunc == H_VPA_REG_VPA)
518 len = be16_to_cpu(((struct reg_vpa *)va)->length.hword);
519 else
520 len = be32_to_cpu(((struct reg_vpa *)va)->length.word);
521 kvmppc_unpin_guest_page(kvm, va, vpa, false);
522
523
524 if (len > nb || len < sizeof(struct reg_vpa))
525 return H_PARAMETER;
526 } else {
527 vpa = 0;
528 len = 0;
529 }
530
531 err = H_PARAMETER;
532 vpap = NULL;
533 spin_lock(&tvcpu->arch.vpa_update_lock);
534
535 switch (subfunc) {
536 case H_VPA_REG_VPA:
		/*
		 * The lppaca structure as defined here is 640 bytes.
		 * The guest is allowed to register a larger area, so
		 * only insist that the registered length covers the
		 * part we actually use.
		 */
543 BUILD_BUG_ON(sizeof(struct lppaca) != 640);
544 if (len < sizeof(struct lppaca))
545 break;
546 vpap = &tvcpu->arch.vpa;
547 err = 0;
548 break;
549
550 case H_VPA_REG_DTL:
551 if (len < sizeof(struct dtl_entry))
552 break;
553 len -= len % sizeof(struct dtl_entry);
554
555
556 err = H_RESOURCE;
557 if (!vpa_is_registered(&tvcpu->arch.vpa))
558 break;
559
560 vpap = &tvcpu->arch.dtl;
561 err = 0;
562 break;
563
564 case H_VPA_REG_SLB:
565
566 err = H_RESOURCE;
567 if (!vpa_is_registered(&tvcpu->arch.vpa))
568 break;
569
570 vpap = &tvcpu->arch.slb_shadow;
571 err = 0;
572 break;
573
574 case H_VPA_DEREG_VPA:
575
576 err = H_RESOURCE;
577 if (vpa_is_registered(&tvcpu->arch.dtl) ||
578 vpa_is_registered(&tvcpu->arch.slb_shadow))
579 break;
580
581 vpap = &tvcpu->arch.vpa;
582 err = 0;
583 break;
584
585 case H_VPA_DEREG_DTL:
586 vpap = &tvcpu->arch.dtl;
587 err = 0;
588 break;
589
590 case H_VPA_DEREG_SLB:
591 vpap = &tvcpu->arch.slb_shadow;
592 err = 0;
593 break;
594 }
595
596 if (vpap) {
597 vpap->next_gpa = vpa;
598 vpap->len = len;
599 vpap->update_pending = 1;
600 }
601
602 spin_unlock(&tvcpu->arch.vpa_update_lock);
603
604 return err;
605}
606
607static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
608{
609 struct kvm *kvm = vcpu->kvm;
610 void *va;
611 unsigned long nb;
612 unsigned long gpa;
613
	/*
	 * We need to pin the page pointed to by vpap->next_gpa,
	 * but we can't call kvmppc_pin_guest_page under the lock
	 * as it does get_user_pages() and down_read().  So we
	 * have to drop the lock, pin the page, then get the lock
	 * again and check that a new area didn't get registered
	 * in the meantime.
	 */
622 for (;;) {
623 gpa = vpap->next_gpa;
624 spin_unlock(&vcpu->arch.vpa_update_lock);
625 va = NULL;
626 nb = 0;
627 if (gpa)
628 va = kvmppc_pin_guest_page(kvm, gpa, &nb);
629 spin_lock(&vcpu->arch.vpa_update_lock);
630 if (gpa == vpap->next_gpa)
631 break;
632
633 if (va)
634 kvmppc_unpin_guest_page(kvm, va, gpa, false);
635 }
636
637 vpap->update_pending = 0;
638 if (va && nb < vpap->len) {
		/*
		 * If it's now too short, it must be that userspace
		 * has changed the mappings underlying guest memory,
		 * so unregister the region.
		 */
644 kvmppc_unpin_guest_page(kvm, va, gpa, false);
645 va = NULL;
646 }
647 if (vpap->pinned_addr)
648 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
649 vpap->dirty);
650 vpap->gpa = gpa;
651 vpap->pinned_addr = va;
652 vpap->dirty = false;
653 if (va)
654 vpap->pinned_end = va + vpap->len;
655}
656
657static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
658{
659 if (!(vcpu->arch.vpa.update_pending ||
660 vcpu->arch.slb_shadow.update_pending ||
661 vcpu->arch.dtl.update_pending))
662 return;
663
664 spin_lock(&vcpu->arch.vpa_update_lock);
665 if (vcpu->arch.vpa.update_pending) {
666 kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
667 if (vcpu->arch.vpa.pinned_addr)
668 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
669 }
670 if (vcpu->arch.dtl.update_pending) {
671 kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
672 vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
673 vcpu->arch.dtl_index = 0;
674 }
675 if (vcpu->arch.slb_shadow.update_pending)
676 kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
677 spin_unlock(&vcpu->arch.vpa_update_lock);
678}
679
/*
 * Return the accumulated stolen time for the vcore up until `now'.
 * The caller should hold the vcore lock.
 */
684static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
685{
686 u64 p;
687 unsigned long flags;
688
689 spin_lock_irqsave(&vc->stoltb_lock, flags);
690 p = vc->stolen_tb;
691 if (vc->vcore_state != VCORE_INACTIVE &&
692 vc->preempt_tb != TB_NIL)
693 p += now - vc->preempt_tb;
694 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
695 return p;
696}
697
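/*
 * Add an entry to the vcpu's dispatch trace log, recording the timebase
 * of the dispatch and the stolen time accumulated since the last entry,
 * then advance the DTL index in the shared VPA.
 */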
698static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
699 struct kvmppc_vcore *vc)
700{
701 struct dtl_entry *dt;
702 struct lppaca *vpa;
703 unsigned long stolen;
704 unsigned long core_stolen;
705 u64 now;
706 unsigned long flags;
707
708 dt = vcpu->arch.dtl_ptr;
709 vpa = vcpu->arch.vpa.pinned_addr;
710 now = mftb();
711 core_stolen = vcore_stolen_time(vc, now);
712 stolen = core_stolen - vcpu->arch.stolen_logged;
713 vcpu->arch.stolen_logged = core_stolen;
714 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
715 stolen += vcpu->arch.busy_stolen;
716 vcpu->arch.busy_stolen = 0;
717 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
718 if (!dt || !vpa)
719 return;
720 memset(dt, 0, sizeof(struct dtl_entry));
721 dt->dispatch_reason = 7;
722 dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
723 dt->timebase = cpu_to_be64(now + vc->tb_offset);
724 dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
725 dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
726 dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
727 ++dt;
728 if (dt == vcpu->arch.dtl.pinned_end)
729 dt = vcpu->arch.dtl.pinned_addr;
730 vcpu->arch.dtl_ptr = dt;
	/* order writing *dt vs. writing vpa->dtl_idx */
732 smp_wmb();
733 vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
734 vcpu->arch.dtl.dirty = true;
735}
736
737
738static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
739{
740 int thr;
741 struct kvmppc_vcore *vc;
742
743 if (vcpu->arch.doorbell_request)
744 return true;
745
	/*
	 * Ensure that the read of vc->dpdes below comes after the read
	 * of vcpu->arch.doorbell_request above; this pairs with the
	 * barrier on the side that sets doorbell_request.
	 */
750 smp_rmb();
751 vc = vcpu->arch.vcore;
752 thr = vcpu->vcpu_id - vc->first_vcpuid;
753 return !!(vc->dpdes & (1 << thr));
754}
755
756static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
757{
758 if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
759 return true;
760 if ((!vcpu->arch.vcore->arch_compat) &&
761 cpu_has_feature(CPU_FTR_ARCH_207S))
762 return true;
763 return false;
764}
765
766static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
767 unsigned long resource, unsigned long value1,
768 unsigned long value2)
769{
770 switch (resource) {
771 case H_SET_MODE_RESOURCE_SET_CIABR:
772 if (!kvmppc_power8_compatible(vcpu))
773 return H_P2;
774 if (value2)
775 return H_P4;
776 if (mflags)
777 return H_UNSUPPORTED_FLAG_START;
778
779 if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER)
780 return H_P3;
781 vcpu->arch.ciabr = value1;
782 return H_SUCCESS;
783 case H_SET_MODE_RESOURCE_SET_DAWR:
784 if (!kvmppc_power8_compatible(vcpu))
785 return H_P2;
786 if (!ppc_breakpoint_available())
787 return H_P2;
788 if (mflags)
789 return H_UNSUPPORTED_FLAG_START;
790 if (value2 & DABRX_HYP)
791 return H_P4;
792 vcpu->arch.dawr = value1;
793 vcpu->arch.dawrx = value2;
794 return H_SUCCESS;
795 default:
796 return H_TOO_HARD;
797 }
798}
799
/* Copy guest memory in place - must reside within a single memslot */
801static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from,
802 unsigned long len)
803{
804 struct kvm_memory_slot *to_memslot = NULL;
805 struct kvm_memory_slot *from_memslot = NULL;
806 unsigned long to_addr, from_addr;
807 int r;
808
809
810 from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT);
811 if (!from_memslot)
812 return -EFAULT;
813 if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages)
814 << PAGE_SHIFT))
815 return -EINVAL;
816 from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT);
817 if (kvm_is_error_hva(from_addr))
818 return -EFAULT;
819 from_addr |= (from & (PAGE_SIZE - 1));
820
821
822 to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT);
823 if (!to_memslot)
824 return -EFAULT;
825 if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages)
826 << PAGE_SHIFT))
827 return -EINVAL;
828 to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT);
829 if (kvm_is_error_hva(to_addr))
830 return -EFAULT;
831 to_addr |= (to & (PAGE_SIZE - 1));
832
833
834 r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr,
835 len);
836 if (r)
837 return -EFAULT;
838 mark_page_dirty(kvm, to >> PAGE_SHIFT);
839 return 0;
840}
841
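/*
 * Handle the H_PAGE_INIT hcall: optionally zero and/or copy one 4kB
 * page of guest memory, as selected by the flags argument.
 */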
842static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
843 unsigned long dest, unsigned long src)
844{
845 u64 pg_sz = SZ_4K;
846 u64 pg_mask = SZ_4K - 1;
847 int ret;
848
	/* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
850 if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
851 H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
852 return H_PARAMETER;
853
	/* dest (and src if copy_page flag set) must be page aligned */
855 if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
856 return H_PARAMETER;
857
	/* zero and/or copy the page as determined by the flags */
859 if (flags & H_COPY_PAGE) {
860 ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz);
861 if (ret < 0)
862 return H_PARAMETER;
863 } else if (flags & H_ZERO_PAGE) {
864 ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz);
865 if (ret < 0)
866 return H_PARAMETER;
867 }
868
869
870
871 return H_SUCCESS;
872}
873
874static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
875{
876 struct kvmppc_vcore *vcore = target->arch.vcore;
877
	/*
	 * If the target vcore is currently being run by another task
	 * (the runner), yield to that task rather than to the target
	 * vcpu itself, since it is the runner's task that is actually
	 * executing on a physical CPU on the vcore's behalf.
	 */
886 spin_lock(&vcore->lock);
887 if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
888 vcore->vcore_state != VCORE_INACTIVE &&
889 vcore->runner)
890 target = vcore->runner;
891 spin_unlock(&vcore->lock);
892
893 return kvm_vcpu_yield_to(target);
894}
895
896static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
897{
898 int yield_count = 0;
899 struct lppaca *lppaca;
900
901 spin_lock(&vcpu->arch.vpa_update_lock);
902 lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
903 if (lppaca)
904 yield_count = be32_to_cpu(lppaca->yield_count);
905 spin_unlock(&vcpu->arch.vpa_update_lock);
906 return yield_count;
907}
908
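/*
 * Virtual-mode handler for PAPR hypercalls made by the guest.  Handles
 * the calls that can be dealt with in the kernel; anything else is
 * passed out to userspace with RESUME_HOST.
 */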
909int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
910{
911 unsigned long req = kvmppc_get_gpr(vcpu, 3);
912 unsigned long target, ret = H_SUCCESS;
913 int yield_count;
914 struct kvm_vcpu *tvcpu;
915 int idx, rc;
916
917 if (req <= MAX_HCALL_OPCODE &&
918 !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls))
919 return RESUME_HOST;
920
921 switch (req) {
922 case H_CEDE:
923 break;
924 case H_PROD:
925 target = kvmppc_get_gpr(vcpu, 4);
926 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
927 if (!tvcpu) {
928 ret = H_PARAMETER;
929 break;
930 }
931 tvcpu->arch.prodded = 1;
932 smp_mb();
933 if (tvcpu->arch.ceded)
934 kvmppc_fast_vcpu_kick_hv(tvcpu);
935 break;
936 case H_CONFER:
937 target = kvmppc_get_gpr(vcpu, 4);
938 if (target == -1)
939 break;
940 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
941 if (!tvcpu) {
942 ret = H_PARAMETER;
943 break;
944 }
945 yield_count = kvmppc_get_gpr(vcpu, 5);
946 if (kvmppc_get_yield_count(tvcpu) != yield_count)
947 break;
948 kvm_arch_vcpu_yield_to(tvcpu);
949 break;
950 case H_REGISTER_VPA:
951 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
952 kvmppc_get_gpr(vcpu, 5),
953 kvmppc_get_gpr(vcpu, 6));
954 break;
955 case H_RTAS:
956 if (list_empty(&vcpu->kvm->arch.rtas_tokens))
957 return RESUME_HOST;
958
959 idx = srcu_read_lock(&vcpu->kvm->srcu);
960 rc = kvmppc_rtas_hcall(vcpu);
961 srcu_read_unlock(&vcpu->kvm->srcu, idx);
962
963 if (rc == -ENOENT)
964 return RESUME_HOST;
965 else if (rc == 0)
966 break;
967
		/* Send the error out to userspace via KVM_RUN */
969 return rc;
970 case H_LOGICAL_CI_LOAD:
971 ret = kvmppc_h_logical_ci_load(vcpu);
972 if (ret == H_TOO_HARD)
973 return RESUME_HOST;
974 break;
975 case H_LOGICAL_CI_STORE:
976 ret = kvmppc_h_logical_ci_store(vcpu);
977 if (ret == H_TOO_HARD)
978 return RESUME_HOST;
979 break;
980 case H_SET_MODE:
981 ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
982 kvmppc_get_gpr(vcpu, 5),
983 kvmppc_get_gpr(vcpu, 6),
984 kvmppc_get_gpr(vcpu, 7));
985 if (ret == H_TOO_HARD)
986 return RESUME_HOST;
987 break;
988 case H_XIRR:
989 case H_CPPR:
990 case H_EOI:
991 case H_IPI:
992 case H_IPOLL:
993 case H_XIRR_X:
994 if (kvmppc_xics_enabled(vcpu)) {
995 if (xics_on_xive()) {
996 ret = H_NOT_AVAILABLE;
997 return RESUME_GUEST;
998 }
999 ret = kvmppc_xics_hcall(vcpu, req);
1000 break;
1001 }
1002 return RESUME_HOST;
1003 case H_SET_DABR:
1004 ret = kvmppc_h_set_dabr(vcpu, kvmppc_get_gpr(vcpu, 4));
1005 break;
1006 case H_SET_XDABR:
1007 ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4),
1008 kvmppc_get_gpr(vcpu, 5));
1009 break;
1010#ifdef CONFIG_SPAPR_TCE_IOMMU
1011 case H_GET_TCE:
1012 ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
1013 kvmppc_get_gpr(vcpu, 5));
1014 if (ret == H_TOO_HARD)
1015 return RESUME_HOST;
1016 break;
1017 case H_PUT_TCE:
1018 ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
1019 kvmppc_get_gpr(vcpu, 5),
1020 kvmppc_get_gpr(vcpu, 6));
1021 if (ret == H_TOO_HARD)
1022 return RESUME_HOST;
1023 break;
1024 case H_PUT_TCE_INDIRECT:
1025 ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
1026 kvmppc_get_gpr(vcpu, 5),
1027 kvmppc_get_gpr(vcpu, 6),
1028 kvmppc_get_gpr(vcpu, 7));
1029 if (ret == H_TOO_HARD)
1030 return RESUME_HOST;
1031 break;
1032 case H_STUFF_TCE:
1033 ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
1034 kvmppc_get_gpr(vcpu, 5),
1035 kvmppc_get_gpr(vcpu, 6),
1036 kvmppc_get_gpr(vcpu, 7));
1037 if (ret == H_TOO_HARD)
1038 return RESUME_HOST;
1039 break;
1040#endif
1041 case H_RANDOM:
1042 if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
1043 ret = H_HARDWARE;
1044 break;
1045
1046 case H_SET_PARTITION_TABLE:
1047 ret = H_FUNCTION;
1048 if (nesting_enabled(vcpu->kvm))
1049 ret = kvmhv_set_partition_table(vcpu);
1050 break;
1051 case H_ENTER_NESTED:
1052 ret = H_FUNCTION;
1053 if (!nesting_enabled(vcpu->kvm))
1054 break;
1055 ret = kvmhv_enter_nested_guest(vcpu);
1056 if (ret == H_INTERRUPT) {
1057 kvmppc_set_gpr(vcpu, 3, 0);
1058 vcpu->arch.hcall_needed = 0;
1059 return -EINTR;
1060 } else if (ret == H_TOO_HARD) {
1061 kvmppc_set_gpr(vcpu, 3, 0);
1062 vcpu->arch.hcall_needed = 0;
1063 return RESUME_HOST;
1064 }
1065 break;
1066 case H_TLB_INVALIDATE:
1067 ret = H_FUNCTION;
1068 if (nesting_enabled(vcpu->kvm))
1069 ret = kvmhv_do_nested_tlbie(vcpu);
1070 break;
1071 case H_COPY_TOFROM_GUEST:
1072 ret = H_FUNCTION;
1073 if (nesting_enabled(vcpu->kvm))
1074 ret = kvmhv_copy_tofrom_guest_nested(vcpu);
1075 break;
1076 case H_PAGE_INIT:
1077 ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4),
1078 kvmppc_get_gpr(vcpu, 5),
1079 kvmppc_get_gpr(vcpu, 6));
1080 break;
1081 default:
1082 return RESUME_HOST;
1083 }
1084 kvmppc_set_gpr(vcpu, 3, ret);
1085 vcpu->arch.hcall_needed = 0;
1086 return RESUME_GUEST;
1087}
1088
/*
 * Handle H_CEDE on the path where the real-mode hcall handlers in
 * book3s_hv_rmhandlers.S have not been called: mark the vcpu as ceded
 * with external interrupts enabled, unless a prod is already pending.
 */
1095static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
1096{
1097 vcpu->arch.shregs.msr |= MSR_EE;
1098 vcpu->arch.ceded = 1;
1099 smp_mb();
1100 if (vcpu->arch.prodded) {
1101 vcpu->arch.prodded = 0;
1102 smp_mb();
1103 vcpu->arch.ceded = 0;
1104 }
1105}
1106
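/* Report whether an hcall is handled in-kernel for HV guests. */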
1107static int kvmppc_hcall_impl_hv(unsigned long cmd)
1108{
1109 switch (cmd) {
1110 case H_CEDE:
1111 case H_PROD:
1112 case H_CONFER:
1113 case H_REGISTER_VPA:
1114 case H_SET_MODE:
1115 case H_LOGICAL_CI_LOAD:
1116 case H_LOGICAL_CI_STORE:
1117#ifdef CONFIG_KVM_XICS
1118 case H_XIRR:
1119 case H_CPPR:
1120 case H_EOI:
1121 case H_IPI:
1122 case H_IPOLL:
1123 case H_XIRR_X:
1124#endif
1125 case H_PAGE_INIT:
1126 return 1;
1127 }
1128
	/* See if it's in the real-mode table */
1130 return kvmppc_hcall_impl_hv_realmode(cmd);
1131}
1132
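/*
 * A program interrupt was caused by an emulated instruction: if it is
 * the software-breakpoint instruction, exit to userspace for debugging,
 * otherwise queue an illegal-instruction program check for the guest.
 */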
1133static int kvmppc_emulate_debug_inst(struct kvm_run *run,
1134 struct kvm_vcpu *vcpu)
1135{
1136 u32 last_inst;
1137
1138 if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
1139 EMULATE_DONE) {
		/*
		 * Fetch failed, so return to guest and
		 * try executing it again.
		 */
1144 return RESUME_GUEST;
1145 }
1146
1147 if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
1148 run->exit_reason = KVM_EXIT_DEBUG;
1149 run->debug.arch.address = kvmppc_get_pc(vcpu);
1150 return RESUME_HOST;
1151 } else {
1152 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1153 return RESUME_GUEST;
1154 }
1155}
1156
1157static void do_nothing(void *x)
1158{
1159}
1160
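/*
 * Compute the value the guest should see for the DPDES register by
 * gathering the doorbell-pending state of every vcpu in the emulated
 * SMT core containing this vcpu.
 */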
1161static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
1162{
1163 int thr, cpu, pcpu, nthreads;
1164 struct kvm_vcpu *v;
1165 unsigned long dpdes;
1166
1167 nthreads = vcpu->kvm->arch.emul_smt_mode;
1168 dpdes = 0;
1169 cpu = vcpu->vcpu_id & ~(nthreads - 1);
1170 for (thr = 0; thr < nthreads; ++thr, ++cpu) {
1171 v = kvmppc_find_vcpu(vcpu->kvm, cpu);
1172 if (!v)
1173 continue;
		/*
		 * If the vcpu is currently running on a physical cpu thread,
		 * interrupt it in order to pull it out of the guest briefly,
		 * which will update its vcore->dpdes value.
		 */
1179 pcpu = READ_ONCE(v->cpu);
1180 if (pcpu >= 0)
1181 smp_call_function_single(pcpu, do_nothing, NULL, 1);
1182 if (kvmppc_doorbell_pending(v))
1183 dpdes |= 1 << thr;
1184 }
1185 return dpdes;
1186}
1187
/*
 * On POWER9, emulate doorbell-related instructions in order to
 * give the guest the illusion of running on a multi-threaded core.
 * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
 * and mfspr DPDES.
 */
1194static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
1195{
1196 u32 inst, rb, thr;
1197 unsigned long arg;
1198 struct kvm *kvm = vcpu->kvm;
1199 struct kvm_vcpu *tvcpu;
1200
1201 if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
1202 return RESUME_GUEST;
1203 if (get_op(inst) != 31)
1204 return EMULATE_FAIL;
1205 rb = get_rb(inst);
1206 thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
1207 switch (get_xop(inst)) {
1208 case OP_31_XOP_MSGSNDP:
1209 arg = kvmppc_get_gpr(vcpu, rb);
1210 if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
1211 break;
1212 arg &= 0x3f;
1213 if (arg >= kvm->arch.emul_smt_mode)
1214 break;
1215 tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
1216 if (!tvcpu)
1217 break;
1218 if (!tvcpu->arch.doorbell_request) {
1219 tvcpu->arch.doorbell_request = 1;
1220 kvmppc_fast_vcpu_kick_hv(tvcpu);
1221 }
1222 break;
1223 case OP_31_XOP_MSGCLRP:
1224 arg = kvmppc_get_gpr(vcpu, rb);
1225 if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
1226 break;
1227 vcpu->arch.vcore->dpdes = 0;
1228 vcpu->arch.doorbell_request = 0;
1229 break;
1230 case OP_31_XOP_MFSPR:
1231 switch (get_sprn(inst)) {
1232 case SPRN_TIR:
1233 arg = thr;
1234 break;
1235 case SPRN_DPDES:
1236 arg = kvmppc_read_dpdes(vcpu);
1237 break;
1238 default:
1239 return EMULATE_FAIL;
1240 }
1241 kvmppc_set_gpr(vcpu, get_rt(inst), arg);
1242 break;
1243 default:
1244 return EMULATE_FAIL;
1245 }
1246 kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
1247 return RESUME_GUEST;
1248}
1249
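/*
 * Handle an exit from the guest, returning RESUME_GUEST, RESUME_HOST,
 * RESUME_PAGE_FAULT or RESUME_PASSTHROUGH as appropriate.
 */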
1250static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
1251 struct task_struct *tsk)
1252{
1253 int r = RESUME_HOST;
1254
1255 vcpu->stat.sum_exits++;
1256
	/*
	 * This can happen if an interrupt occurs in the last stages
	 * of guest entry or the first stages of guest exit (i.e. after
	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
	 * That can happen due to a bug, or due to a machine check
	 * occurring at just the wrong time.
	 */
1265 if (vcpu->arch.shregs.msr & MSR_HV) {
1266 printk(KERN_EMERG "KVM trap in HV mode!\n");
1267 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
1268 vcpu->arch.trap, kvmppc_get_pc(vcpu),
1269 vcpu->arch.shregs.msr);
1270 kvmppc_dump_regs(vcpu);
1271 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1272 run->hw.hardware_exit_reason = vcpu->arch.trap;
1273 return RESUME_HOST;
1274 }
1275 run->exit_reason = KVM_EXIT_UNKNOWN;
1276 run->ready_for_interrupt_injection = 1;
1277 switch (vcpu->arch.trap) {
1278
1279 case BOOK3S_INTERRUPT_HV_DECREMENTER:
1280 vcpu->stat.dec_exits++;
1281 r = RESUME_GUEST;
1282 break;
1283 case BOOK3S_INTERRUPT_EXTERNAL:
1284 case BOOK3S_INTERRUPT_H_DOORBELL:
1285 case BOOK3S_INTERRUPT_H_VIRT:
1286 vcpu->stat.ext_intr_exits++;
1287 r = RESUME_GUEST;
1288 break;
1289
1290 case BOOK3S_INTERRUPT_HMI:
1291 case BOOK3S_INTERRUPT_PERFMON:
1292 case BOOK3S_INTERRUPT_SYSTEM_RESET:
1293 r = RESUME_GUEST;
1294 break;
1295 case BOOK3S_INTERRUPT_MACHINE_CHECK:
		/* Print the MCE event to the host console. */
1297 machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);

		/*
		 * If the guest can do FWNMI, exit to userspace so it can
		 * deliver a FWNMI to the guest.
		 * Otherwise we synthesize a machine check for the guest
		 * so that it knows that the machine check occurred.
		 */
1305 if (!vcpu->kvm->arch.fwnmi_enabled) {
1306 ulong flags = vcpu->arch.shregs.msr & 0x083c0000;
1307 kvmppc_core_queue_machine_check(vcpu, flags);
1308 r = RESUME_GUEST;
1309 break;
1310 }
1311
1312
1313 run->exit_reason = KVM_EXIT_NMI;
1314 run->hw.hardware_exit_reason = vcpu->arch.trap;
1315
1316 run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
1317
1318 if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
1319 run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
1320 else
1321 run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
1322
1323 r = RESUME_HOST;
1324 break;
1325 case BOOK3S_INTERRUPT_PROGRAM:
1326 {
1327 ulong flags;
1328
		/*
		 * Pass the program interrupt straight on to the guest,
		 * using the interrupt-reason bits from the guest MSR
		 * as the flags for the queued exception.
		 */
1334 flags = vcpu->arch.shregs.msr & 0x1f0000ull;
1335 kvmppc_core_queue_program(vcpu, flags);
1336 r = RESUME_GUEST;
1337 break;
1338 }
1339 case BOOK3S_INTERRUPT_SYSCALL:
1340 {
		/* hcall - punt to userspace */
1342 int i;
1343
		/* hypercall with MSR_PR has already been handled in rmode,
		 * and never reaches here.
		 */
1348 run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
1349 for (i = 0; i < 9; ++i)
1350 run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
1351 run->exit_reason = KVM_EXIT_PAPR_HCALL;
1352 vcpu->arch.hcall_needed = 1;
1353 r = RESUME_HOST;
1354 break;
1355 }
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */
1363 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
1364 r = RESUME_PAGE_FAULT;
1365 break;
1366 case BOOK3S_INTERRUPT_H_INST_STORAGE:
1367 vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
1368 vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
1369 DSISR_SRR1_MATCH_64S;
1370 if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
1371 vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
1372 r = RESUME_PAGE_FAULT;
1373 break;
1374
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * If guest debug is disabled, generate a program interrupt
	 * to the guest; if enabled, check whether the instruction is
	 * the software breakpoint instruction and exit to userspace
	 * or to the guest accordingly.
	 */
1381 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
1382 if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
1383 vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
1384 swab32(vcpu->arch.emul_inst) :
1385 vcpu->arch.emul_inst;
1386 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
1387 r = kvmppc_emulate_debug_inst(run, vcpu);
1388 } else {
1389 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1390 r = RESUME_GUEST;
1391 }
1392 break;
1393
	/*
	 * This occurs if the guest (kernel or userspace) does something
	 * that is prohibited by HFSCR.
	 * On POWER9, this could be a doorbell instruction that we need
	 * to emulate; otherwise we just generate a program interrupt.
	 */
1400 case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
1401 r = EMULATE_FAIL;
1402 if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
1403 cpu_has_feature(CPU_FTR_ARCH_300))
1404 r = kvmppc_emulate_doorbell_instr(vcpu);
1405 if (r == EMULATE_FAIL) {
1406 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1407 r = RESUME_GUEST;
1408 }
1409 break;
1410
1411#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1412 case BOOK3S_INTERRUPT_HV_SOFTPATCH:
		/*
		 * This occurs for various TM-related instructions that
		 * we need to emulate on POWER9 DD2.2.  We have already
		 * handled the cases where the guest was in real-suspend
		 * mode and was transitioning to transactional state.
		 */
1419 r = kvmhv_p9_tm_emulation(vcpu);
1420 break;
1421#endif
1422
1423 case BOOK3S_INTERRUPT_HV_RM_HARD:
1424 r = RESUME_PASSTHROUGH;
1425 break;
1426 default:
1427 kvmppc_dump_regs(vcpu);
1428 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
1429 vcpu->arch.trap, kvmppc_get_pc(vcpu),
1430 vcpu->arch.shregs.msr);
1431 run->hw.hardware_exit_reason = vcpu->arch.trap;
1432 r = RESUME_HOST;
1433 break;
1434 }
1435
1436 return r;
1437}
1438
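/*
 * Handle an exit from a nested (L2) guest that we were running on
 * behalf of the L1 hypervisor.
 */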
1439static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
1440{
1441 int r;
1442 int srcu_idx;
1443
1444 vcpu->stat.sum_exits++;
1445
	/*
	 * This can happen if an interrupt occurs in the last stages
	 * of guest entry or the first stages of guest exit (i.e. after
	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
	 * That can happen due to a bug, or due to a machine check
	 * occurring at just the wrong time.
	 */
1454 if (vcpu->arch.shregs.msr & MSR_HV) {
1455 pr_emerg("KVM trap in HV mode while nested!\n");
1456 pr_emerg("trap=0x%x | pc=0x%lx | msr=0x%llx\n",
1457 vcpu->arch.trap, kvmppc_get_pc(vcpu),
1458 vcpu->arch.shregs.msr);
1459 kvmppc_dump_regs(vcpu);
1460 return RESUME_HOST;
1461 }
1462 switch (vcpu->arch.trap) {
1463
1464 case BOOK3S_INTERRUPT_HV_DECREMENTER:
1465 vcpu->stat.dec_exits++;
1466 r = RESUME_GUEST;
1467 break;
1468 case BOOK3S_INTERRUPT_EXTERNAL:
1469 vcpu->stat.ext_intr_exits++;
1470 r = RESUME_HOST;
1471 break;
1472 case BOOK3S_INTERRUPT_H_DOORBELL:
1473 case BOOK3S_INTERRUPT_H_VIRT:
1474 vcpu->stat.ext_intr_exits++;
1475 r = RESUME_GUEST;
1476 break;
1477
1478 case BOOK3S_INTERRUPT_HMI:
1479 case BOOK3S_INTERRUPT_PERFMON:
1480 case BOOK3S_INTERRUPT_SYSTEM_RESET:
1481 r = RESUME_GUEST;
1482 break;
1483 case BOOK3S_INTERRUPT_MACHINE_CHECK:
1484
1485 r = RESUME_HOST;
1486
1487 machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
1488 break;
1489
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.
	 */
1495 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
1496 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1497 r = kvmhv_nested_page_fault(run, vcpu);
1498 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
1499 break;
1500 case BOOK3S_INTERRUPT_H_INST_STORAGE:
1501 vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
1502 vcpu->arch.fault_dsisr = kvmppc_get_msr(vcpu) &
1503 DSISR_SRR1_MATCH_64S;
1504 if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
1505 vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
1506 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1507 r = kvmhv_nested_page_fault(run, vcpu);
1508 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
1509 break;
1510
1511#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1512 case BOOK3S_INTERRUPT_HV_SOFTPATCH:
		/*
		 * This occurs for various TM-related instructions that
		 * we need to emulate on POWER9 DD2.2.  We have already
		 * handled the cases where the guest was in real-suspend
		 * mode and was transitioning to transactional state.
		 */
1519 r = kvmhv_p9_tm_emulation(vcpu);
1520 break;
1521#endif
1522
1523 case BOOK3S_INTERRUPT_HV_RM_HARD:
1524 vcpu->arch.trap = 0;
1525 r = RESUME_GUEST;
1526 if (!xics_on_xive())
1527 kvmppc_xics_rm_complete(vcpu, 0);
1528 break;
1529 default:
1530 r = RESUME_HOST;
1531 break;
1532 }
1533
1534 return r;
1535}
1536
1537static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
1538 struct kvm_sregs *sregs)
1539{
1540 int i;
1541
1542 memset(sregs, 0, sizeof(struct kvm_sregs));
1543 sregs->pvr = vcpu->arch.pvr;
1544 for (i = 0; i < vcpu->arch.slb_max; i++) {
1545 sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
1546 sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
1547 }
1548
1549 return 0;
1550}
1551
1552static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
1553 struct kvm_sregs *sregs)
1554{
1555 int i, j;
1556
	/* Only accept the same PVR as the host's, since we can't spoof it */
1558 if (sregs->pvr != vcpu->arch.pvr)
1559 return -EINVAL;
1560
1561 j = 0;
1562 for (i = 0; i < vcpu->arch.slb_nr; i++) {
1563 if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
1564 vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
1565 vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
1566 ++j;
1567 }
1568 }
1569 vcpu->arch.slb_max = j;
1570
1571 return 0;
1572}
1573
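/*
 * Update the vcore's LPCR with the bits that userspace is allowed to
 * change.  With preserve_top32, only the low 32 bits may be modified
 * (for the 32-bit KVM_REG_PPC_LPCR interface).
 */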
1574static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
1575 bool preserve_top32)
1576{
1577 struct kvm *kvm = vcpu->kvm;
1578 struct kvmppc_vcore *vc = vcpu->arch.vcore;
1579 u64 mask;
1580
1581 spin_lock(&vc->lock);
	/*
	 * If ILE (interrupt little-endian) has changed, update the
	 * MSR_LE bit in the intr_msr for each vcpu in this vcore.
	 */
1586 if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
1587 struct kvm_vcpu *vcpu;
1588 int i;
1589
1590 kvm_for_each_vcpu(i, vcpu, kvm) {
1591 if (vcpu->arch.vcore != vc)
1592 continue;
1593 if (new_lpcr & LPCR_ILE)
1594 vcpu->arch.intr_msr |= MSR_LE;
1595 else
1596 vcpu->arch.intr_msr &= ~MSR_LE;
1597 }
1598 }
1599
	/*
	 * Userspace can only modify DPFD (default prefetch depth),
	 * ILE (interrupt little-endian) and TC (translation control).
	 * On POWER8 and POWER9 userspace can also modify AIL
	 * (alternate interrupt location).
	 */
1605 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
1606 if (cpu_has_feature(CPU_FTR_ARCH_207S))
1607 mask |= LPCR_AIL;
	/*
	 * On POWER9, allow userspace to enable large decrementer for the
	 * guest, whether or not the host has it enabled.
	 */
1612 if (cpu_has_feature(CPU_FTR_ARCH_300))
1613 mask |= LPCR_LD;
1614
	/* Broken 32-bit version of LPCR must not clear top bits */
1616 if (preserve_top32)
1617 mask &= 0xFFFFFFFF;
1618 vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
1619 spin_unlock(&vc->lock);
1620}
1621
1622static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1623 union kvmppc_one_reg *val)
1624{
1625 int r = 0;
1626 long int i;
1627
1628 switch (id) {
1629 case KVM_REG_PPC_DEBUG_INST:
1630 *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
1631 break;
1632 case KVM_REG_PPC_HIOR:
1633 *val = get_reg_val(id, 0);
1634 break;
1635 case KVM_REG_PPC_DABR:
1636 *val = get_reg_val(id, vcpu->arch.dabr);
1637 break;
1638 case KVM_REG_PPC_DABRX:
1639 *val = get_reg_val(id, vcpu->arch.dabrx);
1640 break;
1641 case KVM_REG_PPC_DSCR:
1642 *val = get_reg_val(id, vcpu->arch.dscr);
1643 break;
1644 case KVM_REG_PPC_PURR:
1645 *val = get_reg_val(id, vcpu->arch.purr);
1646 break;
1647 case KVM_REG_PPC_SPURR:
1648 *val = get_reg_val(id, vcpu->arch.spurr);
1649 break;
1650 case KVM_REG_PPC_AMR:
1651 *val = get_reg_val(id, vcpu->arch.amr);
1652 break;
1653 case KVM_REG_PPC_UAMOR:
1654 *val = get_reg_val(id, vcpu->arch.uamor);
1655 break;
1656 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
1657 i = id - KVM_REG_PPC_MMCR0;
1658 *val = get_reg_val(id, vcpu->arch.mmcr[i]);
1659 break;
1660 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
1661 i = id - KVM_REG_PPC_PMC1;
1662 *val = get_reg_val(id, vcpu->arch.pmc[i]);
1663 break;
1664 case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
1665 i = id - KVM_REG_PPC_SPMC1;
1666 *val = get_reg_val(id, vcpu->arch.spmc[i]);
1667 break;
1668 case KVM_REG_PPC_SIAR:
1669 *val = get_reg_val(id, vcpu->arch.siar);
1670 break;
1671 case KVM_REG_PPC_SDAR:
1672 *val = get_reg_val(id, vcpu->arch.sdar);
1673 break;
1674 case KVM_REG_PPC_SIER:
1675 *val = get_reg_val(id, vcpu->arch.sier);
1676 break;
1677 case KVM_REG_PPC_IAMR:
1678 *val = get_reg_val(id, vcpu->arch.iamr);
1679 break;
1680 case KVM_REG_PPC_PSPB:
1681 *val = get_reg_val(id, vcpu->arch.pspb);
1682 break;
1683 case KVM_REG_PPC_DPDES:
		/*
		 * On POWER9, where we are emulating msgsndp etc.,
		 * we return 1 bit for each vcpu, which can come from
		 * either the vcore's dpdes or the doorbell_request flag.
		 * On POWER8, doorbell_request is 0.
		 */
1690 *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
1691 vcpu->arch.doorbell_request);
1692 break;
1693 case KVM_REG_PPC_VTB:
1694 *val = get_reg_val(id, vcpu->arch.vcore->vtb);
1695 break;
1696 case KVM_REG_PPC_DAWR:
1697 *val = get_reg_val(id, vcpu->arch.dawr);
1698 break;
1699 case KVM_REG_PPC_DAWRX:
1700 *val = get_reg_val(id, vcpu->arch.dawrx);
1701 break;
1702 case KVM_REG_PPC_CIABR:
1703 *val = get_reg_val(id, vcpu->arch.ciabr);
1704 break;
1705 case KVM_REG_PPC_CSIGR:
1706 *val = get_reg_val(id, vcpu->arch.csigr);
1707 break;
1708 case KVM_REG_PPC_TACR:
1709 *val = get_reg_val(id, vcpu->arch.tacr);
1710 break;
1711 case KVM_REG_PPC_TCSCR:
1712 *val = get_reg_val(id, vcpu->arch.tcscr);
1713 break;
1714 case KVM_REG_PPC_PID:
1715 *val = get_reg_val(id, vcpu->arch.pid);
1716 break;
1717 case KVM_REG_PPC_ACOP:
1718 *val = get_reg_val(id, vcpu->arch.acop);
1719 break;
1720 case KVM_REG_PPC_WORT:
1721 *val = get_reg_val(id, vcpu->arch.wort);
1722 break;
1723 case KVM_REG_PPC_TIDR:
1724 *val = get_reg_val(id, vcpu->arch.tid);
1725 break;
1726 case KVM_REG_PPC_PSSCR:
1727 *val = get_reg_val(id, vcpu->arch.psscr);
1728 break;
1729 case KVM_REG_PPC_VPA_ADDR:
1730 spin_lock(&vcpu->arch.vpa_update_lock);
1731 *val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
1732 spin_unlock(&vcpu->arch.vpa_update_lock);
1733 break;
1734 case KVM_REG_PPC_VPA_SLB:
1735 spin_lock(&vcpu->arch.vpa_update_lock);
1736 val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
1737 val->vpaval.length = vcpu->arch.slb_shadow.len;
1738 spin_unlock(&vcpu->arch.vpa_update_lock);
1739 break;
1740 case KVM_REG_PPC_VPA_DTL:
1741 spin_lock(&vcpu->arch.vpa_update_lock);
1742 val->vpaval.addr = vcpu->arch.dtl.next_gpa;
1743 val->vpaval.length = vcpu->arch.dtl.len;
1744 spin_unlock(&vcpu->arch.vpa_update_lock);
1745 break;
1746 case KVM_REG_PPC_TB_OFFSET:
1747 *val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
1748 break;
1749 case KVM_REG_PPC_LPCR:
1750 case KVM_REG_PPC_LPCR_64:
1751 *val = get_reg_val(id, vcpu->arch.vcore->lpcr);
1752 break;
1753 case KVM_REG_PPC_PPR:
1754 *val = get_reg_val(id, vcpu->arch.ppr);
1755 break;
1756#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1757 case KVM_REG_PPC_TFHAR:
1758 *val = get_reg_val(id, vcpu->arch.tfhar);
1759 break;
1760 case KVM_REG_PPC_TFIAR:
1761 *val = get_reg_val(id, vcpu->arch.tfiar);
1762 break;
1763 case KVM_REG_PPC_TEXASR:
1764 *val = get_reg_val(id, vcpu->arch.texasr);
1765 break;
1766 case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
1767 i = id - KVM_REG_PPC_TM_GPR0;
1768 *val = get_reg_val(id, vcpu->arch.gpr_tm[i]);
1769 break;
1770 case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
1771 {
1772 int j;
1773 i = id - KVM_REG_PPC_TM_VSR0;
1774 if (i < 32)
1775 for (j = 0; j < TS_FPRWIDTH; j++)
1776 val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
1777 else {
1778 if (cpu_has_feature(CPU_FTR_ALTIVEC))
1779 val->vval = vcpu->arch.vr_tm.vr[i-32];
1780 else
1781 r = -ENXIO;
1782 }
1783 break;
1784 }
1785 case KVM_REG_PPC_TM_CR:
1786 *val = get_reg_val(id, vcpu->arch.cr_tm);
1787 break;
1788 case KVM_REG_PPC_TM_XER:
1789 *val = get_reg_val(id, vcpu->arch.xer_tm);
1790 break;
1791 case KVM_REG_PPC_TM_LR:
1792 *val = get_reg_val(id, vcpu->arch.lr_tm);
1793 break;
1794 case KVM_REG_PPC_TM_CTR:
1795 *val = get_reg_val(id, vcpu->arch.ctr_tm);
1796 break;
1797 case KVM_REG_PPC_TM_FPSCR:
1798 *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
1799 break;
1800 case KVM_REG_PPC_TM_AMR:
1801 *val = get_reg_val(id, vcpu->arch.amr_tm);
1802 break;
1803 case KVM_REG_PPC_TM_PPR:
1804 *val = get_reg_val(id, vcpu->arch.ppr_tm);
1805 break;
1806 case KVM_REG_PPC_TM_VRSAVE:
1807 *val = get_reg_val(id, vcpu->arch.vrsave_tm);
1808 break;
1809 case KVM_REG_PPC_TM_VSCR:
1810 if (cpu_has_feature(CPU_FTR_ALTIVEC))
1811 *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
1812 else
1813 r = -ENXIO;
1814 break;
1815 case KVM_REG_PPC_TM_DSCR:
1816 *val = get_reg_val(id, vcpu->arch.dscr_tm);
1817 break;
1818 case KVM_REG_PPC_TM_TAR:
1819 *val = get_reg_val(id, vcpu->arch.tar_tm);
1820 break;
1821#endif
1822 case KVM_REG_PPC_ARCH_COMPAT:
1823 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
1824 break;
1825 case KVM_REG_PPC_DEC_EXPIRY:
1826 *val = get_reg_val(id, vcpu->arch.dec_expires +
1827 vcpu->arch.vcore->tb_offset);
1828 break;
1829 case KVM_REG_PPC_ONLINE:
1830 *val = get_reg_val(id, vcpu->arch.online);
1831 break;
1832 case KVM_REG_PPC_PTCR:
1833 *val = get_reg_val(id, vcpu->kvm->arch.l1_ptcr);
1834 break;
1835 default:
1836 r = -EINVAL;
1837 break;
1838 }
1839
1840 return r;
1841}
1842
1843static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1844 union kvmppc_one_reg *val)
1845{
1846 int r = 0;
1847 long int i;
1848 unsigned long addr, len;
1849
1850 switch (id) {
1851 case KVM_REG_PPC_HIOR:
		/* Only allow this to be set to zero */
1853 if (set_reg_val(id, *val))
1854 r = -EINVAL;
1855 break;
1856 case KVM_REG_PPC_DABR:
1857 vcpu->arch.dabr = set_reg_val(id, *val);
1858 break;
1859 case KVM_REG_PPC_DABRX:
1860 vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
1861 break;
1862 case KVM_REG_PPC_DSCR:
1863 vcpu->arch.dscr = set_reg_val(id, *val);
1864 break;
1865 case KVM_REG_PPC_PURR:
1866 vcpu->arch.purr = set_reg_val(id, *val);
1867 break;
1868 case KVM_REG_PPC_SPURR:
1869 vcpu->arch.spurr = set_reg_val(id, *val);
1870 break;
1871 case KVM_REG_PPC_AMR:
1872 vcpu->arch.amr = set_reg_val(id, *val);
1873 break;
1874 case KVM_REG_PPC_UAMOR:
1875 vcpu->arch.uamor = set_reg_val(id, *val);
1876 break;
1877 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
1878 i = id - KVM_REG_PPC_MMCR0;
1879 vcpu->arch.mmcr[i] = set_reg_val(id, *val);
1880 break;
1881 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
1882 i = id - KVM_REG_PPC_PMC1;
1883 vcpu->arch.pmc[i] = set_reg_val(id, *val);
1884 break;
1885 case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
1886 i = id - KVM_REG_PPC_SPMC1;
1887 vcpu->arch.spmc[i] = set_reg_val(id, *val);
1888 break;
1889 case KVM_REG_PPC_SIAR:
1890 vcpu->arch.siar = set_reg_val(id, *val);
1891 break;
1892 case KVM_REG_PPC_SDAR:
1893 vcpu->arch.sdar = set_reg_val(id, *val);
1894 break;
1895 case KVM_REG_PPC_SIER:
1896 vcpu->arch.sier = set_reg_val(id, *val);
1897 break;
1898 case KVM_REG_PPC_IAMR:
1899 vcpu->arch.iamr = set_reg_val(id, *val);
1900 break;
1901 case KVM_REG_PPC_PSPB:
1902 vcpu->arch.pspb = set_reg_val(id, *val);
1903 break;
1904 case KVM_REG_PPC_DPDES:
1905 vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
1906 break;
1907 case KVM_REG_PPC_VTB:
1908 vcpu->arch.vcore->vtb = set_reg_val(id, *val);
1909 break;
1910 case KVM_REG_PPC_DAWR:
1911 vcpu->arch.dawr = set_reg_val(id, *val);
1912 break;
1913 case KVM_REG_PPC_DAWRX:
1914 vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
1915 break;
1916 case KVM_REG_PPC_CIABR:
1917 vcpu->arch.ciabr = set_reg_val(id, *val);
		/* Don't allow setting breakpoints in hypervisor code */
1919 if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
1920 vcpu->arch.ciabr &= ~CIABR_PRIV;
1921 break;
1922 case KVM_REG_PPC_CSIGR:
1923 vcpu->arch.csigr = set_reg_val(id, *val);
1924 break;
1925 case KVM_REG_PPC_TACR:
1926 vcpu->arch.tacr = set_reg_val(id, *val);
1927 break;
1928 case KVM_REG_PPC_TCSCR:
1929 vcpu->arch.tcscr = set_reg_val(id, *val);
1930 break;
1931 case KVM_REG_PPC_PID:
1932 vcpu->arch.pid = set_reg_val(id, *val);
1933 break;
1934 case KVM_REG_PPC_ACOP:
1935 vcpu->arch.acop = set_reg_val(id, *val);
1936 break;
1937 case KVM_REG_PPC_WORT:
1938 vcpu->arch.wort = set_reg_val(id, *val);
1939 break;
1940 case KVM_REG_PPC_TIDR:
1941 vcpu->arch.tid = set_reg_val(id, *val);
1942 break;
1943 case KVM_REG_PPC_PSSCR:
1944 vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
1945 break;
1946 case KVM_REG_PPC_VPA_ADDR:
1947 addr = set_reg_val(id, *val);
1948 r = -EINVAL;
1949 if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
1950 vcpu->arch.dtl.next_gpa))
1951 break;
1952 r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
1953 break;
1954 case KVM_REG_PPC_VPA_SLB:
1955 addr = val->vpaval.addr;
1956 len = val->vpaval.length;
1957 r = -EINVAL;
1958 if (addr && !vcpu->arch.vpa.next_gpa)
1959 break;
1960 r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
1961 break;
1962 case KVM_REG_PPC_VPA_DTL:
1963 addr = val->vpaval.addr;
1964 len = val->vpaval.length;
1965 r = -EINVAL;
1966 if (addr && (len < sizeof(struct dtl_entry) ||
1967 !vcpu->arch.vpa.next_gpa))
1968 break;
1969 len -= len % sizeof(struct dtl_entry);
1970 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
1971 break;
1972 case KVM_REG_PPC_TB_OFFSET:
		/* round up to multiple of 2^24 */
1974 vcpu->arch.vcore->tb_offset =
1975 ALIGN(set_reg_val(id, *val), 1UL << 24);
1976 break;
1977 case KVM_REG_PPC_LPCR:
1978 kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
1979 break;
1980 case KVM_REG_PPC_LPCR_64:
1981 kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false);
1982 break;
1983 case KVM_REG_PPC_PPR:
1984 vcpu->arch.ppr = set_reg_val(id, *val);
1985 break;
1986#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1987 case KVM_REG_PPC_TFHAR:
1988 vcpu->arch.tfhar = set_reg_val(id, *val);
1989 break;
1990 case KVM_REG_PPC_TFIAR:
1991 vcpu->arch.tfiar = set_reg_val(id, *val);
1992 break;
1993 case KVM_REG_PPC_TEXASR:
1994 vcpu->arch.texasr = set_reg_val(id, *val);
1995 break;
1996 case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
1997 i = id - KVM_REG_PPC_TM_GPR0;
1998 vcpu->arch.gpr_tm[i] = set_reg_val(id, *val);
1999 break;
2000 case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
2001 {
2002 int j;
2003 i = id - KVM_REG_PPC_TM_VSR0;
2004 if (i < 32)
2005 for (j = 0; j < TS_FPRWIDTH; j++)
2006 vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
2007 else
2008 if (cpu_has_feature(CPU_FTR_ALTIVEC))
2009 vcpu->arch.vr_tm.vr[i-32] = val->vval;
2010 else
2011 r = -ENXIO;
2012 break;
2013 }
2014 case KVM_REG_PPC_TM_CR:
2015 vcpu->arch.cr_tm = set_reg_val(id, *val);
2016 break;
2017 case KVM_REG_PPC_TM_XER:
2018 vcpu->arch.xer_tm = set_reg_val(id, *val);
2019 break;
2020 case KVM_REG_PPC_TM_LR:
2021 vcpu->arch.lr_tm = set_reg_val(id, *val);
2022 break;
2023 case KVM_REG_PPC_TM_CTR:
2024 vcpu->arch.ctr_tm = set_reg_val(id, *val);
2025 break;
2026 case KVM_REG_PPC_TM_FPSCR:
2027 vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
2028 break;
2029 case KVM_REG_PPC_TM_AMR:
2030 vcpu->arch.amr_tm = set_reg_val(id, *val);
2031 break;
2032 case KVM_REG_PPC_TM_PPR:
2033 vcpu->arch.ppr_tm = set_reg_val(id, *val);
2034 break;
2035 case KVM_REG_PPC_TM_VRSAVE:
2036 vcpu->arch.vrsave_tm = set_reg_val(id, *val);
2037 break;
2038 case KVM_REG_PPC_TM_VSCR:
2039 if (cpu_has_feature(CPU_FTR_ALTIVEC))
2040 vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val);
2041 else
2042 r = - ENXIO;
2043 break;
2044 case KVM_REG_PPC_TM_DSCR:
2045 vcpu->arch.dscr_tm = set_reg_val(id, *val);
2046 break;
2047 case KVM_REG_PPC_TM_TAR:
2048 vcpu->arch.tar_tm = set_reg_val(id, *val);
2049 break;
2050#endif
2051 case KVM_REG_PPC_ARCH_COMPAT:
2052 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
2053 break;
2054 case KVM_REG_PPC_DEC_EXPIRY:
2055 vcpu->arch.dec_expires = set_reg_val(id, *val) -
2056 vcpu->arch.vcore->tb_offset;
2057 break;
2058 case KVM_REG_PPC_ONLINE:
2059 i = set_reg_val(id, *val);
2060 if (i && !vcpu->arch.online)
2061 atomic_inc(&vcpu->arch.vcore->online_count);
2062 else if (!i && vcpu->arch.online)
2063 atomic_dec(&vcpu->arch.vcore->online_count);
2064 vcpu->arch.online = i;
2065 break;
2066 case KVM_REG_PPC_PTCR:
2067 vcpu->kvm->arch.l1_ptcr = set_reg_val(id, *val);
2068 break;
2069 default:
2070 r = -EINVAL;
2071 break;
2072 }
2073
2074 return r;
2075}
2076
/*
 * In independent-threads mode (used on POWER9), each thread can be in
 * a different partition, so we treat each thread as a separate subcore
 * and a virtual core needs only one hardware thread.  Otherwise all
 * the threads of a subcore have to run the same virtual core.
 */
2084static int threads_per_vcore(struct kvm *kvm)
2085{
2086 if (kvm->arch.threads_indep)
2087 return 1;
2088 return threads_per_subcore;
2089}
2090
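/*
 * Allocate and initialise a virtual core structure; 'id' is the vcpu id
 * of the first vcpu in the core.
 */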
2091static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
2092{
2093 struct kvmppc_vcore *vcore;
2094
2095 vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
2096
2097 if (vcore == NULL)
2098 return NULL;
2099
2100 spin_lock_init(&vcore->lock);
2101 spin_lock_init(&vcore->stoltb_lock);
2102 init_swait_queue_head(&vcore->wq);
2103 vcore->preempt_tb = TB_NIL;
2104 vcore->lpcr = kvm->arch.lpcr;
2105 vcore->first_vcpuid = id;
2106 vcore->kvm = kvm;
2107 INIT_LIST_HEAD(&vcore->preempt_list);
2108
2109 return vcore;
2110}
2111
2112#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
2113static struct debugfs_timings_element {
2114 const char *name;
2115 size_t offset;
2116} timings[] = {
2117 {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)},
2118 {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)},
2119 {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)},
2120 {"guest", offsetof(struct kvm_vcpu, arch.guest_time)},
2121 {"cede", offsetof(struct kvm_vcpu, arch.cede_time)},
2122};
2123
2124#define N_TIMINGS (ARRAY_SIZE(timings))
2125
2126struct debugfs_timings_state {
2127 struct kvm_vcpu *vcpu;
2128 unsigned int buflen;
2129 char buf[N_TIMINGS * 100];
2130};
2131
2132static int debugfs_timings_open(struct inode *inode, struct file *file)
2133{
2134 struct kvm_vcpu *vcpu = inode->i_private;
2135 struct debugfs_timings_state *p;
2136
2137 p = kzalloc(sizeof(*p), GFP_KERNEL);
2138 if (!p)
2139 return -ENOMEM;
2140
2141 kvm_get_kvm(vcpu->kvm);
2142 p->vcpu = vcpu;
2143 file->private_data = p;
2144
2145 return nonseekable_open(inode, file);
2146}
2147
2148static int debugfs_timings_release(struct inode *inode, struct file *file)
2149{
2150 struct debugfs_timings_state *p = file->private_data;
2151
2152 kvm_put_kvm(p->vcpu->kvm);
2153 kfree(p);
2154 return 0;
2155}
2156
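/*
 * Format the accumulated timing statistics for one vcpu, sampling each
 * accumulator with a seqcount retry loop so that a consistent snapshot
 * is copied to userspace.
 */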
2157static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
2158 size_t len, loff_t *ppos)
2159{
2160 struct debugfs_timings_state *p = file->private_data;
2161 struct kvm_vcpu *vcpu = p->vcpu;
2162 char *s, *buf_end;
2163 struct kvmhv_tb_accumulator tb;
2164 u64 count;
2165 loff_t pos;
2166 ssize_t n;
2167 int i, loops;
2168 bool ok;
2169
2170 if (!p->buflen) {
2171 s = p->buf;
2172 buf_end = s + sizeof(p->buf);
2173 for (i = 0; i < N_TIMINGS; ++i) {
2174 struct kvmhv_tb_accumulator *acc;
2175
2176 acc = (struct kvmhv_tb_accumulator *)
2177 ((unsigned long)vcpu + timings[i].offset);
2178 ok = false;
2179 for (loops = 0; loops < 1000; ++loops) {
2180 count = acc->seqcount;
2181 if (!(count & 1)) {
2182 smp_rmb();
2183 tb = *acc;
2184 smp_rmb();
2185 if (count == acc->seqcount) {
2186 ok = true;
2187 break;
2188 }
2189 }
2190 udelay(1);
2191 }
2192 if (!ok)
2193 snprintf(s, buf_end - s, "%s: stuck\n",
2194 timings[i].name);
2195 else
2196 snprintf(s, buf_end - s,
2197 "%s: %llu %llu %llu %llu\n",
2198 timings[i].name, count / 2,
2199 tb_to_ns(tb.tb_total),
2200 tb_to_ns(tb.tb_min),
2201 tb_to_ns(tb.tb_max));
2202 s += strlen(s);
2203 }
2204 p->buflen = s - p->buf;
2205 }
2206
2207 pos = *ppos;
2208 if (pos >= p->buflen)
2209 return 0;
2210 if (len > p->buflen - pos)
2211 len = p->buflen - pos;
2212 n = copy_to_user(buf, p->buf + pos, len);
2213 if (n) {
2214 if (n == len)
2215 return -EFAULT;
2216 len -= n;
2217 }
2218 *ppos = pos + len;
2219 return len;
2220}
2221
2222static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
2223 size_t len, loff_t *ppos)
2224{
2225 return -EACCES;
2226}
2227
2228static const struct file_operations debugfs_timings_ops = {
2229 .owner = THIS_MODULE,
2230 .open = debugfs_timings_open,
2231 .release = debugfs_timings_release,
2232 .read = debugfs_timings_read,
2233 .write = debugfs_timings_write,
2234 .llseek = generic_file_llseek,
2235};
2236
2237
2238static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
2239{
2240 char buf[16];
2241 struct kvm *kvm = vcpu->kvm;
2242
2243 snprintf(buf, sizeof(buf), "vcpu%u", id);
2244 if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
2245 return;
2246 vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
2247 if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
2248 return;
2249 vcpu->arch.debugfs_timings =
2250 debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
2251 vcpu, &debugfs_timings_ops);
2252}
2253
2254#else
2255static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
2256{
2257}
2258#endif
2259
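/*
 * Create and initialise an HV vcpu, assigning it to a virtual core
 * (and creating the vcore if this is the first vcpu in it).
 */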
2260static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
2261 unsigned int id)
2262{
2263 struct kvm_vcpu *vcpu;
2264 int err;
2265 int core;
2266 struct kvmppc_vcore *vcore;
2267
2268 err = -ENOMEM;
2269 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2270 if (!vcpu)
2271 goto out;
2272
2273 err = kvm_vcpu_init(vcpu, kvm, id);
2274 if (err)
2275 goto free_vcpu;
2276
2277 vcpu->arch.shared = &vcpu->arch.shregs;
2278#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
2279
2280
2281
2282
2283#ifdef __BIG_ENDIAN__
2284 vcpu->arch.shared_big_endian = true;
2285#else
2286 vcpu->arch.shared_big_endian = false;
2287#endif
2288#endif
2289 vcpu->arch.mmcr[0] = MMCR0_FC;
2290 vcpu->arch.ctrl = CTRL_RUNLATCH;
2291
2292 kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
2293 spin_lock_init(&vcpu->arch.vpa_update_lock);
2294 spin_lock_init(&vcpu->arch.tbacct_lock);
2295 vcpu->arch.busy_preempt = TB_NIL;
2296 vcpu->arch.intr_msr = MSR_SF | MSR_ME;
2297
2298
2299
2300
2301
2302
2303
2304
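 /*
  * Default HFSCR for the guest: allow TAR, EBB, the performance monitor,
  * BHRB, DSCR, vector/VSX and FP. When we have hypervisor mode, never
  * grant more than the host HFSCR allows; TM is added only where the
  * CPU supports it (including via the POWER9 TM HV assist).
  */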
2305 vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
2306 HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
2307 if (cpu_has_feature(CPU_FTR_HVMODE)) {
2308 vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
2309 if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
2310 vcpu->arch.hfscr |= HFSCR_TM;
2311 }
2312 if (cpu_has_feature(CPU_FTR_TM_COMP))
2313 vcpu->arch.hfscr |= HFSCR_TM;
2314
2315 kvmppc_mmu_book3s_hv_init(vcpu);
2316
2317 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
2318
2319 init_waitqueue_head(&vcpu->arch.cpu_run);
2320
2321 mutex_lock(&kvm->lock);
2322 vcore = NULL;
2323 err = -EINVAL;
2324 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
2325 if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
2326 pr_devel("KVM: VCPU ID too high\n");
2327 core = KVM_MAX_VCORES;
2328 } else {
2329 BUG_ON(kvm->arch.smt_mode != 1);
2330 core = kvmppc_pack_vcpu_id(kvm, id);
2331 }
2332 } else {
2333 core = id / kvm->arch.smt_mode;
2334 }
2335 if (core < KVM_MAX_VCORES) {
2336 vcore = kvm->arch.vcores[core];
2337 if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
2338 pr_devel("KVM: collision on id %u", id);
2339 vcore = NULL;
2340 } else if (!vcore) {
2341
2342
2343
2344
2345 err = -ENOMEM;
2346 vcore = kvmppc_vcore_create(kvm,
2347 id & ~(kvm->arch.smt_mode - 1));
2348 mutex_lock(&kvm->arch.mmu_setup_lock);
2349 kvm->arch.vcores[core] = vcore;
2350 kvm->arch.online_vcores++;
2351 mutex_unlock(&kvm->arch.mmu_setup_lock);
2352 }
2353 }
2354 mutex_unlock(&kvm->lock);
2355
2356 if (!vcore)
2357 goto free_vcpu;
2358
2359 spin_lock(&vcore->lock);
2360 ++vcore->num_threads;
2361 spin_unlock(&vcore->lock);
2362 vcpu->arch.vcore = vcore;
2363 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
2364 vcpu->arch.thread_cpu = -1;
2365 vcpu->arch.prev_cpu = -1;
2366
2367 vcpu->arch.cpu_type = KVM_CPU_3S_64;
2368 kvmppc_sanity_check(vcpu);
2369
2370 debugfs_vcpu_init(vcpu, id);
2371
2372 return vcpu;
2373
2374free_vcpu:
2375 kmem_cache_free(kvm_vcpu_cache, vcpu);
2376out:
2377 return ERR_PTR(err);
2378}
2379
2380static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
2381 unsigned long flags)
2382{
2383 int err;
2384 int esmt = 0;
2385
2386 if (flags)
2387 return -EINVAL;
2388 if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
2389 return -EINVAL;
2390 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
2391
2392
2393
2394
2395 if (smt_mode > threads_per_subcore)
2396 return -EINVAL;
2397 } else {
2398
2399
2400
2401
2402 esmt = smt_mode;
2403 smt_mode = 1;
2404 }
2405 mutex_lock(&kvm->lock);
2406 err = -EBUSY;
2407 if (!kvm->arch.online_vcores) {
2408 kvm->arch.smt_mode = smt_mode;
2409 kvm->arch.emul_smt_mode = esmt;
2410 err = 0;
2411 }
2412 mutex_unlock(&kvm->lock);
2413
2414 return err;
2415}
2416
2417static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
2418{
2419 if (vpa->pinned_addr)
2420 kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
2421 vpa->dirty);
2422}
2423
2424static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
2425{
2426 spin_lock(&vcpu->arch.vpa_update_lock);
2427 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
2428 unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
2429 unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
2430 spin_unlock(&vcpu->arch.vpa_update_lock);
2431 kvm_vcpu_uninit(vcpu);
2432 kmem_cache_free(kvm_vcpu_cache, vcpu);
2433}
2434
2435static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
2436{
2437
2438 return 1;
2439}
2440
2441static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
2442{
2443 unsigned long dec_nsec, now;
2444
2445 now = get_tb();
2446 if (now > vcpu->arch.dec_expires) {
2447
2448 kvmppc_core_queue_dec(vcpu);
2449 kvmppc_core_prepare_to_enter(vcpu);
2450 return;
2451 }
2452 dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
2453 hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
2454 vcpu->arch.timer_running = 1;
2455}
2456
2457static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
2458{
2459 vcpu->arch.ceded = 0;
2460 if (vcpu->arch.timer_running) {
2461 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
2462 vcpu->arch.timer_running = 0;
2463 }
2464}
2465
2466extern int __kvmppc_vcore_entry(void);
2467
2468static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
2469 struct kvm_vcpu *vcpu)
2470{
2471 u64 now;
2472
2473 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
2474 return;
2475 spin_lock_irq(&vcpu->arch.tbacct_lock);
2476 now = mftb();
2477 vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
2478 vcpu->arch.stolen_logged;
2479 vcpu->arch.busy_preempt = now;
2480 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
2481 spin_unlock_irq(&vcpu->arch.tbacct_lock);
2482 --vc->n_runnable;
2483 WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
2484}
2485
2486static int kvmppc_grab_hwthread(int cpu)
2487{
2488 struct paca_struct *tpaca;
2489 long timeout = 10000;
2490
2491 tpaca = paca_ptrs[cpu];
2492
2493
2494 tpaca->kvm_hstate.kvm_vcpu = NULL;
2495 tpaca->kvm_hstate.kvm_vcore = NULL;
2496 tpaca->kvm_hstate.napping = 0;
2497 smp_wmb();
2498 tpaca->kvm_hstate.hwthread_req = 1;
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
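 /*
  * Order the store to hwthread_req above against the reads of
  * hwthread_state below, then wait (up to ~10ms) for the offline
  * thread to leave the kernel and go back to nap so KVM can use it.
  */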
2509 smp_mb();
2510 while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
2511 if (--timeout <= 0) {
2512 pr_err("KVM: couldn't grab cpu %d\n", cpu);
2513 return -EBUSY;
2514 }
2515 udelay(1);
2516 }
2517 return 0;
2518}
2519
2520static void kvmppc_release_hwthread(int cpu)
2521{
2522 struct paca_struct *tpaca;
2523
2524 tpaca = paca_ptrs[cpu];
2525 tpaca->kvm_hstate.hwthread_req = 0;
2526 tpaca->kvm_hstate.kvm_vcpu = NULL;
2527 tpaca->kvm_hstate.kvm_vcore = NULL;
2528 tpaca->kvm_hstate.kvm_split_mode = NULL;
2529}
2530
2531static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
2532{
2533 struct kvm_nested_guest *nested = vcpu->arch.nested;
2534 cpumask_t *cpu_in_guest;
2535 int i;
2536
2537 cpu = cpu_first_thread_sibling(cpu);
2538 if (nested) {
2539 cpumask_set_cpu(cpu, &nested->need_tlb_flush);
2540 cpu_in_guest = &nested->cpu_in_guest;
2541 } else {
2542 cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
2543 cpu_in_guest = &kvm->arch.cpu_in_guest;
2544 }
2545
2546
2547
2548
2549
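 /*
  * Make sure the bit set in need_tlb_flush is visible before we test
  * cpu_in_guest, then kick any sibling threads that are currently in
  * the guest so they notice the pending flush.
  */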
2550 smp_mb();
2551 for (i = 0; i < threads_per_core; ++i)
2552 if (cpumask_test_cpu(cpu + i, cpu_in_guest))
2553 smp_call_function_single(cpu + i, do_nothing, NULL, 1);
2554}
2555
2556static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
2557{
2558 struct kvm_nested_guest *nested = vcpu->arch.nested;
2559 struct kvm *kvm = vcpu->kvm;
2560 int prev_cpu;
2561
2562 if (!cpu_has_feature(CPU_FTR_HVMODE))
2563 return;
2564
2565 if (nested)
2566 prev_cpu = nested->prev_cpu[vcpu->arch.nested_vcpu_id];
2567 else
2568 prev_cpu = vcpu->arch.prev_cpu;
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
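 /*
  * A radix guest can use tlbiel, which only invalidates the TLB of the
  * core it runs on. If this vcpu last ran on a different core, get that
  * core to flush its TLB before the vcpu runs here, and remember where
  * it is running now.
  */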
2582 if (prev_cpu != pcpu) {
2583 if (prev_cpu >= 0 &&
2584 cpu_first_thread_sibling(prev_cpu) !=
2585 cpu_first_thread_sibling(pcpu))
2586 radix_flush_cpu(kvm, prev_cpu, vcpu);
2587 if (nested)
2588 nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
2589 else
2590 vcpu->arch.prev_cpu = pcpu;
2591 }
2592}
2593
2594static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
2595{
2596 int cpu;
2597 struct paca_struct *tpaca;
2598 struct kvm *kvm = vc->kvm;
2599
2600 cpu = vc->pcpu;
2601 if (vcpu) {
2602 if (vcpu->arch.timer_running) {
2603 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
2604 vcpu->arch.timer_running = 0;
2605 }
2606 cpu += vcpu->arch.ptid;
2607 vcpu->cpu = vc->pcpu;
2608 vcpu->arch.thread_cpu = cpu;
2609 cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
2610 }
2611 tpaca = paca_ptrs[cpu];
2612 tpaca->kvm_hstate.kvm_vcpu = vcpu;
2613 tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
2614 tpaca->kvm_hstate.fake_suspend = 0;
2615
2616 smp_wmb();
2617 tpaca->kvm_hstate.kvm_vcore = vc;
2618 if (cpu != smp_processor_id())
2619 kvmppc_ipi_thread(cpu);
2620}
2621
2622static void kvmppc_wait_for_nap(int n_threads)
2623{
2624 int cpu = smp_processor_id();
2625 int i, loops;
2626
2627 if (n_threads <= 1)
2628 return;
2629 for (loops = 0; loops < 1000000; ++loops) {
2630
2631
2632
2633
2634
2635
2636 for (i = 1; i < n_threads; ++i)
2637 if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
2638 break;
2639 if (i == n_threads) {
2640 HMT_medium();
2641 return;
2642 }
2643 HMT_low();
2644 }
2645 HMT_medium();
2646 for (i = 1; i < n_threads; ++i)
2647 if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
2648 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
2649}
2650
2651
2652
2653
2654
2655
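/*
 * Check that we are on the first thread of the subcore and that the other
 * threads in this core are offline, then grab them so they can't enter
 * the kernel while we run the guest.
 */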
2656static int on_primary_thread(void)
2657{
2658 int cpu = smp_processor_id();
2659 int thr;
2660
2661
2662 if (cpu_thread_in_subcore(cpu))
2663 return 0;
2664
2665 thr = 0;
2666 while (++thr < threads_per_subcore)
2667 if (cpu_online(cpu + thr))
2668 return 0;
2669
2670
2671 for (thr = 1; thr < threads_per_subcore; ++thr) {
2672 if (kvmppc_grab_hwthread(cpu + thr)) {
2673
2674 do {
2675 kvmppc_release_hwthread(cpu + thr);
2676 } while (--thr > 0);
2677 return 0;
2678 }
2679 }
2680 return 1;
2681}
2682
2683
2684
2685
2686
2687
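/*
 * Per-CPU list of virtual cores that could run but whose runner vCPU
 * tasks are (or may be) preempted.
 */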
2688struct preempted_vcore_list {
2689 struct list_head list;
2690 spinlock_t lock;
2691};
2692
2693static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
2694
2695static void init_vcore_lists(void)
2696{
2697 int cpu;
2698
2699 for_each_possible_cpu(cpu) {
2700 struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
2701 spin_lock_init(&lp->lock);
2702 INIT_LIST_HEAD(&lp->list);
2703 }
2704}
2705
2706static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
2707{
2708 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
2709
2710 vc->vcore_state = VCORE_PREEMPT;
2711 vc->pcpu = smp_processor_id();
2712 if (vc->num_threads < threads_per_vcore(vc->kvm)) {
2713 spin_lock(&lp->lock);
2714 list_add_tail(&vc->preempt_list, &lp->list);
2715 spin_unlock(&lp->lock);
2716 }
2717
2718
2719 kvmppc_core_start_stolen(vc);
2720}
2721
2722static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
2723{
2724 struct preempted_vcore_list *lp;
2725
2726 kvmppc_core_end_stolen(vc);
2727 if (!list_empty(&vc->preempt_list)) {
2728 lp = &per_cpu(preempted_vcores, vc->pcpu);
2729 spin_lock(&lp->lock);
2730 list_del_init(&vc->preempt_list);
2731 spin_unlock(&lp->lock);
2732 }
2733 vc->vcore_state = VCORE_INACTIVE;
2734}
2735
2736
2737
2738
2739
2740struct core_info {
2741 int n_subcores;
2742 int max_subcore_threads;
2743 int total_threads;
2744 int subcore_threads[MAX_SUBCORES];
2745 struct kvmppc_vcore *vc[MAX_SUBCORES];
2746};
2747
2748
2749
2750
2751
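/*
 * First thread number used by each subcore when the core is split;
 * with this mapping, a 2-way split gives subcores 0 and 1 threads 0-3
 * and 4-7 respectively.
 */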
2752static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
2753
2754static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
2755{
2756 memset(cip, 0, sizeof(*cip));
2757 cip->n_subcores = 1;
2758 cip->max_subcore_threads = vc->num_threads;
2759 cip->total_threads = vc->num_threads;
2760 cip->subcore_threads[0] = vc->num_threads;
2761 cip->vc[0] = vc;
2762}
2763
2764static bool subcore_config_ok(int n_subcores, int n_threads)
2765{
2766
2767
2768
2769
2770 if (cpu_has_feature(CPU_FTR_ARCH_300))
2771 return n_subcores <= 4 && n_threads == 1;
2772
2773
2774 if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
2775 return false;
2776 if (n_subcores > MAX_SUBCORES)
2777 return false;
2778 if (n_subcores > 1) {
2779 if (!(dynamic_mt_modes & 2))
2780 n_subcores = 4;
2781 if (n_subcores > 2 && !(dynamic_mt_modes & 4))
2782 return false;
2783 }
2784
2785 return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
2786}
2787
2788static void init_vcore_to_run(struct kvmppc_vcore *vc)
2789{
2790 vc->entry_exit_map = 0;
2791 vc->in_guest = 0;
2792 vc->napping_threads = 0;
2793 vc->conferring_threads = 0;
2794 vc->tb_offset_applied = 0;
2795}
2796
2797static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
2798{
2799 int n_threads = vc->num_threads;
2800 int sub;
2801
2802 if (!cpu_has_feature(CPU_FTR_ARCH_207S))
2803 return false;
2804
2805
2806 if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm)
2807 return false;
2808
2809
2810 if (no_mixing_hpt_and_radix &&
2811 kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
2812 return false;
2813
2814 if (n_threads < cip->max_subcore_threads)
2815 n_threads = cip->max_subcore_threads;
2816 if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
2817 return false;
2818 cip->max_subcore_threads = n_threads;
2819
2820 sub = cip->n_subcores;
2821 ++cip->n_subcores;
2822 cip->total_threads += vc->num_threads;
2823 cip->subcore_threads[sub] = vc->num_threads;
2824 cip->vc[sub] = vc;
2825 init_vcore_to_run(vc);
2826 list_del_init(&vc->preempt_list);
2827
2828 return true;
2829}
2830
2831
2832
2833
2834
2835static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
2836 int target_threads)
2837{
2838 if (cip->total_threads + pvc->num_threads > target_threads)
2839 return false;
2840
2841 return can_dynamic_split(pvc, cip);
2842}
2843
2844static void prepare_threads(struct kvmppc_vcore *vc)
2845{
2846 int i;
2847 struct kvm_vcpu *vcpu;
2848
2849 for_each_runnable_thread(i, vcpu, vc) {
2850 if (signal_pending(vcpu->arch.run_task))
2851 vcpu->arch.ret = -EINTR;
2852 else if (vcpu->arch.vpa.update_pending ||
2853 vcpu->arch.slb_shadow.update_pending ||
2854 vcpu->arch.dtl.update_pending)
2855 vcpu->arch.ret = RESUME_GUEST;
2856 else
2857 continue;
2858 kvmppc_remove_runnable(vc, vcpu);
2859 wake_up(&vcpu->arch.cpu_run);
2860 }
2861}
2862
2863static void collect_piggybacks(struct core_info *cip, int target_threads)
2864{
2865 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
2866 struct kvmppc_vcore *pvc, *vcnext;
2867
2868 spin_lock(&lp->lock);
2869 list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
2870 if (!spin_trylock(&pvc->lock))
2871 continue;
2872 prepare_threads(pvc);
2873 if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
2874 list_del_init(&pvc->preempt_list);
2875 if (pvc->runner == NULL) {
2876 pvc->vcore_state = VCORE_INACTIVE;
2877 kvmppc_core_end_stolen(pvc);
2878 }
2879 spin_unlock(&pvc->lock);
2880 continue;
2881 }
2882 if (!can_piggyback(pvc, cip, target_threads)) {
2883 spin_unlock(&pvc->lock);
2884 continue;
2885 }
2886 kvmppc_core_end_stolen(pvc);
2887 pvc->vcore_state = VCORE_PIGGYBACK;
2888 if (cip->total_threads >= target_threads)
2889 break;
2890 }
2891 spin_unlock(&lp->lock);
2892}
2893
2894static bool recheck_signals_and_mmu(struct core_info *cip)
2895{
2896 int sub, i;
2897 struct kvm_vcpu *vcpu;
2898 struct kvmppc_vcore *vc;
2899
2900 for (sub = 0; sub < cip->n_subcores; ++sub) {
2901 vc = cip->vc[sub];
2902 if (!vc->kvm->arch.mmu_ready)
2903 return true;
2904 for_each_runnable_thread(i, vcpu, vc)
2905 if (signal_pending(vcpu->arch.run_task))
2906 return true;
2907 }
2908 return false;
2909}
2910
2911static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
2912{
2913 int still_running = 0, i;
2914 u64 now;
2915 long ret;
2916 struct kvm_vcpu *vcpu;
2917
2918 spin_lock(&vc->lock);
2919 now = get_tb();
2920 for_each_runnable_thread(i, vcpu, vc) {
2921
2922
2923
2924
2925
2926
2927
2928 spin_unlock(&vc->lock);
2929
2930 if (now < vcpu->arch.dec_expires &&
2931 kvmppc_core_pending_dec(vcpu))
2932 kvmppc_core_dequeue_dec(vcpu);
2933
2934 trace_kvm_guest_exit(vcpu);
2935
2936 ret = RESUME_GUEST;
2937 if (vcpu->arch.trap)
2938 ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
2939 vcpu->arch.run_task);
2940
2941 vcpu->arch.ret = ret;
2942 vcpu->arch.trap = 0;
2943
2944 spin_lock(&vc->lock);
2945 if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
2946 if (vcpu->arch.pending_exceptions)
2947 kvmppc_core_prepare_to_enter(vcpu);
2948 if (vcpu->arch.ceded)
2949 kvmppc_set_timer(vcpu);
2950 else
2951 ++still_running;
2952 } else {
2953 kvmppc_remove_runnable(vc, vcpu);
2954 wake_up(&vcpu->arch.cpu_run);
2955 }
2956 }
2957 if (!is_master) {
2958 if (still_running > 0) {
2959 kvmppc_vcore_preempt(vc);
2960 } else if (vc->runner) {
2961 vc->vcore_state = VCORE_PREEMPT;
2962 kvmppc_core_start_stolen(vc);
2963 } else {
2964 vc->vcore_state = VCORE_INACTIVE;
2965 }
2966 if (vc->n_runnable > 0 && vc->runner == NULL) {
2967
2968 i = -1;
2969 vcpu = next_runnable_thread(vc, &i);
2970 wake_up(&vcpu->arch.cpu_run);
2971 }
2972 }
2973 spin_unlock(&vc->lock);
2974}
2975
2976
2977
2978
2979
2980
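/*
 * Mark this core as no longer being in the host, just before the primary
 * thread enters the guest. Only relevant when the real-mode host ops
 * structure has been set up.
 */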
2981static inline int kvmppc_clear_host_core(unsigned int cpu)
2982{
2983 int core;
2984
2985 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
2986 return 0;
2987
2988
2989
2990
2991
2992 core = cpu >> threads_shift;
2993 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
2994 return 0;
2995}
2996
2997
2998
2999
3000
3001
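/*
 * Mark this core as being back in the host, after the primary thread has
 * exited the guest.
 */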
3002static inline int kvmppc_set_host_core(unsigned int cpu)
3003{
3004 int core;
3005
3006 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
3007 return 0;
3008
3009
3010
3011
3012
3013 core = cpu >> threads_shift;
3014 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
3015 return 0;
3016}
3017
3018static void set_irq_happened(int trap)
3019{
3020 switch (trap) {
3021 case BOOK3S_INTERRUPT_EXTERNAL:
3022 local_paca->irq_happened |= PACA_IRQ_EE;
3023 break;
3024 case BOOK3S_INTERRUPT_H_DOORBELL:
3025 local_paca->irq_happened |= PACA_IRQ_DBELL;
3026 break;
3027 case BOOK3S_INTERRUPT_HMI:
3028 local_paca->irq_happened |= PACA_IRQ_HMI;
3029 break;
3030 case BOOK3S_INTERRUPT_SYSTEM_RESET:
3031 replay_system_reset();
3032 break;
3033 }
3034}
3035
3036
3037
3038
3039
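/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */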
3040static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
3041{
3042 struct kvm_vcpu *vcpu;
3043 int i;
3044 int srcu_idx;
3045 struct core_info core_info;
3046 struct kvmppc_vcore *pvc;
3047 struct kvm_split_mode split_info, *sip;
3048 int split, subcore_size, active;
3049 int sub;
3050 bool thr0_done;
3051 unsigned long cmd_bit, stat_bit;
3052 int pcpu, thr;
3053 int target_threads;
3054 int controlled_threads;
3055 int trap;
3056 bool is_power8;
3057 bool hpt_on_radix;
3058
3059
3060
3061
3062
3063 prepare_threads(vc);
3064
3065
3066 if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
3067 return;
3068
3069
3070
3071
3072 init_vcore_to_run(vc);
3073 vc->preempt_tb = TB_NIL;
3074
3075
3076
3077
3078
3079
3080 controlled_threads = threads_per_vcore(vc->kvm);
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090 hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
3091 !kvm_is_radix(vc->kvm);
3092 if (((controlled_threads > 1) &&
3093 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
3094 (hpt_on_radix && vc->kvm->arch.threads_indep)) {
3095 for_each_runnable_thread(i, vcpu, vc) {
3096 vcpu->arch.ret = -EBUSY;
3097 kvmppc_remove_runnable(vc, vcpu);
3098 wake_up(&vcpu->arch.cpu_run);
3099 }
3100 goto out;
3101 }
3102
3103
3104
3105
3106
3107 init_core_info(&core_info, vc);
3108 pcpu = smp_processor_id();
3109 target_threads = controlled_threads;
3110 if (target_smt_mode && target_smt_mode < target_threads)
3111 target_threads = target_smt_mode;
3112 if (vc->num_threads < target_threads)
3113 collect_piggybacks(&core_info, target_threads);
3114
3115
3116
3117
3118
3119
3120 pcpu = smp_processor_id();
3121 if (kvm_is_radix(vc->kvm)) {
3122 for (sub = 0; sub < core_info.n_subcores; ++sub)
3123 for_each_runnable_thread(i, vcpu, core_info.vc[sub])
3124 kvmppc_prepare_radix_vcpu(vcpu, pcpu);
3125 }
3126
3127
3128
3129
3130
3131
3132
3133
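 /*
  * Hard-disable interrupts, then re-check: if an interrupt, reschedule
  * or signal is pending, or the guest MMU is no longer ready, back out
  * without entering the guest.
  */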
3134 local_irq_disable();
3135 hard_irq_disable();
3136 if (lazy_irq_pending() || need_resched() ||
3137 recheck_signals_and_mmu(&core_info)) {
3138 local_irq_enable();
3139 vc->vcore_state = VCORE_INACTIVE;
3140
3141 for (sub = 1; sub < core_info.n_subcores; ++sub) {
3142 pvc = core_info.vc[sub];
3143
3144 kvmppc_vcore_preempt(pvc);
3145 spin_unlock(&pvc->lock);
3146 }
3147 for (i = 0; i < controlled_threads; ++i)
3148 kvmppc_release_hwthread(pcpu + i);
3149 return;
3150 }
3151
3152 kvmppc_clear_host_core(pcpu);
3153
3154
3155 subcore_size = threads_per_subcore;
3156 cmd_bit = stat_bit = 0;
3157 split = core_info.n_subcores;
3158 sip = NULL;
3159 is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
3160 && !cpu_has_feature(CPU_FTR_ARCH_300);
3161
3162 if (split > 1 || hpt_on_radix) {
3163 sip = &split_info;
3164 memset(&split_info, 0, sizeof(split_info));
3165 for (sub = 0; sub < core_info.n_subcores; ++sub)
3166 split_info.vc[sub] = core_info.vc[sub];
3167
3168 if (is_power8) {
3169 if (split == 2 && (dynamic_mt_modes & 2)) {
3170 cmd_bit = HID0_POWER8_1TO2LPAR;
3171 stat_bit = HID0_POWER8_2LPARMODE;
3172 } else {
3173 split = 4;
3174 cmd_bit = HID0_POWER8_1TO4LPAR;
3175 stat_bit = HID0_POWER8_4LPARMODE;
3176 }
3177 subcore_size = MAX_SMT_THREADS / split;
3178 split_info.rpr = mfspr(SPRN_RPR);
3179 split_info.pmmar = mfspr(SPRN_PMMAR);
3180 split_info.ldbar = mfspr(SPRN_LDBAR);
3181 split_info.subcore_size = subcore_size;
3182 } else {
3183 split_info.subcore_size = 1;
3184 if (hpt_on_radix) {
3185
3186 split_info.lpcr_req = vc->lpcr;
3187 split_info.lpidr_req = vc->kvm->arch.lpid;
3188 split_info.host_lpcr = vc->kvm->arch.host_lpcr;
3189 split_info.do_set = 1;
3190 }
3191 }
3192
3193
3194 smp_wmb();
3195 }
3196
3197 for (thr = 0; thr < controlled_threads; ++thr) {
3198 struct paca_struct *paca = paca_ptrs[pcpu + thr];
3199
3200 paca->kvm_hstate.tid = thr;
3201 paca->kvm_hstate.napping = 0;
3202 paca->kvm_hstate.kvm_split_mode = sip;
3203 }
3204
3205
3206 if (cmd_bit) {
3207 unsigned long hid0 = mfspr(SPRN_HID0);
3208
3209 hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
3210 mb();
3211 mtspr(SPRN_HID0, hid0);
3212 isync();
3213 for (;;) {
3214 hid0 = mfspr(SPRN_HID0);
3215 if (hid0 & stat_bit)
3216 break;
3217 cpu_relax();
3218 }
3219 }
3220
3221
3222
3223
3224
3225
3226 if (is_power8) {
3227 unsigned long rwmr_val = RWMR_RPA_P8_8THREAD;
3228 int n_online = atomic_read(&vc->online_count);
3229
3230
3231
3232
3233
3234 if (split == 1 && threads_per_subcore == MAX_SMT_THREADS &&
3235 n_online >= 1 && n_online <= MAX_SMT_THREADS)
3236 rwmr_val = p8_rwmr_values[n_online];
3237 mtspr(SPRN_RWMR, rwmr_val);
3238 }
3239
3240
3241 active = 0;
3242 for (sub = 0; sub < core_info.n_subcores; ++sub) {
3243 thr = is_power8 ? subcore_thread_map[sub] : sub;
3244 thr0_done = false;
3245 active |= 1 << thr;
3246 pvc = core_info.vc[sub];
3247 pvc->pcpu = pcpu + thr;
3248 for_each_runnable_thread(i, vcpu, pvc) {
3249 kvmppc_start_thread(vcpu, pvc);
3250 kvmppc_create_dtl_entry(vcpu, pvc);
3251 trace_kvm_guest_enter(vcpu);
3252 if (!vcpu->arch.ptid)
3253 thr0_done = true;
3254 active |= 1 << (thr + vcpu->arch.ptid);
3255 }
3256
3257
3258
3259
3260 if (!thr0_done)
3261 kvmppc_start_thread(NULL, pvc);
3262 }
3263
3264
3265
3266
3267
3268 smp_mb();
3269
3270
3271
3272
3273
3274
3275
3276
3277 if (cmd_bit || hpt_on_radix) {
3278 split_info.do_nap = 1;
3279 for (thr = 1; thr < threads_per_subcore; ++thr)
3280 if (!(active & (1 << thr)))
3281 kvmppc_ipi_thread(pcpu + thr);
3282 }
3283
3284 vc->vcore_state = VCORE_RUNNING;
3285 preempt_disable();
3286
3287 trace_kvmppc_run_core(vc, 0);
3288
3289 for (sub = 0; sub < core_info.n_subcores; ++sub)
3290 spin_unlock(&core_info.vc[sub]->lock);
3291
3292 guest_enter_irqoff();
3293
3294 srcu_idx = srcu_read_lock(&vc->kvm->srcu);
3295
3296 this_cpu_disable_ftrace();
3297
3298
3299
3300
3301
3302 trace_hardirqs_on();
3303
3304 trap = __kvmppc_vcore_entry();
3305
3306 trace_hardirqs_off();
3307
3308 this_cpu_enable_ftrace();
3309
3310 srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
3311
3312 set_irq_happened(trap);
3313
3314 spin_lock(&vc->lock);
3315
3316 vc->vcore_state = VCORE_EXITING;
3317
3318
3319 kvmppc_wait_for_nap(controlled_threads);
3320
3321
3322 if (cmd_bit) {
3323 unsigned long hid0 = mfspr(SPRN_HID0);
3324 unsigned long loops = 0;
3325
3326 hid0 &= ~HID0_POWER8_DYNLPARDIS;
3327 stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
3328 mb();
3329 mtspr(SPRN_HID0, hid0);
3330 isync();
3331 for (;;) {
3332 hid0 = mfspr(SPRN_HID0);
3333 if (!(hid0 & stat_bit))
3334 break;
3335 cpu_relax();
3336 ++loops;
3337 }
3338 } else if (hpt_on_radix) {
3339
3340 for (thr = 1; thr < controlled_threads; ++thr) {
3341 struct paca_struct *paca = paca_ptrs[pcpu + thr];
3342
3343 while (paca->kvm_hstate.kvm_split_mode) {
3344 HMT_low();
3345 barrier();
3346 }
3347 HMT_medium();
3348 }
3349 }
3350 split_info.do_nap = 0;
3351
3352 kvmppc_set_host_core(pcpu);
3353
3354 local_irq_enable();
3355 guest_exit();
3356
3357
3358 for (i = 0; i < controlled_threads; ++i) {
3359 kvmppc_release_hwthread(pcpu + i);
3360 if (sip && sip->napped[i])
3361 kvmppc_ipi_thread(pcpu + i);
3362 cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
3363 }
3364
3365 spin_unlock(&vc->lock);
3366
3367
3368 smp_mb();
3369
3370 preempt_enable();
3371
3372 for (sub = 0; sub < core_info.n_subcores; ++sub) {
3373 pvc = core_info.vc[sub];
3374 post_guest_process(pvc, pvc == vc);
3375 }
3376
3377 spin_lock(&vc->lock);
3378
3379 out:
3380 vc->vcore_state = VCORE_INACTIVE;
3381 trace_kvmppc_run_core(vc, 1);
3382}
3383
3384
3385
3386
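/*
 * Load up hypervisor-mode (HV privileged) registers for the guest, enter
 * the guest via __kvmhv_vcpu_entry_p9(), and restore the host values on
 * the way out. Used on POWER9 and later.
 */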
3387static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
3388 unsigned long lpcr)
3389{
3390 struct kvmppc_vcore *vc = vcpu->arch.vcore;
3391 s64 hdec;
3392 u64 tb, purr, spurr;
3393 int trap;
3394 unsigned long host_hfscr = mfspr(SPRN_HFSCR);
3395 unsigned long host_ciabr = mfspr(SPRN_CIABR);
3396 unsigned long host_dawr = mfspr(SPRN_DAWR);
3397 unsigned long host_dawrx = mfspr(SPRN_DAWRX);
3398 unsigned long host_psscr = mfspr(SPRN_PSSCR);
3399 unsigned long host_pidr = mfspr(SPRN_PID);
3400
3401 hdec = time_limit - mftb();
3402 if (hdec < 0)
3403 return BOOK3S_INTERRUPT_HV_DECREMENTER;
3404 mtspr(SPRN_HDEC, hdec);
3405
3406 if (vc->tb_offset) {
3407 u64 new_tb = mftb() + vc->tb_offset;
3408 mtspr(SPRN_TBU40, new_tb);
3409 tb = mftb();
3410 if ((tb & 0xffffff) < (new_tb & 0xffffff))
3411 mtspr(SPRN_TBU40, new_tb + 0x1000000);
3412 vc->tb_offset_applied = vc->tb_offset;
3413 }
3414
3415 if (vc->pcr)
3416 mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
3417 mtspr(SPRN_DPDES, vc->dpdes);
3418 mtspr(SPRN_VTB, vc->vtb);
3419
3420 local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
3421 local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
3422 mtspr(SPRN_PURR, vcpu->arch.purr);
3423 mtspr(SPRN_SPURR, vcpu->arch.spurr);
3424
3425 if (dawr_enabled()) {
3426 mtspr(SPRN_DAWR, vcpu->arch.dawr);
3427 mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
3428 }
3429 mtspr(SPRN_CIABR, vcpu->arch.ciabr);
3430 mtspr(SPRN_IC, vcpu->arch.ic);
3431 mtspr(SPRN_PID, vcpu->arch.pid);
3432
3433 mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
3434 (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
3435
3436 mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
3437
3438 mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
3439 mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
3440 mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
3441 mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
3442
3443 mtspr(SPRN_AMOR, ~0UL);
3444
3445 mtspr(SPRN_LPCR, lpcr);
3446 isync();
3447
3448 kvmppc_xive_push_vcpu(vcpu);
3449
3450 mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
3451 mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
3452
3453 trap = __kvmhv_vcpu_entry_p9(vcpu);
3454
3455
3456 purr = mfspr(SPRN_PURR);
3457 spurr = mfspr(SPRN_SPURR);
3458 mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
3459 purr - vcpu->arch.purr);
3460 mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
3461 spurr - vcpu->arch.spurr);
3462 vcpu->arch.purr = purr;
3463 vcpu->arch.spurr = spurr;
3464
3465 vcpu->arch.ic = mfspr(SPRN_IC);
3466 vcpu->arch.pid = mfspr(SPRN_PID);
3467 vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
3468
3469 vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
3470 vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
3471 vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
3472 vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
3473
3474
3475 mtspr(SPRN_PSSCR, host_psscr |
3476 (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
3477 mtspr(SPRN_HFSCR, host_hfscr);
3478 mtspr(SPRN_CIABR, host_ciabr);
3479 mtspr(SPRN_DAWR, host_dawr);
3480 mtspr(SPRN_DAWRX, host_dawrx);
3481 mtspr(SPRN_PID, host_pidr);
3482
3483
3484
3485
3486
3487 asm volatile("eieio; tlbsync; ptesync");
3488
3489 mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid);
3490 isync();
3491
3492 vc->dpdes = mfspr(SPRN_DPDES);
3493 vc->vtb = mfspr(SPRN_VTB);
3494 mtspr(SPRN_DPDES, 0);
3495 if (vc->pcr)
3496 mtspr(SPRN_PCR, PCR_MASK);
3497
3498 if (vc->tb_offset_applied) {
3499 u64 new_tb = mftb() - vc->tb_offset_applied;
3500 mtspr(SPRN_TBU40, new_tb);
3501 tb = mftb();
3502 if ((tb & 0xffffff) < (new_tb & 0xffffff))
3503 mtspr(SPRN_TBU40, new_tb + 0x1000000);
3504 vc->tb_offset_applied = 0;
3505 }
3506
3507 mtspr(SPRN_HDEC, 0x7fffffff);
3508 mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
3509
3510 return trap;
3511}
3512
3513
3514
3515
3516
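/*
 * Virtual-mode guest entry for POWER9 and later: save host state, load
 * guest state, then enter the guest either via the H_ENTER_NESTED hcall
 * (when running as a nested hypervisor under pseries) or directly via
 * kvmhv_load_hv_regs_and_go().
 */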
3517int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
3518 unsigned long lpcr)
3519{
3520 struct kvmppc_vcore *vc = vcpu->arch.vcore;
3521 unsigned long host_dscr = mfspr(SPRN_DSCR);
3522 unsigned long host_tidr = mfspr(SPRN_TIDR);
3523 unsigned long host_iamr = mfspr(SPRN_IAMR);
3524 unsigned long host_amr = mfspr(SPRN_AMR);
3525 s64 dec;
3526 u64 tb;
3527 int trap, save_pmu;
3528
3529 dec = mfspr(SPRN_DEC);
3530 tb = mftb();
3531 if (dec < 512)
3532 return BOOK3S_INTERRUPT_HV_DECREMENTER;
3533 local_paca->kvm_hstate.dec_expires = dec + tb;
3534 if (local_paca->kvm_hstate.dec_expires < time_limit)
3535 time_limit = local_paca->kvm_hstate.dec_expires;
3536
3537 vcpu->arch.ceded = 0;
3538
3539 kvmhv_save_host_pmu();
3540
3541 kvmppc_subcore_enter_guest();
3542
3543 vc->entry_exit_map = 1;
3544 vc->in_guest = 1;
3545
3546 if (vcpu->arch.vpa.pinned_addr) {
3547 struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
3548 u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
3549 lp->yield_count = cpu_to_be32(yield_count);
3550 vcpu->arch.vpa.dirty = 1;
3551 }
3552
3553 if (cpu_has_feature(CPU_FTR_TM) ||
3554 cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
3555 kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
3556
3557 kvmhv_load_guest_pmu(vcpu);
3558
3559 msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
3560 load_fp_state(&vcpu->arch.fp);
3561#ifdef CONFIG_ALTIVEC
3562 load_vr_state(&vcpu->arch.vr);
3563#endif
3564 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
3565
3566 mtspr(SPRN_DSCR, vcpu->arch.dscr);
3567 mtspr(SPRN_IAMR, vcpu->arch.iamr);
3568 mtspr(SPRN_PSPB, vcpu->arch.pspb);
3569 mtspr(SPRN_FSCR, vcpu->arch.fscr);
3570 mtspr(SPRN_TAR, vcpu->arch.tar);
3571 mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
3572 mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
3573 mtspr(SPRN_BESCR, vcpu->arch.bescr);
3574 mtspr(SPRN_WORT, vcpu->arch.wort);
3575 mtspr(SPRN_TIDR, vcpu->arch.tid);
3576 mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
3577 mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
3578 mtspr(SPRN_AMR, vcpu->arch.amr);
3579 mtspr(SPRN_UAMOR, vcpu->arch.uamor);
3580
3581 if (!(vcpu->arch.ctrl & 1))
3582 mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
3583
3584 mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
3585
3586 if (kvmhv_on_pseries()) {
3587
3588
3589
3590
3591
3592
3593 unsigned long host_psscr;
3594
3595 struct hv_guest_state hvregs;
3596
3597 host_psscr = mfspr(SPRN_PSSCR_PR);
3598 mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
3599 kvmhv_save_hv_regs(vcpu, &hvregs);
3600 hvregs.lpcr = lpcr;
3601 vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
3602 hvregs.version = HV_GUEST_STATE_VERSION;
3603 if (vcpu->arch.nested) {
3604 hvregs.lpid = vcpu->arch.nested->shadow_lpid;
3605 hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
3606 } else {
3607 hvregs.lpid = vcpu->kvm->arch.lpid;
3608 hvregs.vcpu_token = vcpu->vcpu_id;
3609 }
3610 hvregs.hdec_expiry = time_limit;
3611 trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
3612 __pa(&vcpu->arch.regs));
3613 kvmhv_restore_hv_return_state(vcpu, &hvregs);
3614 vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
3615 vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
3616 vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
3617 vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
3618 mtspr(SPRN_PSSCR_PR, host_psscr);
3619
3620
3621 if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
3622 kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
3623 kvmppc_nested_cede(vcpu);
3624 trap = 0;
3625 }
3626 } else {
3627 trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
3628 }
3629
3630 vcpu->arch.slb_max = 0;
3631 dec = mfspr(SPRN_DEC);
3632 if (!(lpcr & LPCR_LD))
3633 dec = (s32) dec;
3634 tb = mftb();
3635 vcpu->arch.dec_expires = dec + tb;
3636 vcpu->cpu = -1;
3637 vcpu->arch.thread_cpu = -1;
3638 vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
3639
3640 vcpu->arch.iamr = mfspr(SPRN_IAMR);
3641 vcpu->arch.pspb = mfspr(SPRN_PSPB);
3642 vcpu->arch.fscr = mfspr(SPRN_FSCR);
3643 vcpu->arch.tar = mfspr(SPRN_TAR);
3644 vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
3645 vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
3646 vcpu->arch.bescr = mfspr(SPRN_BESCR);
3647 vcpu->arch.wort = mfspr(SPRN_WORT);
3648 vcpu->arch.tid = mfspr(SPRN_TIDR);
3649 vcpu->arch.amr = mfspr(SPRN_AMR);
3650 vcpu->arch.uamor = mfspr(SPRN_UAMOR);
3651 vcpu->arch.dscr = mfspr(SPRN_DSCR);
3652
3653 mtspr(SPRN_PSPB, 0);
3654 mtspr(SPRN_WORT, 0);
3655 mtspr(SPRN_UAMOR, 0);
3656 mtspr(SPRN_DSCR, host_dscr);
3657 mtspr(SPRN_TIDR, host_tidr);
3658 mtspr(SPRN_IAMR, host_iamr);
3659 mtspr(SPRN_PSPB, 0);
3660
3661 if (host_amr != vcpu->arch.amr)
3662 mtspr(SPRN_AMR, host_amr);
3663
3664 msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
3665 store_fp_state(&vcpu->arch.fp);
3666#ifdef CONFIG_ALTIVEC
3667 store_vr_state(&vcpu->arch.vr);
3668#endif
3669 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
3670
3671 if (cpu_has_feature(CPU_FTR_TM) ||
3672 cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
3673 kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
3674
3675 save_pmu = 1;
3676 if (vcpu->arch.vpa.pinned_addr) {
3677 struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
3678 u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
3679 lp->yield_count = cpu_to_be32(yield_count);
3680 vcpu->arch.vpa.dirty = 1;
3681 save_pmu = lp->pmcregs_in_use;
3682 }
3683
3684 save_pmu |= nesting_enabled(vcpu->kvm);
3685
3686 kvmhv_save_guest_pmu(vcpu, save_pmu);
3687
3688 vc->entry_exit_map = 0x101;
3689 vc->in_guest = 0;
3690
3691 mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
3692 mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
3693
3694 kvmhv_load_host_pmu();
3695
3696 kvmppc_subcore_exit_guest();
3697
3698 return trap;
3699}
3700
3701
3702
3703
3704
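/*
 * Wait for some other vcpu thread to run this vcpu on a core, dropping
 * the vcore lock while we sleep.
 */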
3705static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
3706 struct kvm_vcpu *vcpu, int wait_state)
3707{
3708 DEFINE_WAIT(wait);
3709
3710 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
3711 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
3712 spin_unlock(&vc->lock);
3713 schedule();
3714 spin_lock(&vc->lock);
3715 }
3716 finish_wait(&vcpu->arch.cpu_run, &wait);
3717}
3718
3719static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
3720{
3721 if (!halt_poll_ns_grow)
3722 return;
3723
3724 vc->halt_poll_ns *= halt_poll_ns_grow;
3725 if (vc->halt_poll_ns < halt_poll_ns_grow_start)
3726 vc->halt_poll_ns = halt_poll_ns_grow_start;
3727}
3728
3729static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
3730{
3731 if (halt_poll_ns_shrink == 0)
3732 vc->halt_poll_ns = 0;
3733 else
3734 vc->halt_poll_ns /= halt_poll_ns_shrink;
3735}
3736
3737#ifdef CONFIG_KVM_XICS
3738static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
3739{
3740 if (!xics_on_xive())
3741 return false;
3742 return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
3743 vcpu->arch.xive_saved_state.cppr;
3744}
3745#else
3746static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
3747{
3748 return false;
3749}
3750#endif
3751
3752static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
3753{
3754 if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
3755 kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
3756 return true;
3757
3758 return false;
3759}
3760
3761
3762
3763
3764
3765static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
3766{
3767 struct kvm_vcpu *vcpu;
3768 int i;
3769
3770 for_each_runnable_thread(i, vcpu, vc) {
3771 if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
3772 return 1;
3773 }
3774
3775 return 0;
3776}
3777
3778
3779
3780
3781
3782static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
3783{
3784 ktime_t cur, start_poll, start_wait;
3785 int do_sleep = 1;
3786 u64 block_ns;
3787 DECLARE_SWAITQUEUE(wait);
3788
3789
3790 cur = start_poll = ktime_get();
3791 if (vc->halt_poll_ns) {
3792 ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
3793 ++vc->runner->stat.halt_attempted_poll;
3794
3795 vc->vcore_state = VCORE_POLLING;
3796 spin_unlock(&vc->lock);
3797
3798 do {
3799 if (kvmppc_vcore_check_block(vc)) {
3800 do_sleep = 0;
3801 break;
3802 }
3803 cur = ktime_get();
3804 } while (single_task_running() && ktime_before(cur, stop));
3805
3806 spin_lock(&vc->lock);
3807 vc->vcore_state = VCORE_INACTIVE;
3808
3809 if (!do_sleep) {
3810 ++vc->runner->stat.halt_successful_poll;
3811 goto out;
3812 }
3813 }
3814
3815 prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
3816
3817 if (kvmppc_vcore_check_block(vc)) {
3818 finish_swait(&vc->wq, &wait);
3819 do_sleep = 0;
3820
3821 if (vc->halt_poll_ns)
3822 ++vc->runner->stat.halt_successful_poll;
3823 goto out;
3824 }
3825
3826 start_wait = ktime_get();
3827
3828 vc->vcore_state = VCORE_SLEEPING;
3829 trace_kvmppc_vcore_blocked(vc, 0);
3830 spin_unlock(&vc->lock);
3831 schedule();
3832 finish_swait(&vc->wq, &wait);
3833 spin_lock(&vc->lock);
3834 vc->vcore_state = VCORE_INACTIVE;
3835 trace_kvmppc_vcore_blocked(vc, 1);
3836 ++vc->runner->stat.halt_successful_wait;
3837
3838 cur = ktime_get();
3839
3840out:
3841 block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
3842
3843
3844 if (do_sleep) {
3845 vc->runner->stat.halt_wait_ns +=
3846 ktime_to_ns(cur) - ktime_to_ns(start_wait);
3847
3848 if (vc->halt_poll_ns)
3849 vc->runner->stat.halt_poll_fail_ns +=
3850 ktime_to_ns(start_wait) -
3851 ktime_to_ns(start_poll);
3852 } else {
3853
3854 if (vc->halt_poll_ns)
3855 vc->runner->stat.halt_poll_success_ns +=
3856 ktime_to_ns(cur) -
3857 ktime_to_ns(start_poll);
3858 }
3859
3860
3861 if (halt_poll_ns) {
3862 if (block_ns <= vc->halt_poll_ns)
3863 ;
3864
3865 else if (vc->halt_poll_ns && block_ns > halt_poll_ns)
3866 shrink_halt_poll_ns(vc);
3867
3868 else if (vc->halt_poll_ns < halt_poll_ns &&
3869 block_ns < halt_poll_ns)
3870 grow_halt_poll_ns(vc);
3871 if (vc->halt_poll_ns > halt_poll_ns)
3872 vc->halt_poll_ns = halt_poll_ns;
3873 } else
3874 vc->halt_poll_ns = 0;
3875
3876 trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
3877}
3878
3879
3880
3881
3882
3883
3884static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
3885{
3886 int r = 0;
3887 struct kvm *kvm = vcpu->kvm;
3888
3889 mutex_lock(&kvm->arch.mmu_setup_lock);
3890 if (!kvm->arch.mmu_ready) {
3891 if (!kvm_is_radix(kvm))
3892 r = kvmppc_hv_setup_htab_rma(vcpu);
3893 if (!r) {
3894 if (cpu_has_feature(CPU_FTR_ARCH_300))
3895 kvmppc_setup_partition_table(kvm);
3896 kvm->arch.mmu_ready = 1;
3897 }
3898 }
3899 mutex_unlock(&kvm->arch.mmu_setup_lock);
3900 return r;
3901}
3902
3903static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3904{
3905 int n_ceded, i, r;
3906 struct kvmppc_vcore *vc;
3907 struct kvm_vcpu *v;
3908
3909 trace_kvmppc_run_vcpu_enter(vcpu);
3910
3911 kvm_run->exit_reason = 0;
3912 vcpu->arch.ret = RESUME_GUEST;
3913 vcpu->arch.trap = 0;
3914 kvmppc_update_vpas(vcpu);
3915
3916
3917
3918
3919 vc = vcpu->arch.vcore;
3920 spin_lock(&vc->lock);
3921 vcpu->arch.ceded = 0;
3922 vcpu->arch.run_task = current;
3923 vcpu->arch.kvm_run = kvm_run;
3924 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
3925 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
3926 vcpu->arch.busy_preempt = TB_NIL;
3927 WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
3928 ++vc->n_runnable;
3929
3930
3931
3932
3933
3934
3935 if (!signal_pending(current)) {
3936 if ((vc->vcore_state == VCORE_PIGGYBACK ||
3937 vc->vcore_state == VCORE_RUNNING) &&
3938 !VCORE_IS_EXITING(vc)) {
3939 kvmppc_create_dtl_entry(vcpu, vc);
3940 kvmppc_start_thread(vcpu, vc);
3941 trace_kvm_guest_enter(vcpu);
3942 } else if (vc->vcore_state == VCORE_SLEEPING) {
3943 swake_up_one(&vc->wq);
3944 }
3945
3946 }
3947
3948 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
3949 !signal_pending(current)) {
3950
3951 if (!vcpu->kvm->arch.mmu_ready) {
3952 spin_unlock(&vc->lock);
3953 r = kvmhv_setup_mmu(vcpu);
3954 spin_lock(&vc->lock);
3955 if (r) {
3956 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3957 kvm_run->fail_entry.
3958 hardware_entry_failure_reason = 0;
3959 vcpu->arch.ret = r;
3960 break;
3961 }
3962 }
3963
3964 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
3965 kvmppc_vcore_end_preempt(vc);
3966
3967 if (vc->vcore_state != VCORE_INACTIVE) {
3968 kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
3969 continue;
3970 }
3971 for_each_runnable_thread(i, v, vc) {
3972 kvmppc_core_prepare_to_enter(v);
3973 if (signal_pending(v->arch.run_task)) {
3974 kvmppc_remove_runnable(vc, v);
3975 v->stat.signal_exits++;
3976 v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
3977 v->arch.ret = -EINTR;
3978 wake_up(&v->arch.cpu_run);
3979 }
3980 }
3981 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
3982 break;
3983 n_ceded = 0;
3984 for_each_runnable_thread(i, v, vc) {
3985 if (!kvmppc_vcpu_woken(v))
3986 n_ceded += v->arch.ceded;
3987 else
3988 v->arch.ceded = 0;
3989 }
3990 vc->runner = vcpu;
3991 if (n_ceded == vc->n_runnable) {
3992 kvmppc_vcore_blocked(vc);
3993 } else if (need_resched()) {
3994 kvmppc_vcore_preempt(vc);
3995
3996 cond_resched_lock(&vc->lock);
3997 if (vc->vcore_state == VCORE_PREEMPT)
3998 kvmppc_vcore_end_preempt(vc);
3999 } else {
4000 kvmppc_run_core(vc);
4001 }
4002 vc->runner = NULL;
4003 }
4004
4005 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
4006 (vc->vcore_state == VCORE_RUNNING ||
4007 vc->vcore_state == VCORE_EXITING ||
4008 vc->vcore_state == VCORE_PIGGYBACK))
4009 kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
4010
4011 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
4012 kvmppc_vcore_end_preempt(vc);
4013
4014 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
4015 kvmppc_remove_runnable(vc, vcpu);
4016 vcpu->stat.signal_exits++;
4017 kvm_run->exit_reason = KVM_EXIT_INTR;
4018 vcpu->arch.ret = -EINTR;
4019 }
4020
4021 if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
4022
4023 i = -1;
4024 v = next_runnable_thread(vc, &i);
4025 wake_up(&v->arch.cpu_run);
4026 }
4027
4028 trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
4029 spin_unlock(&vc->lock);
4030 return vcpu->arch.ret;
4031}
4032
4033int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
4034 struct kvm_vcpu *vcpu, u64 time_limit,
4035 unsigned long lpcr)
4036{
4037 int trap, r, pcpu;
4038 int srcu_idx, lpid;
4039 struct kvmppc_vcore *vc;
4040 struct kvm *kvm = vcpu->kvm;
4041 struct kvm_nested_guest *nested = vcpu->arch.nested;
4042
4043 trace_kvmppc_run_vcpu_enter(vcpu);
4044
4045 kvm_run->exit_reason = 0;
4046 vcpu->arch.ret = RESUME_GUEST;
4047 vcpu->arch.trap = 0;
4048
4049 vc = vcpu->arch.vcore;
4050 vcpu->arch.ceded = 0;
4051 vcpu->arch.run_task = current;
4052 vcpu->arch.kvm_run = kvm_run;
4053 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
4054 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
4055 vcpu->arch.busy_preempt = TB_NIL;
4056 vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
4057 vc->runnable_threads[0] = vcpu;
4058 vc->n_runnable = 1;
4059 vc->runner = vcpu;
4060
4061
4062 if (!kvm->arch.mmu_ready)
4063 kvmhv_setup_mmu(vcpu);
4064
4065 if (need_resched())
4066 cond_resched();
4067
4068 kvmppc_update_vpas(vcpu);
4069
4070 init_vcore_to_run(vc);
4071 vc->preempt_tb = TB_NIL;
4072
4073 preempt_disable();
4074 pcpu = smp_processor_id();
4075 vc->pcpu = pcpu;
4076 kvmppc_prepare_radix_vcpu(vcpu, pcpu);
4077
4078 local_irq_disable();
4079 hard_irq_disable();
4080 if (signal_pending(current))
4081 goto sigpend;
4082 if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
4083 goto out;
4084
4085 if (!nested) {
4086 kvmppc_core_prepare_to_enter(vcpu);
4087 if (vcpu->arch.doorbell_request) {
4088 vc->dpdes = 1;
4089 smp_wmb();
4090 vcpu->arch.doorbell_request = 0;
4091 }
4092 if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
4093 &vcpu->arch.pending_exceptions))
4094 lpcr |= LPCR_MER;
4095 } else if (vcpu->arch.pending_exceptions ||
4096 vcpu->arch.doorbell_request ||
4097 xive_interrupt_pending(vcpu)) {
4098 vcpu->arch.ret = RESUME_HOST;
4099 goto out;
4100 }
4101
4102 kvmppc_clear_host_core(pcpu);
4103
4104 local_paca->kvm_hstate.tid = 0;
4105 local_paca->kvm_hstate.napping = 0;
4106 local_paca->kvm_hstate.kvm_split_mode = NULL;
4107 kvmppc_start_thread(vcpu, vc);
4108 kvmppc_create_dtl_entry(vcpu, vc);
4109 trace_kvm_guest_enter(vcpu);
4110
4111 vc->vcore_state = VCORE_RUNNING;
4112 trace_kvmppc_run_core(vc, 0);
4113
4114 if (cpu_has_feature(CPU_FTR_HVMODE)) {
4115 lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
4116 mtspr(SPRN_LPID, lpid);
4117 isync();
4118 kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
4119 }
4120
4121 guest_enter_irqoff();
4122
4123 srcu_idx = srcu_read_lock(&kvm->srcu);
4124
4125 this_cpu_disable_ftrace();
4126
4127
4128 trace_hardirqs_on();
4129
4130 trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr);
4131 vcpu->arch.trap = trap;
4132
4133 trace_hardirqs_off();
4134
4135 this_cpu_enable_ftrace();
4136
4137 srcu_read_unlock(&kvm->srcu, srcu_idx);
4138
4139 if (cpu_has_feature(CPU_FTR_HVMODE)) {
4140 mtspr(SPRN_LPID, kvm->arch.host_lpid);
4141 isync();
4142 }
4143
4144 set_irq_happened(trap);
4145
4146 kvmppc_set_host_core(pcpu);
4147
4148 local_irq_enable();
4149 guest_exit();
4150
4151 cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
4152
4153 preempt_enable();
4154
4155
4156
4157
4158
4159
4160 if (kvmppc_core_pending_dec(vcpu) &&
4161 ((get_tb() < vcpu->arch.dec_expires) ||
4162 (trap == BOOK3S_INTERRUPT_SYSCALL &&
4163 kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
4164 kvmppc_core_dequeue_dec(vcpu);
4165
4166 trace_kvm_guest_exit(vcpu);
4167 r = RESUME_GUEST;
4168 if (trap) {
4169 if (!nested)
4170 r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
4171 else
4172 r = kvmppc_handle_nested_exit(kvm_run, vcpu);
4173 }
4174 vcpu->arch.ret = r;
4175
4176 if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
4177 !kvmppc_vcpu_woken(vcpu)) {
4178 kvmppc_set_timer(vcpu);
4179 while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
4180 if (signal_pending(current)) {
4181 vcpu->stat.signal_exits++;
4182 kvm_run->exit_reason = KVM_EXIT_INTR;
4183 vcpu->arch.ret = -EINTR;
4184 break;
4185 }
4186 spin_lock(&vc->lock);
4187 kvmppc_vcore_blocked(vc);
4188 spin_unlock(&vc->lock);
4189 }
4190 }
4191 vcpu->arch.ceded = 0;
4192
4193 vc->vcore_state = VCORE_INACTIVE;
4194 trace_kvmppc_run_core(vc, 1);
4195
4196 done:
4197 kvmppc_remove_runnable(vc, vcpu);
4198 trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
4199
4200 return vcpu->arch.ret;
4201
4202 sigpend:
4203 vcpu->stat.signal_exits++;
4204 kvm_run->exit_reason = KVM_EXIT_INTR;
4205 vcpu->arch.ret = -EINTR;
4206 out:
4207 local_irq_enable();
4208 preempt_enable();
4209 goto done;
4210}
4211
4212static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
4213{
4214 int r;
4215 int srcu_idx;
4216 unsigned long ebb_regs[3] = {};
4217 unsigned long user_tar = 0;
4218 unsigned int user_vrsave;
4219 struct kvm *kvm;
4220
4221 if (!vcpu->arch.sane) {
4222 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4223 return -EINVAL;
4224 }
4225
4226
4227
4228
4229
4230
4231
4232#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
4233 if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
4234 (current->thread.regs->msr & MSR_TM)) {
4235 if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
4236 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
4237 run->fail_entry.hardware_entry_failure_reason = 0;
4238 return -EINVAL;
4239 }
4240
4241 mtmsr(mfmsr() | MSR_TM);
4242 current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
4243 current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
4244 current->thread.tm_texasr = mfspr(SPRN_TEXASR);
4245 current->thread.regs->msr &= ~MSR_TM;
4246 }
4247#endif
4248
4249
4250
4251
4252
4253 if (!vcpu->arch.online) {
4254 atomic_inc(&vcpu->arch.vcore->online_count);
4255 vcpu->arch.online = 1;
4256 }
4257
4258 kvmppc_core_prepare_to_enter(vcpu);
4259
4260
4261 if (signal_pending(current)) {
4262 run->exit_reason = KVM_EXIT_INTR;
4263 return -EINTR;
4264 }
4265
4266 kvm = vcpu->kvm;
4267 atomic_inc(&kvm->arch.vcpus_running);
4268
4269 smp_mb();
4270
4271 flush_all_to_thread(current);
4272
4273
4274 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
4275 ebb_regs[0] = mfspr(SPRN_EBBHR);
4276 ebb_regs[1] = mfspr(SPRN_EBBRR);
4277 ebb_regs[2] = mfspr(SPRN_BESCR);
4278 user_tar = mfspr(SPRN_TAR);
4279 }
4280 user_vrsave = mfspr(SPRN_VRSAVE);
4281
4282 vcpu->arch.wqp = &vcpu->arch.vcore->wq;
4283 vcpu->arch.pgdir = current->mm->pgd;
4284 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
4285
4286 do {
4287
4288
4289
4290
4291
4292
4293
4294
4295 if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
4296 !no_mixing_hpt_and_radix)
4297 r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0,
4298 vcpu->arch.vcore->lpcr);
4299 else
4300 r = kvmppc_run_vcpu(run, vcpu);
4301
4302 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
4303 !(vcpu->arch.shregs.msr & MSR_PR)) {
4304 trace_kvm_hcall_enter(vcpu);
4305 r = kvmppc_pseries_do_hcall(vcpu);
4306 trace_kvm_hcall_exit(vcpu, r);
4307 kvmppc_core_prepare_to_enter(vcpu);
4308 } else if (r == RESUME_PAGE_FAULT) {
4309 srcu_idx = srcu_read_lock(&kvm->srcu);
4310 r = kvmppc_book3s_hv_page_fault(run, vcpu,
4311 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
4312 srcu_read_unlock(&kvm->srcu, srcu_idx);
4313 } else if (r == RESUME_PASSTHROUGH) {
4314 if (WARN_ON(xics_on_xive()))
4315 r = H_SUCCESS;
4316 else
4317 r = kvmppc_xics_rm_complete(vcpu, 0);
4318 }
4319 } while (is_kvmppc_resume_guest(r));
4320
4321
4322 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
4323 mtspr(SPRN_EBBHR, ebb_regs[0]);
4324 mtspr(SPRN_EBBRR, ebb_regs[1]);
4325 mtspr(SPRN_BESCR, ebb_regs[2]);
4326 mtspr(SPRN_TAR, user_tar);
4327 mtspr(SPRN_FSCR, current->thread.fscr);
4328 }
4329 mtspr(SPRN_VRSAVE, user_vrsave);
4330
4331 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
4332 atomic_dec(&kvm->arch.vcpus_running);
4333 return r;
4334}
4335
4336static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
4337 int shift, int sllp)
4338{
4339 (*sps)->page_shift = shift;
4340 (*sps)->slb_enc = sllp;
4341 (*sps)->enc[0].page_shift = shift;
4342 (*sps)->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
4343
4344
4345
4346 if (shift != 24) {
4347 int penc = kvmppc_pgsize_lp_encoding(shift, 24);
4348 if (penc != -1) {
4349 (*sps)->enc[1].page_shift = 24;
4350 (*sps)->enc[1].pte_enc = penc;
4351 }
4352 }
4353 (*sps)++;
4354}
4355
4356static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
4357 struct kvm_ppc_smmu_info *info)
4358{
4359 struct kvm_ppc_one_seg_page_size *sps;
4360
4361
4362
4363
4364
4365
4366 info->data_keys = 32;
4367 info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
4368
4369
4370 info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
4371 info->slb_size = 32;
4372
4373
4374 sps = &info->sps[0];
4375 kvmppc_add_seg_page_size(&sps, 12, 0);
4376 kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
4377 kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
4378
4379
4380 if (kvmhv_on_pseries())
4381 info->flags |= KVM_PPC_NO_HASH;
4382
4383 return 0;
4384}
4385
4386
4387
4388
4389static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
4390 struct kvm_dirty_log *log)
4391{
4392 struct kvm_memslots *slots;
4393 struct kvm_memory_slot *memslot;
4394 int i, r;
4395 unsigned long n;
4396 unsigned long *buf, *p;
4397 struct kvm_vcpu *vcpu;
4398
4399 mutex_lock(&kvm->slots_lock);
4400
4401 r = -EINVAL;
4402 if (log->slot >= KVM_USER_MEM_SLOTS)
4403 goto out;
4404
4405 slots = kvm_memslots(kvm);
4406 memslot = id_to_memslot(slots, log->slot);
4407 r = -ENOENT;
4408 if (!memslot->dirty_bitmap)
4409 goto out;
4410
4411
4412
4413
4414
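 /*
  * Use the second half of the dirty_bitmap area as the buffer we return;
  * the first half is where dirty bits are accumulated and is cleared
  * (via xchg) as we fold it in below.
  */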
4415 n = kvm_dirty_bitmap_bytes(memslot);
4416 buf = memslot->dirty_bitmap + n / sizeof(long);
4417 memset(buf, 0, n);
4418
4419 if (kvm_is_radix(kvm))
4420 r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
4421 else
4422 r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
4423 if (r)
4424 goto out;
4425
4426
4427
4428
4429
4430
4431
4432 p = memslot->dirty_bitmap;
4433 for (i = 0; i < n / sizeof(long); ++i)
4434 buf[i] |= xchg(&p[i], 0);
4435
4436
4437
4438 kvm_for_each_vcpu(i, vcpu, kvm) {
4439 spin_lock(&vcpu->arch.vpa_update_lock);
4440 kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
4441 kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
4442 spin_unlock(&vcpu->arch.vpa_update_lock);
4443 }
4444
4445 r = -EFAULT;
4446 if (copy_to_user(log->dirty_bitmap, buf, n))
4447 goto out;
4448
4449 r = 0;
4450out:
4451 mutex_unlock(&kvm->slots_lock);
4452 return r;
4453}
4454
4455static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
4456 struct kvm_memory_slot *dont)
4457{
4458 if (!dont || free->arch.rmap != dont->arch.rmap) {
4459 vfree(free->arch.rmap);
4460 free->arch.rmap = NULL;
4461 }
4462}
4463
4464static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
4465 unsigned long npages)
4466{
4467 slot->arch.rmap = vzalloc(array_size(npages, sizeof(*slot->arch.rmap)));
4468 if (!slot->arch.rmap)
4469 return -ENOMEM;
4470
4471 return 0;
4472}
4473
4474static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
4475 struct kvm_memory_slot *memslot,
4476 const struct kvm_userspace_memory_region *mem)
4477{
4478 return 0;
4479}
4480
4481static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
4482 const struct kvm_userspace_memory_region *mem,
4483 const struct kvm_memory_slot *old,
4484 const struct kvm_memory_slot *new,
4485 enum kvm_mr_change change)
4486{
4487 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
4488
4489
4490
4491
4492
4493
4494
4495 if (npages)
4496 atomic64_inc(&kvm->arch.mmio_update);
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511 if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
4512 ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
4513 kvmppc_radix_flush_memslot(kvm, old);
4514}
4515
4516
4517
4518
4519
4520
4521void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
4522{
4523 long int i;
4524 u32 cores_done = 0;
4525
4526 if ((kvm->arch.lpcr & mask) == lpcr)
4527 return;
4528
4529 kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
4530
4531 for (i = 0; i < KVM_MAX_VCORES; ++i) {
4532 struct kvmppc_vcore *vc = kvm->arch.vcores[i];
4533 if (!vc)
4534 continue;
4535 spin_lock(&vc->lock);
4536 vc->lpcr = (vc->lpcr & ~mask) | lpcr;
4537 spin_unlock(&vc->lock);
4538 if (++cores_done >= kvm->arch.online_vcores)
4539 break;
4540 }
4541}
4542
4543static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
4544{
4545 return;
4546}
4547
4548void kvmppc_setup_partition_table(struct kvm *kvm)
4549{
4550 unsigned long dw0, dw1;
4551
4552 if (!kvm_is_radix(kvm)) {
4553
4554 dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
4555 ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
4556
4557 dw0 |= kvm->arch.sdr1;
4558
4559
4560 dw1 = kvm->arch.process_table;
4561 } else {
4562 dw0 = PATB_HR | radix__get_tree_size() |
4563 __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
4564 dw1 = PATB_GR | kvm->arch.process_table;
4565 }
4566 kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1);
4567}
4568
4569
4570
4571
4572
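/*
 * Allocate the guest hashed page table if necessary and set up the
 * virtual real-mode area (VRMA) according to the page size backing
 * guest memory at address 0. Called under kvm->arch.mmu_setup_lock
 * (see kvmhv_setup_mmu()).
 */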
4573static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
4574{
4575 int err = 0;
4576 struct kvm *kvm = vcpu->kvm;
4577 unsigned long hva;
4578 struct kvm_memory_slot *memslot;
4579 struct vm_area_struct *vma;
4580 unsigned long lpcr = 0, senc;
4581 unsigned long psize, porder;
4582 int srcu_idx;
4583
4584
4585 if (!kvm->arch.hpt.virt) {
4586 int order = KVM_DEFAULT_HPT_ORDER;
4587 struct kvm_hpt_info info;
4588
4589 err = kvmppc_allocate_hpt(&info, order);
4590
4591
4592
4593 while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER)
4594 err = kvmppc_allocate_hpt(&info, order);
4595
4596 if (err < 0) {
4597 pr_err("KVM: Couldn't alloc HPT\n");
4598 goto out;
4599 }
4600
4601 kvmppc_set_hpt(kvm, &info);
4602 }
4603
4604
4605 srcu_idx = srcu_read_lock(&kvm->srcu);
4606 memslot = gfn_to_memslot(kvm, 0);
4607
4608	/* We must have some memory at 0 by now */
4609 err = -EINVAL;
4610 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
4611 goto out_srcu;
4612
4613	/* Look up the VMA for the start of this memory slot */
4614 hva = memslot->userspace_addr;
4615	down_read(&current->mm->mmap_sem);
4616 vma = find_vma(current->mm, hva);
4617 if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
4618 goto up_out;
4619
4620 psize = vma_kernel_pagesize(vma);
4621
4622	up_read(&current->mm->mmap_sem);
4623
4624	/* We can handle 4k, 64k or 16M pages in the VRMA */
4625 if (psize >= 0x1000000)
4626 psize = 0x1000000;
4627 else if (psize >= 0x10000)
4628 psize = 0x10000;
4629 else
4630 psize = 0x1000;
4631 porder = __ilog2(psize);
4632
4633 senc = slb_pgsize_encoding(psize);
4634 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
4635 (VRMA_VSID << SLB_VSID_SHIFT_1T);
4636	/* Create HPTEs in the hash page table for the VRMA */
4637 kvmppc_map_vrma(vcpu, memslot, porder);
4638
4639	/* Update the VRMASD field in the LPCR */
4640 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
4641		/* the -4 is to account for senc values starting at 0x10 */
4642 lpcr = senc << (LPCR_VRMASD_SH - 4);
4643 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
4644 }
4645
4646	/* Order updates to kvm->arch.lpcr etc. vs. mmu_ready */
4647 smp_wmb();
4648 err = 0;
4649 out_srcu:
4650 srcu_read_unlock(&kvm->srcu, srcu_idx);
4651 out:
4652 return err;
4653
4654 up_out:
4655	up_read(&current->mm->mmap_sem);
4656 goto out_srcu;
4657}
4658
4659/*
4660 * Must be called with kvm->arch.mmu_setup_lock held and
4661 * mmu_ready = 0 and no vcpus running.
4662 */
4663int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
4664{
4665 if (nesting_enabled(kvm))
4666 kvmhv_release_all_nested(kvm);
4667 kvmppc_rmap_reset(kvm);
4668 kvm->arch.process_table = 0;
4669
4670 spin_lock(&kvm->mmu_lock);
4671 kvm->arch.radix = 0;
4672 spin_unlock(&kvm->mmu_lock);
4673 kvmppc_free_radix(kvm);
4674 kvmppc_update_lpcr(kvm, LPCR_VPM1,
4675 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
4676 return 0;
4677}
4678
4679/*
4680 * Must be called with kvm->arch.mmu_setup_lock held and
4681 * mmu_ready = 0 and no vcpus running.
4682 */
4683int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
4684{
4685 int err;
4686
4687 err = kvmppc_init_vm_radix(kvm);
4688 if (err)
4689 return err;
4690 kvmppc_rmap_reset(kvm);
4691
4692 spin_lock(&kvm->mmu_lock);
4693 kvm->arch.radix = 1;
4694 spin_unlock(&kvm->mmu_lock);
4695 kvmppc_free_hpt(&kvm->arch.hpt);
4696 kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
4697 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
4698 return 0;
4699}
4700
4701#ifdef CONFIG_KVM_XICS
4702/*
4703 * Allocate a per-core structure for managing state about which cores are
4704 * running in the host versus the guest and for exchanging data between
4705 * real-mode KVM and CPUs running in the host.
4706 * This is only done for the first VM.
4707 * The allocated structure stays even if all VMs have stopped.
4708 * It is only freed when the kvm-hv module is unloaded.
4709 * It's OK for this routine to fail, we just don't support host
4710 * core operations like redirecting H_IPI wakeups.
4711 */
4712void kvmppc_alloc_host_rm_ops(void)
4713{
4714 struct kvmppc_host_rm_ops *ops;
4715 unsigned long l_ops;
4716 int cpu, core;
4717 int size;
4718
4719	/* Not the first time here? */
4720 if (kvmppc_host_rm_ops_hv != NULL)
4721 return;
4722
4723 ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
4724 if (!ops)
4725 return;
4726
4727 size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
4728 ops->rm_core = kzalloc(size, GFP_KERNEL);
4729
4730 if (!ops->rm_core) {
4731 kfree(ops);
4732 return;
4733 }
4734
4735 cpus_read_lock();
4736
4737 for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
4738 if (!cpu_online(cpu))
4739 continue;
4740
4741 core = cpu >> threads_shift;
4742 ops->rm_core[core].rm_state.in_host = 1;
4743 }
4744
4745 ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
4746
4747	/*
4748	 * Make the contents of the kvmppc_host_rm_ops structure visible
4749	 * to other CPUs before we assign it to the global variable.
4750	 * Do an atomic assignment (no locks used here), but if someone
4751	 * beats us to it, just free our copy and return.
4752	 */
4753 smp_wmb();
4754 l_ops = (unsigned long) ops;
4755
4756 if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
4757 cpus_read_unlock();
4758 kfree(ops->rm_core);
4759 kfree(ops);
4760 return;
4761 }
4762
4763 cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
4764 "ppc/kvm_book3s:prepare",
4765 kvmppc_set_host_core,
4766 kvmppc_clear_host_core);
4767 cpus_read_unlock();
4768}
4769
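/* Tear down the host real-mode ops state, if it was ever allocated. */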
4770void kvmppc_free_host_rm_ops(void)
4771{
4772 if (kvmppc_host_rm_ops_hv) {
4773 cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE);
4774 kfree(kvmppc_host_rm_ops_hv->rm_core);
4775 kfree(kvmppc_host_rm_ops_hv);
4776 kvmppc_host_rm_ops_hv = NULL;
4777 }
4778}
4779#endif
4780
4781static int kvmppc_core_init_vm_hv(struct kvm *kvm)
4782{
4783 unsigned long lpcr, lpid;
4784 char buf[32];
4785 int ret;
4786
4787 mutex_init(&kvm->arch.mmu_setup_lock);
4788
4789	/* Allocate the guest's logical partition ID */
4790
4791 lpid = kvmppc_alloc_lpid();
4792 if ((long)lpid < 0)
4793 return -ENOMEM;
4794 kvm->arch.lpid = lpid;
4795
4796 kvmppc_alloc_host_rm_ops();
4797
4798 kvmhv_vm_nested_init(kvm);
4799
4800	/*
4801	 * Since we don't flush the TLB when tearing down a VM,
4802	 * and this lpid might have previously been used,
4803	 * make sure we flush on each core before running the new VM.
4804	 * On POWER9, the tlbie in mmu_partition_table_set_entry()
4805	 * does this flush for us.
4806	 */
4807 if (!cpu_has_feature(CPU_FTR_ARCH_300))
4808 cpumask_setall(&kvm->arch.need_tlb_flush);
4809
4810	/* Start out with the default set of hcalls enabled */
4811 memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
4812 sizeof(kvm->arch.enabled_hcalls));
4813
4814 if (!cpu_has_feature(CPU_FTR_ARCH_300))
4815 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
4816
4817	/* Init LPCR for virtual RMA mode */
4818 if (cpu_has_feature(CPU_FTR_HVMODE)) {
4819 kvm->arch.host_lpid = mfspr(SPRN_LPID);
4820 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
4821 lpcr &= LPCR_PECE | LPCR_LPES;
4822 } else {
4823 lpcr = 0;
4824 }
4825 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
4826 LPCR_VPM0 | LPCR_VPM1;
4827 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
4828 (VRMA_VSID << SLB_VSID_SHIFT_1T);
4829
4830 if (cpu_has_feature(CPU_FTR_ARCH_207S))
4831 lpcr |= LPCR_ONL;
4832	/*
4833	 * On POWER9, the VPM0 bit is reserved (VPM0=1 behaviour is assumed).
4834	 * Set the HVICE bit to enable hypervisor virtualization interrupts.
4835	 * Set HEIC to prevent OS interrupts from going to the hypervisor
4836	 * (should be unnecessary, but better safe than sorry in case we
4837	 * re-enable EE in HV mode with this LPCR still set).
4838	 */
4839 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
4840 lpcr &= ~LPCR_VPM0;
4841 lpcr |= LPCR_HVICE | LPCR_HEIC;
4842
4843		/*
4844		 * If xive is enabled, we route 0x500 interrupts directly
4845		 * to the guest.
4846		 */
4847 if (xics_on_xive())
4848 lpcr |= LPCR_LPES;
4849 }
4850
4851	/*
4852	 * If the host uses radix, the guest starts out as radix.
4853	 */
4854 if (radix_enabled()) {
4855 kvm->arch.radix = 1;
4856 kvm->arch.mmu_ready = 1;
4857 lpcr &= ~LPCR_VPM1;
4858 lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
4859 ret = kvmppc_init_vm_radix(kvm);
4860 if (ret) {
4861 kvmppc_free_lpid(kvm->arch.lpid);
4862 return ret;
4863 }
4864 kvmppc_setup_partition_table(kvm);
4865 }
4866
4867 kvm->arch.lpcr = lpcr;
4868
4869	/* Initialization for future HPT resizes */
4870 kvm->arch.resize_hpt = NULL;
4871
4872	/*
4873	 * Work out how many sets the TLB has, for the use of
4874	 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
4875	 */
4876 if (radix_enabled())
4877 kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX;
4878 else if (cpu_has_feature(CPU_FTR_ARCH_300))
4879 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH;
4880 else if (cpu_has_feature(CPU_FTR_ARCH_207S))
4881 kvm->arch.tlb_sets = POWER8_TLB_SETS;
4882 else
4883 kvm->arch.tlb_sets = POWER7_TLB_SETS;
4884
4885	/*
4886	 * Track that we now have an HV mode VM active. This blocks secondary
4887	 * CPU threads from coming online.
4888	 * On POWER9, we only need to do this if the "indep_threads_mode"
4889	 * module parameter has been set to N.
4890	 */
4891 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
4892 if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
4893 pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
4894 kvm->arch.threads_indep = true;
4895 } else {
4896 kvm->arch.threads_indep = indep_threads_mode;
4897 }
4898 }
4899 if (!kvm->arch.threads_indep)
4900 kvm_hv_vm_activated();
4901
4902	/*
4903	 * Initialize smt_mode depending on processor.
4904	 * POWER8 and earlier have to use "strict" threading, where
4905	 * all vCPUs in a vcore have to run on the same (sub)core,
4906	 * whereas on POWER9 the threads can each run a different
4907	 * guest.
4908	 */
4909 if (!cpu_has_feature(CPU_FTR_ARCH_300))
4910 kvm->arch.smt_mode = threads_per_subcore;
4911 else
4912 kvm->arch.smt_mode = 1;
4913 kvm->arch.emul_smt_mode = 1;
4914
4915	/*
4916	 * Create a debugfs directory for the VM
4917	 */
4918 snprintf(buf, sizeof(buf), "vm%d", current->pid);
4919 kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
4920 kvmppc_mmu_debugfs_init(kvm);
4921 if (radix_enabled())
4922 kvmhv_radix_debugfs_init(kvm);
4923
4924 return 0;
4925}
4926
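/* Free the per-virtual-core structures allocated for this VM. */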
4927static void kvmppc_free_vcores(struct kvm *kvm)
4928{
4929 long int i;
4930
4931 for (i = 0; i < KVM_MAX_VCORES; ++i)
4932 kfree(kvm->arch.vcores[i]);
4933 kvm->arch.online_vcores = 0;
4934}
4935
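/*
 * Tear down all VM-wide state: debugfs entries, virtual cores, the MMU
 * structures (radix tree or HPT), the partition-table entry and LPID,
 * and the passthrough IRQ map.
 */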
4936static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
4937{
4938 debugfs_remove_recursive(kvm->arch.debugfs_dir);
4939
4940 if (!kvm->arch.threads_indep)
4941 kvm_hv_vm_deactivated();
4942
4943 kvmppc_free_vcores(kvm);
4944
4945	/* Free the guest's MMU structures: radix page table or HPT */
4946 if (kvm_is_radix(kvm))
4947 kvmppc_free_radix(kvm);
4948 else
4949 kvmppc_free_hpt(&kvm->arch.hpt);
4950
4951	/* Perform global invalidation and return lpid to the pool */
4952 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
4953 if (nesting_enabled(kvm))
4954 kvmhv_release_all_nested(kvm);
4955 kvm->arch.process_table = 0;
4956 kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
4957 }
4958 kvmppc_free_lpid(kvm->arch.lpid);
4959
4960 kvmppc_free_pimap(kvm);
4961}
4962
4963/* We don't need to emulate any privileged instructions or dcbz */
4964static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
4965 unsigned int inst, int *advance)
4966{
4967 return EMULATE_FAIL;
4968}
4969
4970static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
4971 ulong spr_val)
4972{
4973 return EMULATE_FAIL;
4974}
4975
4976static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
4977 ulong *spr_val)
4978{
4979 return EMULATE_FAIL;
4980}
4981
4982static int kvmppc_core_check_processor_compat_hv(void)
4983{
4984 if (cpu_has_feature(CPU_FTR_HVMODE) &&
4985 cpu_has_feature(CPU_FTR_ARCH_206))
4986 return 0;
4987
4988	/* POWER9 in radix mode is capable of being a nested hypervisor */
4989 if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
4990 return 0;
4991
4992 return -EIO;
4993}
4994
4995#ifdef CONFIG_KVM_XICS
4996
4997void kvmppc_free_pimap(struct kvm *kvm)
4998{
4999 kfree(kvm->arch.pimap);
5000}
5001
5002static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
5003{
5004 return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
5005}
5006
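/*
 * Map a host hardware IRQ to a guest interrupt (GSI) so that it can be
 * passed through to the guest via XICS or XIVE.
 */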
5007static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
5008{
5009 struct irq_desc *desc;
5010 struct kvmppc_irq_map *irq_map;
5011 struct kvmppc_passthru_irqmap *pimap;
5012 struct irq_chip *chip;
5013 int i, rc = 0;
5014
5015 if (!kvm_irq_bypass)
5016 return 1;
5017
5018 desc = irq_to_desc(host_irq);
5019 if (!desc)
5020 return -EIO;
5021
5022 mutex_lock(&kvm->lock);
5023
5024 pimap = kvm->arch.pimap;
5025 if (pimap == NULL) {
5026		/* First call, allocate structure to hold IRQ map */
5027 pimap = kvmppc_alloc_pimap();
5028 if (pimap == NULL) {
5029 mutex_unlock(&kvm->lock);
5030 return -ENOMEM;
5031 }
5032 kvm->arch.pimap = pimap;
5033 }
5034
5035	/*
5036	 * For now, we only support interrupts for which the EOI operation
5037	 * is an OPAL call followed by a write to XIRR, since that's
5038	 * what our real-mode EOI code does, or a XIVE interrupt.
5039	 */
5040 chip = irq_data_get_irq_chip(&desc->irq_data);
5041 if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
5042 pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
5043 host_irq, guest_gsi);
5044 mutex_unlock(&kvm->lock);
5045 return -ENOENT;
5046 }
5047
5048	/*
5049	 * See if we already have an entry for this guest IRQ number.
5050	 * If it's mapped to a hardware IRQ number, that's an error;
5051	 * otherwise re-use this entry.
5052	 */
5053 for (i = 0; i < pimap->n_mapped; i++) {
5054 if (guest_gsi == pimap->mapped[i].v_hwirq) {
5055 if (pimap->mapped[i].r_hwirq) {
5056 mutex_unlock(&kvm->lock);
5057 return -EINVAL;
5058 }
5059 break;
5060 }
5061 }
5062
5063 if (i == KVMPPC_PIRQ_MAPPED) {
5064 mutex_unlock(&kvm->lock);
5065 return -EAGAIN;
5066 }
5067
5068 irq_map = &pimap->mapped[i];
5069
5070 irq_map->v_hwirq = guest_gsi;
5071 irq_map->desc = desc;
5072
5073	/*
5074	 * Order the above two stores before the next one to serialize
5075	 * with the KVM real-mode handler.
5076	 */
5077 smp_wmb();
5078 irq_map->r_hwirq = desc->irq_data.hwirq;
5079
5080 if (i == pimap->n_mapped)
5081 pimap->n_mapped++;
5082
5083 if (xics_on_xive())
5084 rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
5085 else
5086 kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
5087 if (rc)
5088 irq_map->r_hwirq = 0;
5089
5090 mutex_unlock(&kvm->lock);
5091
5092 return 0;
5093}
5094
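/* Undo a mapping established by kvmppc_set_passthru_irq(). */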
5095static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
5096{
5097 struct irq_desc *desc;
5098 struct kvmppc_passthru_irqmap *pimap;
5099 int i, rc = 0;
5100
5101 if (!kvm_irq_bypass)
5102 return 0;
5103
5104 desc = irq_to_desc(host_irq);
5105 if (!desc)
5106 return -EIO;
5107
5108 mutex_lock(&kvm->lock);
5109 if (!kvm->arch.pimap)
5110 goto unlock;
5111
5112 pimap = kvm->arch.pimap;
5113
5114 for (i = 0; i < pimap->n_mapped; i++) {
5115 if (guest_gsi == pimap->mapped[i].v_hwirq)
5116 break;
5117 }
5118
5119 if (i == pimap->n_mapped) {
5120 mutex_unlock(&kvm->lock);
5121 return -ENODEV;
5122 }
5123
5124 if (xics_on_xive())
5125 rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
5126 else
5127 kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
5128
5129	/* Invalidate the entry (what to do on error from the above?) */
5130 pimap->mapped[i].r_hwirq = 0;
5131
5132	/*
5133	 * We don't free this structure even when the count goes to
5134	 * zero. The structure is freed when we destroy the VM.
5135	 */
5136 unlock:
5137 mutex_unlock(&kvm->lock);
5138 return rc;
5139}
5140
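/*
 * irq_bypass consumer callback: a producer has been registered for this
 * irqfd, so try to establish a passthrough mapping for its IRQ.
 */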
5141static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
5142 struct irq_bypass_producer *prod)
5143{
5144 int ret = 0;
5145 struct kvm_kernel_irqfd *irqfd =
5146 container_of(cons, struct kvm_kernel_irqfd, consumer);
5147
5148 irqfd->producer = prod;
5149
5150 ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
5151 if (ret)
5152 pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
5153