/*
 * Book3S HV KVM: support for running guests in hypervisor mode on
 * POWER7 and later IBM server processors.
 *
 * Derived from arch/powerpc/kvm/book3s.c.
 */
21#include <linux/kvm_host.h>
22#include <linux/kernel.h>
23#include <linux/err.h>
24#include <linux/slab.h>
25#include <linux/preempt.h>
26#include <linux/sched/signal.h>
27#include <linux/sched/stat.h>
28#include <linux/delay.h>
29#include <linux/export.h>
30#include <linux/fs.h>
31#include <linux/anon_inodes.h>
32#include <linux/cpu.h>
33#include <linux/cpumask.h>
34#include <linux/spinlock.h>
35#include <linux/page-flags.h>
36#include <linux/srcu.h>
37#include <linux/miscdevice.h>
38#include <linux/debugfs.h>
39#include <linux/gfp.h>
40#include <linux/vmalloc.h>
41#include <linux/highmem.h>
42#include <linux/hugetlb.h>
43#include <linux/kvm_irqfd.h>
44#include <linux/irqbypass.h>
45#include <linux/module.h>
46#include <linux/compiler.h>
47#include <linux/of.h>
48
49#include <asm/ftrace.h>
50#include <asm/reg.h>
51#include <asm/ppc-opcode.h>
52#include <asm/asm-prototypes.h>
53#include <asm/archrandom.h>
54#include <asm/debug.h>
55#include <asm/disassemble.h>
56#include <asm/cputable.h>
57#include <asm/cacheflush.h>
58#include <asm/tlbflush.h>
59#include <linux/uaccess.h>
60#include <asm/io.h>
61#include <asm/kvm_ppc.h>
62#include <asm/kvm_book3s.h>
63#include <asm/mmu_context.h>
64#include <asm/lppaca.h>
65#include <asm/processor.h>
66#include <asm/cputhreads.h>
67#include <asm/page.h>
68#include <asm/hvcall.h>
69#include <asm/switch_to.h>
70#include <asm/smp.h>
71#include <asm/dbell.h>
72#include <asm/hmi.h>
73#include <asm/pnv-pci.h>
74#include <asm/mmu.h>
75#include <asm/opal.h>
76#include <asm/xics.h>
77#include <asm/xive.h>
78#include <asm/hw_breakpoint.h>
79#include <asm/kvm_host.h>
80#include <asm/kvm_book3s_uvmem.h>
81#include <asm/ultravisor.h>
82#include <asm/dtl.h>
83
84#include "book3s.h"
85
86#define CREATE_TRACE_POINTS
87#include "trace_hv.h"
88
/* Used to indicate that a guest page fault needs to be handled */
94#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1)
/* Used to indicate that a guest passthrough interrupt needs to be handled */
96#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2)
/* Used as a "null" value for timebase values */
99#define TB_NIL (~(u64)0)
100
101static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
102
103static int dynamic_mt_modes = 6;
104module_param(dynamic_mt_modes, int, 0644);
105MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
106static int target_smt_mode;
107module_param(target_smt_mode, int, 0644);
108MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
109
110static bool indep_threads_mode = true;
111module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
112MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
113
114static bool one_vm_per_core;
115module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
116MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
117
118#ifdef CONFIG_KVM_XICS
119static const struct kernel_param_ops module_param_ops = {
120 .set = param_set_int,
121 .get = param_get_int,
122};
123
124module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644);
125MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
126
127module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
128MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
129#endif
130
131
132static bool nested = true;
133module_param(nested, bool, S_IRUGO | S_IWUSR);
134MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
135
136static inline bool nesting_enabled(struct kvm *kvm)
137{
138 return kvm->arch.nested_enable && kvm_is_radix(kvm);
139}
140
/*
 * Set when guests using hashed page tables (HPT) and radix guests must
 * not be run together on the same physical core.
 */
142static bool no_mixing_hpt_and_radix __read_mostly;
143
144static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
145
/*
 * Values loaded into the RWMR SPR on POWER8, chosen according to the
 * number of online threads in the virtual core being run.
 */
151#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
152#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
153#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
154#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
155#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
156#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
157#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
158#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
159
160static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
161 RWMR_RPA_P8_1THREAD,
162 RWMR_RPA_P8_1THREAD,
163 RWMR_RPA_P8_2THREAD,
164 RWMR_RPA_P8_3THREAD,
165 RWMR_RPA_P8_4THREAD,
166 RWMR_RPA_P8_5THREAD,
167 RWMR_RPA_P8_6THREAD,
168 RWMR_RPA_P8_7THREAD,
169 RWMR_RPA_P8_8THREAD,
170};
171
172static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
173 int *ip)
174{
175 int i = *ip;
176 struct kvm_vcpu *vcpu;
177
178 while (++i < MAX_SMT_THREADS) {
179 vcpu = READ_ONCE(vc->runnable_threads[i]);
180 if (vcpu) {
181 *ip = i;
182 return vcpu;
183 }
184 }
185 return NULL;
186}
187
188
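/* Iterate over the vcpus that are currently runnable in a virtual core */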
189#define for_each_runnable_thread(i, vcpu, vc) \
190 for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
191
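/*
 * Wake up the given physical CPU thread so that it notices a pending
 * guest interrupt or doorbell.  Uses msgsnd where the hardware allows it
 * (POWER9 and later for any cpu, POWER8 within the same core), otherwise
 * pokes the XICS/OPAL interrupt controller.  Returns false if no suitable
 * mechanism was found and the caller should fall back to a normal IPI.
 */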
192static bool kvmppc_ipi_thread(int cpu)
193{
194 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
195
196
197 if (kvmhv_on_pseries())
198 return false;
199
200
201 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
202 msg |= get_hard_smp_processor_id(cpu);
203 smp_mb();
204 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
205 return true;
206 }
207
208
209 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
210 preempt_disable();
211 if (cpu_first_thread_sibling(cpu) ==
212 cpu_first_thread_sibling(smp_processor_id())) {
213 msg |= cpu_thread_in_core(cpu);
214 smp_mb();
215 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
216 preempt_enable();
217 return true;
218 }
219 preempt_enable();
220 }
221
222#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
223 if (cpu >= 0 && cpu < nr_cpu_ids) {
224 if (paca_ptrs[cpu]->kvm_hstate.xics_phys) {
225 xics_wake_cpu(cpu);
226 return true;
227 }
228 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
229 return true;
230 }
231#endif
232
233 return false;
234}
235
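/*
 * Kick a vcpu out of the guest or out of its sleep: wake it if it is
 * sleeping on its wait queue, then try to interrupt the hardware thread
 * it is running on, falling back to a reschedule IPI.
 */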
236static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
237{
238 int cpu;
239 struct rcuwait *waitp;
240
241 waitp = kvm_arch_vcpu_get_wait(vcpu);
242 if (rcuwait_wake_up(waitp))
243 ++vcpu->stat.generic.halt_wakeup;
244
245 cpu = READ_ONCE(vcpu->arch.thread_cpu);
246 if (cpu >= 0 && kvmppc_ipi_thread(cpu))
247 return;
248
249
250 cpu = vcpu->cpu;
251 if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
252 smp_send_reschedule(cpu);
253}
254
/*
 * Stolen time accounting.
 *
 * Stolen time is counted as time when a vcpu is able to run as part of
 * a virtual core, but the task running the vcore is preempted or
 * sleeping, or when the vcpu needs something done in the kernel by its
 * task but that task is preempted or sleeping.  The two cases are
 * accounted separately: one vcpu task takes on the job of running the
 * core, and the other vcpu tasks in the vcore sleep waiting for it, and
 * that sleep should not count as stolen time.
 *
 * Hence stolen time while the vcpu can run as part of a vcore is
 * accumulated in vc->stolen_tb (protected by the vcore's stoltb_lock),
 * and stolen time while the vcpu needs its task to do something on its
 * behalf is accumulated in vcpu->arch.busy_stolen (protected by the
 * vcpu's tbacct_lock).
 */
288static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
289{
290 unsigned long flags;
291
292 spin_lock_irqsave(&vc->stoltb_lock, flags);
293 vc->preempt_tb = mftb();
294 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
295}
296
297static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&vc->stoltb_lock, flags);
302 if (vc->preempt_tb != TB_NIL) {
303 vc->stolen_tb += mftb() - vc->preempt_tb;
304 vc->preempt_tb = TB_NIL;
305 }
306 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
307}
308
309static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
310{
311 struct kvmppc_vcore *vc = vcpu->arch.vcore;
312 unsigned long flags;
313
	/*
	 * We can test vc->runner without taking the vcore lock,
	 * because only this task ever sets vc->runner to this
	 * vcpu, and once it is set to this vcpu, only this task
	 * ever sets it to NULL.
	 */
320 if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
321 kvmppc_core_end_stolen(vc);
322
323 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
324 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
325 vcpu->arch.busy_preempt != TB_NIL) {
326 vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
327 vcpu->arch.busy_preempt = TB_NIL;
328 }
329 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
330}
331
332static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
333{
334 struct kvmppc_vcore *vc = vcpu->arch.vcore;
335 unsigned long flags;
336
337 if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
338 kvmppc_core_start_stolen(vc);
339
340 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
341 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
342 vcpu->arch.busy_preempt = mftb();
343 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
344}
345
346static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
347{
348 vcpu->arch.pvr = pvr;
349}
350
351
352#define PCR_ARCH_31 (PCR_ARCH_300 << 1)
353
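/*
 * Set the guest's architecture compatibility level.  The guest can be
 * restricted to an older processor architecture than the host by setting
 * bits in the PCR (Processor Compatibility Register); the requested level
 * must not be newer than what the host supports.  Userspace selects this
 * through the KVM_REG_PPC_ARCH_COMPAT one_reg (see kvmppc_set_one_reg_hv()).
 */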
354static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
355{
356 unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
357 struct kvmppc_vcore *vc = vcpu->arch.vcore;
358
359
360 if (cpu_has_feature(CPU_FTR_ARCH_31))
361 host_pcr_bit = PCR_ARCH_31;
362 else if (cpu_has_feature(CPU_FTR_ARCH_300))
363 host_pcr_bit = PCR_ARCH_300;
364 else if (cpu_has_feature(CPU_FTR_ARCH_207S))
365 host_pcr_bit = PCR_ARCH_207;
366 else if (cpu_has_feature(CPU_FTR_ARCH_206))
367 host_pcr_bit = PCR_ARCH_206;
368 else
369 host_pcr_bit = PCR_ARCH_205;
370
371
372 guest_pcr_bit = host_pcr_bit;
373 if (arch_compat) {
374 switch (arch_compat) {
375 case PVR_ARCH_205:
376 guest_pcr_bit = PCR_ARCH_205;
377 break;
378 case PVR_ARCH_206:
379 case PVR_ARCH_206p:
380 guest_pcr_bit = PCR_ARCH_206;
381 break;
382 case PVR_ARCH_207:
383 guest_pcr_bit = PCR_ARCH_207;
384 break;
385 case PVR_ARCH_300:
386 guest_pcr_bit = PCR_ARCH_300;
387 break;
388 case PVR_ARCH_31:
389 guest_pcr_bit = PCR_ARCH_31;
390 break;
391 default:
392 return -EINVAL;
393 }
394 }
395
396
397 if (guest_pcr_bit > host_pcr_bit)
398 return -EINVAL;
399
400 spin_lock(&vc->lock);
401 vc->arch_compat = arch_compat;
402
403
404
405
406 vc->pcr = (host_pcr_bit - guest_pcr_bit) | PCR_MASK;
407 spin_unlock(&vc->lock);
408
409 return 0;
410}
411
412static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
413{
414 int r;
415
416 pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
417 pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
418 vcpu->arch.regs.nip, vcpu->arch.shregs.msr, vcpu->arch.trap);
419 for (r = 0; r < 16; ++r)
420 pr_err("r%2d = %.16lx r%d = %.16lx\n",
421 r, kvmppc_get_gpr(vcpu, r),
422 r+16, kvmppc_get_gpr(vcpu, r+16));
423 pr_err("ctr = %.16lx lr = %.16lx\n",
424 vcpu->arch.regs.ctr, vcpu->arch.regs.link);
425 pr_err("srr0 = %.16llx srr1 = %.16llx\n",
426 vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
427 pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
428 vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
429 pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
430 vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
431 pr_err("cr = %.8lx xer = %.16lx dsisr = %.8x\n",
432 vcpu->arch.regs.ccr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
433 pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
434 pr_err("fault dar = %.16lx dsisr = %.8x\n",
435 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
436 pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
437 for (r = 0; r < vcpu->arch.slb_max; ++r)
438 pr_err(" ESID = %.16llx VSID = %.16llx\n",
439 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
440 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
441 vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
442 vcpu->arch.last_inst);
443}
444
445static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
446{
447 return kvm_get_vcpu_by_id(kvm, id);
448}
449
450static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
451{
452 vpa->__old_status |= LPPACA_OLD_SHARED_PROC;
453 vpa->yield_count = cpu_to_be32(1);
454}
455
456static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
457 unsigned long addr, unsigned long len)
458{
459
460 if (addr & (L1_CACHE_BYTES - 1))
461 return -EINVAL;
462 spin_lock(&vcpu->arch.vpa_update_lock);
463 if (v->next_gpa != addr || v->len != len) {
464 v->next_gpa = addr;
465 v->len = addr ? len : 0;
466 v->update_pending = 1;
467 }
468 spin_unlock(&vcpu->arch.vpa_update_lock);
469 return 0;
470}
471
472
473struct reg_vpa {
474 u32 dummy;
475 union {
476 __be16 hword;
477 __be32 word;
478 } length;
479};
480
481static int vpa_is_registered(struct kvmppc_vpa *vpap)
482{
483 if (vpap->update_pending)
484 return vpap->next_gpa != 0;
485 return vpap->pinned_addr != NULL;
486}
487
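/*
 * Handle the H_REGISTER_VPA hcall for a target vcpu.  The subfunction
 * encoded in the flags selects registration or deregistration of the VPA,
 * the dispatch trace log (DTL) or the SLB shadow buffer.  For a
 * registration, the guest page is pinned temporarily so the length stored
 * in the buffer can be validated; the actual (re)pinning is deferred to
 * kvmppc_update_vpas() on the target vcpu.
 */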
488static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
489 unsigned long flags,
490 unsigned long vcpuid, unsigned long vpa)
491{
492 struct kvm *kvm = vcpu->kvm;
493 unsigned long len, nb;
494 void *va;
495 struct kvm_vcpu *tvcpu;
496 int err;
497 int subfunc;
498 struct kvmppc_vpa *vpap;
499
500 tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
501 if (!tvcpu)
502 return H_PARAMETER;
503
504 subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
505 if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
506 subfunc == H_VPA_REG_SLB) {
		/* Registering a new area - address must be cache-line aligned and non-zero */
508 if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
509 return H_PARAMETER;
		/* Convert the logical address to a kernel address and read the length */
512 va = kvmppc_pin_guest_page(kvm, vpa, &nb);
513 if (va == NULL)
514 return H_PARAMETER;
515 if (subfunc == H_VPA_REG_VPA)
516 len = be16_to_cpu(((struct reg_vpa *)va)->length.hword);
517 else
518 len = be32_to_cpu(((struct reg_vpa *)va)->length.word);
519 kvmppc_unpin_guest_page(kvm, va, vpa, false);
520
521
522 if (len > nb || len < sizeof(struct reg_vpa))
523 return H_PARAMETER;
524 } else {
525 vpa = 0;
526 len = 0;
527 }
528
529 err = H_PARAMETER;
530 vpap = NULL;
531 spin_lock(&tvcpu->arch.vpa_update_lock);
532
533 switch (subfunc) {
534 case H_VPA_REG_VPA:
		/*
		 * Our lppaca occupies 640 bytes, so require the registered
		 * area to be at least that big.
		 */
541 BUILD_BUG_ON(sizeof(struct lppaca) != 640);
542 if (len < sizeof(struct lppaca))
543 break;
544 vpap = &tvcpu->arch.vpa;
545 err = 0;
546 break;
547
548 case H_VPA_REG_DTL:
549 if (len < sizeof(struct dtl_entry))
550 break;
551 len -= len % sizeof(struct dtl_entry);
		/* Check that they have previously registered a VPA */
554 err = H_RESOURCE;
555 if (!vpa_is_registered(&tvcpu->arch.vpa))
556 break;
557
558 vpap = &tvcpu->arch.dtl;
559 err = 0;
560 break;
561
562 case H_VPA_REG_SLB:
		/* Check that they have previously registered a VPA */
564 err = H_RESOURCE;
565 if (!vpa_is_registered(&tvcpu->arch.vpa))
566 break;
567
568 vpap = &tvcpu->arch.slb_shadow;
569 err = 0;
570 break;
571
572 case H_VPA_DEREG_VPA:
		/* Check they don't still have a DTL or SLB shadow buffer registered */
574 err = H_RESOURCE;
575 if (vpa_is_registered(&tvcpu->arch.dtl) ||
576 vpa_is_registered(&tvcpu->arch.slb_shadow))
577 break;
578
579 vpap = &tvcpu->arch.vpa;
580 err = 0;
581 break;
582
583 case H_VPA_DEREG_DTL:
584 vpap = &tvcpu->arch.dtl;
585 err = 0;
586 break;
587
588 case H_VPA_DEREG_SLB:
589 vpap = &tvcpu->arch.slb_shadow;
590 err = 0;
591 break;
592 }
593
594 if (vpap) {
595 vpap->next_gpa = vpa;
596 vpap->len = len;
597 vpap->update_pending = 1;
598 }
599
600 spin_unlock(&tvcpu->arch.vpa_update_lock);
601
602 return err;
603}
604
605static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
606{
607 struct kvm *kvm = vcpu->kvm;
608 void *va;
609 unsigned long nb;
610 unsigned long gpa;
611
	/*
	 * We need to pin the page pointed to by vpap->next_gpa, but we
	 * can't call kvmppc_pin_guest_page under the lock as it may
	 * sleep.  So drop the lock, pin the page, then take the lock
	 * again and check that a new area didn't get registered in the
	 * meantime.
	 */
620 for (;;) {
621 gpa = vpap->next_gpa;
622 spin_unlock(&vcpu->arch.vpa_update_lock);
623 va = NULL;
624 nb = 0;
625 if (gpa)
626 va = kvmppc_pin_guest_page(kvm, gpa, &nb);
627 spin_lock(&vcpu->arch.vpa_update_lock);
628 if (gpa == vpap->next_gpa)
629 break;
630
631 if (va)
632 kvmppc_unpin_guest_page(kvm, va, gpa, false);
633 }
634
635 vpap->update_pending = 0;
636 if (va && nb < vpap->len) {
		/*
		 * If it's now too short, it must be that userspace has
		 * changed the mappings underlying guest memory, so
		 * unregister the region.
		 */
642 kvmppc_unpin_guest_page(kvm, va, gpa, false);
643 va = NULL;
644 }
645 if (vpap->pinned_addr)
646 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
647 vpap->dirty);
648 vpap->gpa = gpa;
649 vpap->pinned_addr = va;
650 vpap->dirty = false;
651 if (va)
652 vpap->pinned_end = va + vpap->len;
653}
654
655static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
656{
657 if (!(vcpu->arch.vpa.update_pending ||
658 vcpu->arch.slb_shadow.update_pending ||
659 vcpu->arch.dtl.update_pending))
660 return;
661
662 spin_lock(&vcpu->arch.vpa_update_lock);
663 if (vcpu->arch.vpa.update_pending) {
664 kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
665 if (vcpu->arch.vpa.pinned_addr)
666 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
667 }
668 if (vcpu->arch.dtl.update_pending) {
669 kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
670 vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
671 vcpu->arch.dtl_index = 0;
672 }
673 if (vcpu->arch.slb_shadow.update_pending)
674 kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
675 spin_unlock(&vcpu->arch.vpa_update_lock);
676}
677
/*
 * Return the accumulated stolen time for the vcore up until `now'.
 * The caller should hold the vcore lock.
 */
682static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
683{
684 u64 p;
685 unsigned long flags;
686
687 spin_lock_irqsave(&vc->stoltb_lock, flags);
688 p = vc->stolen_tb;
689 if (vc->vcore_state != VCORE_INACTIVE &&
690 vc->preempt_tb != TB_NIL)
691 p += now - vc->preempt_tb;
692 spin_unlock_irqrestore(&vc->stoltb_lock, flags);
693 return p;
694}
695
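/*
 * Write an entry into the guest's dispatch trace log (if one is
 * registered), recording where the vcpu is being dispatched and how long
 * it was "stolen" (runnable but not running) since the last dispatch.
 */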
696static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
697 struct kvmppc_vcore *vc)
698{
699 struct dtl_entry *dt;
700 struct lppaca *vpa;
701 unsigned long stolen;
702 unsigned long core_stolen;
703 u64 now;
704 unsigned long flags;
705
706 dt = vcpu->arch.dtl_ptr;
707 vpa = vcpu->arch.vpa.pinned_addr;
708 now = mftb();
709 core_stolen = vcore_stolen_time(vc, now);
710 stolen = core_stolen - vcpu->arch.stolen_logged;
711 vcpu->arch.stolen_logged = core_stolen;
712 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
713 stolen += vcpu->arch.busy_stolen;
714 vcpu->arch.busy_stolen = 0;
715 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
716 if (!dt || !vpa)
717 return;
718 memset(dt, 0, sizeof(struct dtl_entry));
719 dt->dispatch_reason = 7;
720 dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
721 dt->timebase = cpu_to_be64(now + vc->tb_offset);
722 dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
723 dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
724 dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
725 ++dt;
726 if (dt == vcpu->arch.dtl.pinned_end)
727 dt = vcpu->arch.dtl.pinned_addr;
728 vcpu->arch.dtl_ptr = dt;
729
730 smp_wmb();
731 vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
732 vcpu->arch.dtl.dirty = true;
733}
734
735
736static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
737{
738 int thr;
739 struct kvmppc_vcore *vc;
740
741 if (vcpu->arch.doorbell_request)
742 return true;
743
	/*
	 * Ensure that the read of vcore->dpdes below comes after the
	 * read of vcpu->doorbell_request above.
	 */
748 smp_rmb();
749 vc = vcpu->arch.vcore;
750 thr = vcpu->vcpu_id - vc->first_vcpuid;
751 return !!(vc->dpdes & (1 << thr));
752}
753
754static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
755{
756 if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
757 return true;
758 if ((!vcpu->arch.vcore->arch_compat) &&
759 cpu_has_feature(CPU_FTR_ARCH_207S))
760 return true;
761 return false;
762}
763
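/*
 * Handle H_SET_MODE: currently supports setting the CIABR and the
 * DAWR/DAWRX watchpoint registers; address translation mode changes are
 * not performed here.
 */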
764static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
765 unsigned long resource, unsigned long value1,
766 unsigned long value2)
767{
768 switch (resource) {
769 case H_SET_MODE_RESOURCE_SET_CIABR:
770 if (!kvmppc_power8_compatible(vcpu))
771 return H_P2;
772 if (value2)
773 return H_P4;
774 if (mflags)
775 return H_UNSUPPORTED_FLAG_START;
776
777 if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER)
778 return H_P3;
779 vcpu->arch.ciabr = value1;
780 return H_SUCCESS;
781 case H_SET_MODE_RESOURCE_SET_DAWR0:
782 if (!kvmppc_power8_compatible(vcpu))
783 return H_P2;
784 if (!ppc_breakpoint_available())
785 return H_P2;
786 if (mflags)
787 return H_UNSUPPORTED_FLAG_START;
788 if (value2 & DABRX_HYP)
789 return H_P4;
790 vcpu->arch.dawr = value1;
791 vcpu->arch.dawrx = value2;
792 return H_SUCCESS;
793 case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
		/*
		 * Only mflags values 0 and 3 are accepted; even then the
		 * mode change itself is not done here (H_TOO_HARD causes
		 * the hcall to be passed up).
		 */
798 if (mflags != 0 && mflags != 3)
799 return H_UNSUPPORTED_FLAG_START;
800 return H_TOO_HARD;
801 default:
802 return H_TOO_HARD;
803 }
804}
/* Copy guest memory in place - must reside within a single memslot */
807static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from,
808 unsigned long len)
809{
810 struct kvm_memory_slot *to_memslot = NULL;
811 struct kvm_memory_slot *from_memslot = NULL;
812 unsigned long to_addr, from_addr;
813 int r;
814
815
816 from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT);
817 if (!from_memslot)
818 return -EFAULT;
819 if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages)
820 << PAGE_SHIFT))
821 return -EINVAL;
822 from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT);
823 if (kvm_is_error_hva(from_addr))
824 return -EFAULT;
825 from_addr |= (from & (PAGE_SIZE - 1));
826
827
828 to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT);
829 if (!to_memslot)
830 return -EFAULT;
831 if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages)
832 << PAGE_SHIFT))
833 return -EINVAL;
834 to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT);
835 if (kvm_is_error_hva(to_addr))
836 return -EFAULT;
837 to_addr |= (to & (PAGE_SIZE - 1));
838
839
840 r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr,
841 len);
842 if (r)
843 return -EFAULT;
844 mark_page_dirty(kvm, to >> PAGE_SHIFT);
845 return 0;
846}
847
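/*
 * Handle H_PAGE_INIT in virtual mode: zero or copy a 4kB guest page
 * according to the flags; the remaining flags are accepted but need no
 * extra action here.
 */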
848static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
849 unsigned long dest, unsigned long src)
850{
851 u64 pg_sz = SZ_4K;
852 u64 pg_mask = SZ_4K - 1;
853 int ret;
854
855
856 if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
857 H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
858 return H_PARAMETER;
859
860
861 if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
862 return H_PARAMETER;
863
864
865 if (flags & H_COPY_PAGE) {
866 ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz);
867 if (ret < 0)
868 return H_PARAMETER;
869 } else if (flags & H_ZERO_PAGE) {
870 ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz);
871 if (ret < 0)
872 return H_PARAMETER;
873 }
874
875
876
877 return H_SUCCESS;
878}
879
880static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
881{
882 struct kvmppc_vcore *vcore = target->arch.vcore;
883
	/*
	 * If the target vcpu is runnable but its virtual core is
	 * currently being run by another task (the vcore runner),
	 * yield to that runner instead, since that is the task
	 * actually using the physical core.
	 */
892 spin_lock(&vcore->lock);
893 if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
894 vcore->vcore_state != VCORE_INACTIVE &&
895 vcore->runner)
896 target = vcore->runner;
897 spin_unlock(&vcore->lock);
898
899 return kvm_vcpu_yield_to(target);
900}
901
902static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
903{
904 int yield_count = 0;
905 struct lppaca *lppaca;
906
907 spin_lock(&vcpu->arch.vpa_update_lock);
908 lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
909 if (lppaca)
910 yield_count = be32_to_cpu(lppaca->yield_count);
911 spin_unlock(&vcpu->arch.vpa_update_lock);
912 return yield_count;
913}
914
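/*
 * Handle a guest hypercall that could not be completed in real mode.
 * Returns RESUME_GUEST if the hcall was handled here, RESUME_HOST to
 * forward it to userspace, or a negative errno.
 */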
915int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
916{
917 unsigned long req = kvmppc_get_gpr(vcpu, 3);
918 unsigned long target, ret = H_SUCCESS;
919 int yield_count;
920 struct kvm_vcpu *tvcpu;
921 int idx, rc;
922
923 if (req <= MAX_HCALL_OPCODE &&
924 !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls))
925 return RESUME_HOST;
926
927 switch (req) {
928 case H_CEDE:
929 break;
930 case H_PROD:
931 target = kvmppc_get_gpr(vcpu, 4);
932 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
933 if (!tvcpu) {
934 ret = H_PARAMETER;
935 break;
936 }
937 tvcpu->arch.prodded = 1;
938 smp_mb();
939 if (tvcpu->arch.ceded)
940 kvmppc_fast_vcpu_kick_hv(tvcpu);
941 break;
942 case H_CONFER:
943 target = kvmppc_get_gpr(vcpu, 4);
944 if (target == -1)
945 break;
946 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
947 if (!tvcpu) {
948 ret = H_PARAMETER;
949 break;
950 }
951 yield_count = kvmppc_get_gpr(vcpu, 5);
952 if (kvmppc_get_yield_count(tvcpu) != yield_count)
953 break;
954 kvm_arch_vcpu_yield_to(tvcpu);
955 break;
956 case H_REGISTER_VPA:
957 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
958 kvmppc_get_gpr(vcpu, 5),
959 kvmppc_get_gpr(vcpu, 6));
960 break;
961 case H_RTAS:
962 if (list_empty(&vcpu->kvm->arch.rtas_tokens))
963 return RESUME_HOST;
964
965 idx = srcu_read_lock(&vcpu->kvm->srcu);
966 rc = kvmppc_rtas_hcall(vcpu);
967 srcu_read_unlock(&vcpu->kvm->srcu, idx);
968
969 if (rc == -ENOENT)
970 return RESUME_HOST;
971 else if (rc == 0)
972 break;
973
974
975 return rc;
976 case H_LOGICAL_CI_LOAD:
977 ret = kvmppc_h_logical_ci_load(vcpu);
978 if (ret == H_TOO_HARD)
979 return RESUME_HOST;
980 break;
981 case H_LOGICAL_CI_STORE:
982 ret = kvmppc_h_logical_ci_store(vcpu);
983 if (ret == H_TOO_HARD)
984 return RESUME_HOST;
985 break;
986 case H_SET_MODE:
987 ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
988 kvmppc_get_gpr(vcpu, 5),
989 kvmppc_get_gpr(vcpu, 6),
990 kvmppc_get_gpr(vcpu, 7));
991 if (ret == H_TOO_HARD)
992 return RESUME_HOST;
993 break;
994 case H_XIRR:
995 case H_CPPR:
996 case H_EOI:
997 case H_IPI:
998 case H_IPOLL:
999 case H_XIRR_X:
1000 if (kvmppc_xics_enabled(vcpu)) {
1001 if (xics_on_xive()) {
1002 ret = H_NOT_AVAILABLE;
1003 return RESUME_GUEST;
1004 }
1005 ret = kvmppc_xics_hcall(vcpu, req);
1006 break;
1007 }
1008 return RESUME_HOST;
1009 case H_SET_DABR:
1010 ret = kvmppc_h_set_dabr(vcpu, kvmppc_get_gpr(vcpu, 4));
1011 break;
1012 case H_SET_XDABR:
1013 ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4),
1014 kvmppc_get_gpr(vcpu, 5));
1015 break;
1016#ifdef CONFIG_SPAPR_TCE_IOMMU
1017 case H_GET_TCE:
1018 ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
1019 kvmppc_get_gpr(vcpu, 5));
1020 if (ret == H_TOO_HARD)
1021 return RESUME_HOST;
1022 break;
1023 case H_PUT_TCE:
1024 ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
1025 kvmppc_get_gpr(vcpu, 5),
1026 kvmppc_get_gpr(vcpu, 6));
1027 if (ret == H_TOO_HARD)
1028 return RESUME_HOST;
1029 break;
1030 case H_PUT_TCE_INDIRECT:
1031 ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
1032 kvmppc_get_gpr(vcpu, 5),
1033 kvmppc_get_gpr(vcpu, 6),
1034 kvmppc_get_gpr(vcpu, 7));
1035 if (ret == H_TOO_HARD)
1036 return RESUME_HOST;
1037 break;
1038 case H_STUFF_TCE:
1039 ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
1040 kvmppc_get_gpr(vcpu, 5),
1041 kvmppc_get_gpr(vcpu, 6),
1042 kvmppc_get_gpr(vcpu, 7));
1043 if (ret == H_TOO_HARD)
1044 return RESUME_HOST;
1045 break;
1046#endif
1047 case H_RANDOM:
1048 if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
1049 ret = H_HARDWARE;
1050 break;
1051
1052 case H_SET_PARTITION_TABLE:
1053 ret = H_FUNCTION;
1054 if (nesting_enabled(vcpu->kvm))
1055 ret = kvmhv_set_partition_table(vcpu);
1056 break;
1057 case H_ENTER_NESTED:
1058 ret = H_FUNCTION;
1059 if (!nesting_enabled(vcpu->kvm))
1060 break;
1061 ret = kvmhv_enter_nested_guest(vcpu);
1062 if (ret == H_INTERRUPT) {
1063 kvmppc_set_gpr(vcpu, 3, 0);
1064 vcpu->arch.hcall_needed = 0;
1065 return -EINTR;
1066 } else if (ret == H_TOO_HARD) {
1067 kvmppc_set_gpr(vcpu, 3, 0);
1068 vcpu->arch.hcall_needed = 0;
1069 return RESUME_HOST;
1070 }
1071 break;
1072 case H_TLB_INVALIDATE:
1073 ret = H_FUNCTION;
1074 if (nesting_enabled(vcpu->kvm))
1075 ret = kvmhv_do_nested_tlbie(vcpu);
1076 break;
1077 case H_COPY_TOFROM_GUEST:
1078 ret = H_FUNCTION;
1079 if (nesting_enabled(vcpu->kvm))
1080 ret = kvmhv_copy_tofrom_guest_nested(vcpu);
1081 break;
1082 case H_PAGE_INIT:
1083 ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4),
1084 kvmppc_get_gpr(vcpu, 5),
1085 kvmppc_get_gpr(vcpu, 6));
1086 break;
1087 case H_SVM_PAGE_IN:
1088 ret = H_UNSUPPORTED;
1089 if (kvmppc_get_srr1(vcpu) & MSR_S)
1090 ret = kvmppc_h_svm_page_in(vcpu->kvm,
1091 kvmppc_get_gpr(vcpu, 4),
1092 kvmppc_get_gpr(vcpu, 5),
1093 kvmppc_get_gpr(vcpu, 6));
1094 break;
1095 case H_SVM_PAGE_OUT:
1096 ret = H_UNSUPPORTED;
1097 if (kvmppc_get_srr1(vcpu) & MSR_S)
1098 ret = kvmppc_h_svm_page_out(vcpu->kvm,
1099 kvmppc_get_gpr(vcpu, 4),
1100 kvmppc_get_gpr(vcpu, 5),
1101 kvmppc_get_gpr(vcpu, 6));
1102 break;
1103 case H_SVM_INIT_START:
1104 ret = H_UNSUPPORTED;
1105 if (kvmppc_get_srr1(vcpu) & MSR_S)
1106 ret = kvmppc_h_svm_init_start(vcpu->kvm);
1107 break;
1108 case H_SVM_INIT_DONE:
1109 ret = H_UNSUPPORTED;
1110 if (kvmppc_get_srr1(vcpu) & MSR_S)
1111 ret = kvmppc_h_svm_init_done(vcpu->kvm);
1112 break;
1113 case H_SVM_INIT_ABORT:
		/*
		 * Unlike the other H_SVM_* hcalls, no MSR_S check is made
		 * here; the validity check is left to
		 * kvmppc_h_svm_init_abort() itself.
		 */
1121 ret = kvmppc_h_svm_init_abort(vcpu->kvm);
1122 break;
1123
1124 default:
1125 return RESUME_HOST;
1126 }
1127 kvmppc_set_gpr(vcpu, 3, ret);
1128 vcpu->arch.hcall_needed = 0;
1129 return RESUME_GUEST;
1130}
1131
/*
 * Handle H_CEDE on the path where the real-mode hcall handlers are not
 * used: set MSR_EE and mark the vcpu as ceded, unless another vcpu has
 * already prodded it.
 */
1138static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
1139{
1140 vcpu->arch.shregs.msr |= MSR_EE;
1141 vcpu->arch.ceded = 1;
1142 smp_mb();
1143 if (vcpu->arch.prodded) {
1144 vcpu->arch.prodded = 0;
1145 smp_mb();
1146 vcpu->arch.ceded = 0;
1147 }
1148}
1149
1150static int kvmppc_hcall_impl_hv(unsigned long cmd)
1151{
1152 switch (cmd) {
1153 case H_CEDE:
1154 case H_PROD:
1155 case H_CONFER:
1156 case H_REGISTER_VPA:
1157 case H_SET_MODE:
1158 case H_LOGICAL_CI_LOAD:
1159 case H_LOGICAL_CI_STORE:
1160#ifdef CONFIG_KVM_XICS
1161 case H_XIRR:
1162 case H_CPPR:
1163 case H_EOI:
1164 case H_IPI:
1165 case H_IPOLL:
1166 case H_XIRR_X:
1167#endif
1168 case H_PAGE_INIT:
1169 return 1;
1170 }
1171
1172
1173 return kvmppc_hcall_impl_hv_realmode(cmd);
1174}
1175
1176static int kvmppc_emulate_debug_inst(struct kvm_vcpu *vcpu)
1177{
1178 u32 last_inst;
1179
1180 if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
1181 EMULATE_DONE) {
		/*
		 * Fetch failed, so return to guest and
		 * try executing it again.
		 */
1186 return RESUME_GUEST;
1187 }
1188
1189 if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
1190 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
1191 vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
1192 return RESUME_HOST;
1193 } else {
1194 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1195 return RESUME_GUEST;
1196 }
1197}
1198
1199static void do_nothing(void *x)
1200{
1201}
1202
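/*
 * Build the DPDES value as seen by the guest: one bit per emulated SMT
 * thread in the vcpu's virtual core, set if that vcpu currently has a
 * doorbell pending.
 */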
1203static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
1204{
1205 int thr, cpu, pcpu, nthreads;
1206 struct kvm_vcpu *v;
1207 unsigned long dpdes;
1208
1209 nthreads = vcpu->kvm->arch.emul_smt_mode;
1210 dpdes = 0;
1211 cpu = vcpu->vcpu_id & ~(nthreads - 1);
1212 for (thr = 0; thr < nthreads; ++thr, ++cpu) {
1213 v = kvmppc_find_vcpu(vcpu->kvm, cpu);
1214 if (!v)
1215 continue;
		/*
		 * If the vcpu is currently running on a physical cpu thread,
		 * interrupt it in order to pull it out of the guest briefly,
		 * which will update its vcore->dpdes value.
		 */
1221 pcpu = READ_ONCE(v->cpu);
1222 if (pcpu >= 0)
1223 smp_call_function_single(pcpu, do_nothing, NULL, 1);
1224 if (kvmppc_doorbell_pending(v))
1225 dpdes |= 1 << thr;
1226 }
1227 return dpdes;
1228}
1229
/*
 * On POWER9, emulate doorbell-related instructions in order to
 * give the guest the illusion of running on a multi-threaded core.
 * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
 * and mfspr DPDES.
 */
1236static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
1237{
1238 u32 inst, rb, thr;
1239 unsigned long arg;
1240 struct kvm *kvm = vcpu->kvm;
1241 struct kvm_vcpu *tvcpu;
1242
1243 if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
1244 return RESUME_GUEST;
1245 if (get_op(inst) != 31)
1246 return EMULATE_FAIL;
1247 rb = get_rb(inst);
1248 thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
1249 switch (get_xop(inst)) {
1250 case OP_31_XOP_MSGSNDP:
1251 arg = kvmppc_get_gpr(vcpu, rb);
1252 if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER)
1253 break;
1254 arg &= 0x7f;
1255 if (arg >= kvm->arch.emul_smt_mode)
1256 break;
1257 tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
1258 if (!tvcpu)
1259 break;
1260 if (!tvcpu->arch.doorbell_request) {
1261 tvcpu->arch.doorbell_request = 1;
1262 kvmppc_fast_vcpu_kick_hv(tvcpu);
1263 }
1264 break;
1265 case OP_31_XOP_MSGCLRP:
1266 arg = kvmppc_get_gpr(vcpu, rb);
1267 if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER)
1268 break;
1269 vcpu->arch.vcore->dpdes = 0;
1270 vcpu->arch.doorbell_request = 0;
1271 break;
1272 case OP_31_XOP_MFSPR:
1273 switch (get_sprn(inst)) {
1274 case SPRN_TIR:
1275 arg = thr;
1276 break;
1277 case SPRN_DPDES:
1278 arg = kvmppc_read_dpdes(vcpu);
1279 break;
1280 default:
1281 return EMULATE_FAIL;
1282 }
1283 kvmppc_set_gpr(vcpu, get_rt(inst), arg);
1284 break;
1285 default:
1286 return EMULATE_FAIL;
1287 }
1288 kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
1289 return RESUME_GUEST;
1290}
1291
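/*
 * Handle an exit from the guest: decide whether the vcpu can resume
 * immediately (RESUME_GUEST), needs a page fault or passthrough interrupt
 * handled first, or must exit to userspace (RESUME_HOST).
 */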
1292static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
1293 struct task_struct *tsk)
1294{
1295 struct kvm_run *run = vcpu->run;
1296 int r = RESUME_HOST;
1297
1298 vcpu->stat.sum_exits++;
1299
	/*
	 * The guest should never run with MSR_HV set, so a trap taken
	 * with MSR_HV set indicates something went badly wrong in the
	 * guest entry/exit path.  Dump the register state and report an
	 * internal error to userspace rather than trying to continue.
	 */
1308 if (vcpu->arch.shregs.msr & MSR_HV) {
1309 printk(KERN_EMERG "KVM trap in HV mode!\n");
1310 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
1311 vcpu->arch.trap, kvmppc_get_pc(vcpu),
1312 vcpu->arch.shregs.msr);
1313 kvmppc_dump_regs(vcpu);
1314 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1315 run->hw.hardware_exit_reason = vcpu->arch.trap;
1316 return RESUME_HOST;
1317 }
1318 run->exit_reason = KVM_EXIT_UNKNOWN;
1319 run->ready_for_interrupt_injection = 1;
1320 switch (vcpu->arch.trap) {
1321
1322 case BOOK3S_INTERRUPT_HV_DECREMENTER:
1323 vcpu->stat.dec_exits++;
1324 r = RESUME_GUEST;
1325 break;
1326 case BOOK3S_INTERRUPT_EXTERNAL:
1327 case BOOK3S_INTERRUPT_H_DOORBELL:
1328 case BOOK3S_INTERRUPT_H_VIRT:
1329 vcpu->stat.ext_intr_exits++;
1330 r = RESUME_GUEST;
1331 break;
1332
1333 case BOOK3S_INTERRUPT_HMI:
1334 case BOOK3S_INTERRUPT_PERFMON:
1335 case BOOK3S_INTERRUPT_SYSTEM_RESET:
1336 r = RESUME_GUEST;
1337 break;
1338 case BOOK3S_INTERRUPT_MACHINE_CHECK: {
1339 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1340 DEFAULT_RATELIMIT_BURST);
1341
		/*
		 * Print the MCE event to the host console, rate-limited so
		 * a misbehaving guest cannot flood the host log.
		 */
		if (__ratelimit(&rs))
			machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
1347
		/*
		 * If the guest can do FWNMI, exit to userspace so it can
		 * deliver a FWNMI to the guest.
		 * Otherwise we synthesize a machine check for the guest
		 * so that it knows that the machine check occurred.
		 */
1354 if (!vcpu->kvm->arch.fwnmi_enabled) {
1355 ulong flags = vcpu->arch.shregs.msr & 0x083c0000;
1356 kvmppc_core_queue_machine_check(vcpu, flags);
1357 r = RESUME_GUEST;
1358 break;
1359 }
1360
1361
1362 run->exit_reason = KVM_EXIT_NMI;
1363 run->hw.hardware_exit_reason = vcpu->arch.trap;
1364
1365 run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
1366
1367 if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
1368 run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
1369 else
1370 run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
1371
1372 r = RESUME_HOST;
1373 break;
1374 }
1375 case BOOK3S_INTERRUPT_PROGRAM:
1376 {
1377 ulong flags;
1378
		/*
		 * Deliver the program interrupt to the guest, carrying over
		 * the interrupt-cause bits from SRR1.
		 */
1384 flags = vcpu->arch.shregs.msr & 0x1f0000ull;
1385 kvmppc_core_queue_program(vcpu, flags);
1386 r = RESUME_GUEST;
1387 break;
1388 }
1389 case BOOK3S_INTERRUPT_SYSCALL:
1390 {
1391
1392 int i;
1393
		/*
		 * Guest hcalls that reach here are punted to userspace:
		 * copy the hcall number and arguments into the run struct
		 * and exit with KVM_EXIT_PAPR_HCALL.
		 */
1398 run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
1399 for (i = 0; i < 9; ++i)
1400 run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
1401 run->exit_reason = KVM_EXIT_PAPR_HCALL;
1402 vcpu->arch.hcall_needed = 1;
1403 r = RESUME_HOST;
1404 break;
1405 }
1406
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */
1413 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
1414 r = RESUME_PAGE_FAULT;
1415 break;
1416 case BOOK3S_INTERRUPT_H_INST_STORAGE:
1417 vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
1418 vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
1419 DSISR_SRR1_MATCH_64S;
1420 if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
1421 vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
1422 r = RESUME_PAGE_FAULT;
1423 break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * If guest debug is disabled, generate a program interrupt to
	 * the guest.  If guest debug is enabled, we need to check whether
	 * the instruction is a software breakpoint instruction, and
	 * accordingly return to the guest or to the host.
	 */
1431 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
1432 if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
1433 vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
1434 swab32(vcpu->arch.emul_inst) :
1435 vcpu->arch.emul_inst;
1436 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
1437 r = kvmppc_emulate_debug_inst(vcpu);
1438 } else {
1439 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1440 r = RESUME_GUEST;
1441 }
1442 break;
1443
	/*
	 * This occurs if the guest (kernel or userspace) does something
	 * that is prohibited by HFSCR.
	 * On POWER9, this could be a doorbell instruction that we need
	 * to emulate.
	 * Otherwise, we just generate a program interrupt to the guest.
	 */
1450 case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
1451 r = EMULATE_FAIL;
1452 if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
1453 cpu_has_feature(CPU_FTR_ARCH_300))
1454 r = kvmppc_emulate_doorbell_instr(vcpu);
1455 if (r == EMULATE_FAIL) {
1456 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1457 r = RESUME_GUEST;
1458 }
1459 break;
1460
1461#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1462 case BOOK3S_INTERRUPT_HV_SOFTPATCH:
		/*
		 * POWER9 raises a hypervisor softpatch interrupt for certain
		 * transactional-memory instructions; emulate them here.
		 */
1469 r = kvmhv_p9_tm_emulation(vcpu);
1470 break;
1471#endif
1472
1473 case BOOK3S_INTERRUPT_HV_RM_HARD:
1474 r = RESUME_PASSTHROUGH;
1475 break;
1476 default:
1477 kvmppc_dump_regs(vcpu);
1478 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
1479 vcpu->arch.trap, kvmppc_get_pc(vcpu),
1480 vcpu->arch.shregs.msr);
1481 run->hw.hardware_exit_reason = vcpu->arch.trap;
1482 r = RESUME_HOST;
1483 break;
1484 }
1485
1486 return r;
1487}
1488
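/*
 * Same as above, but for exits taken while running a nested (L2) guest
 * on behalf of a nested hypervisor.
 */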
1489static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
1490{
1491 int r;
1492 int srcu_idx;
1493
1494 vcpu->stat.sum_exits++;
1495
	/*
	 * As in kvmppc_handle_exit_hv(), a trap taken with MSR_HV set
	 * means the entry/exit path went badly wrong; dump state and
	 * bail out to the host.
	 */
1504 if (vcpu->arch.shregs.msr & MSR_HV) {
1505 pr_emerg("KVM trap in HV mode while nested!\n");
1506 pr_emerg("trap=0x%x | pc=0x%lx | msr=0x%llx\n",
1507 vcpu->arch.trap, kvmppc_get_pc(vcpu),
1508 vcpu->arch.shregs.msr);
1509 kvmppc_dump_regs(vcpu);
1510 return RESUME_HOST;
1511 }
1512 switch (vcpu->arch.trap) {
1513
1514 case BOOK3S_INTERRUPT_HV_DECREMENTER:
1515 vcpu->stat.dec_exits++;
1516 r = RESUME_GUEST;
1517 break;
1518 case BOOK3S_INTERRUPT_EXTERNAL:
1519 vcpu->stat.ext_intr_exits++;
1520 r = RESUME_HOST;
1521 break;
1522 case BOOK3S_INTERRUPT_H_DOORBELL:
1523 case BOOK3S_INTERRUPT_H_VIRT:
1524 vcpu->stat.ext_intr_exits++;
1525 r = RESUME_GUEST;
1526 break;
1527
1528 case BOOK3S_INTERRUPT_HMI:
1529 case BOOK3S_INTERRUPT_PERFMON:
1530 case BOOK3S_INTERRUPT_SYSTEM_RESET:
1531 r = RESUME_GUEST;
1532 break;
1533 case BOOK3S_INTERRUPT_MACHINE_CHECK:
1534 {
1535 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1536 DEFAULT_RATELIMIT_BURST);
1537
1538 r = RESUME_HOST;
1539
1540 if (__ratelimit(&rs))
1541 machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
1542 break;
1543 }
1544
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.
	 */
1550 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
1551 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1552 r = kvmhv_nested_page_fault(vcpu);
1553 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
1554 break;
1555 case BOOK3S_INTERRUPT_H_INST_STORAGE:
1556 vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
1557 vcpu->arch.fault_dsisr = kvmppc_get_msr(vcpu) &
1558 DSISR_SRR1_MATCH_64S;
1559 if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
1560 vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
1561 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1562 r = kvmhv_nested_page_fault(vcpu);
1563 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
1564 break;
1565
1566#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1567 case BOOK3S_INTERRUPT_HV_SOFTPATCH:
		/*
		 * POWER9 raises a hypervisor softpatch interrupt for certain
		 * transactional-memory instructions; emulate them here.
		 */
1574 r = kvmhv_p9_tm_emulation(vcpu);
1575 break;
1576#endif
1577
1578 case BOOK3S_INTERRUPT_HV_RM_HARD:
1579 vcpu->arch.trap = 0;
1580 r = RESUME_GUEST;
1581 if (!xics_on_xive())
1582 kvmppc_xics_rm_complete(vcpu, 0);
1583 break;
1584 default:
1585 r = RESUME_HOST;
1586 break;
1587 }
1588
1589 return r;
1590}
1591
1592static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
1593 struct kvm_sregs *sregs)
1594{
1595 int i;
1596
1597 memset(sregs, 0, sizeof(struct kvm_sregs));
1598 sregs->pvr = vcpu->arch.pvr;
1599 for (i = 0; i < vcpu->arch.slb_max; i++) {
1600 sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
1601 sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
1602 }
1603
1604 return 0;
1605}
1606
1607static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
1608 struct kvm_sregs *sregs)
1609{
1610 int i, j;
1611
	/* Only accept the same PVR as the host's, since we can't spoof it */
1613 if (sregs->pvr != vcpu->arch.pvr)
1614 return -EINVAL;
1615
1616 j = 0;
1617 for (i = 0; i < vcpu->arch.slb_nr; i++) {
1618 if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
1619 vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
1620 vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
1621 ++j;
1622 }
1623 }
1624 vcpu->arch.slb_max = j;
1625
1626 return 0;
1627}
1628
/*
 * Filter an LPCR value supplied by userspace or a nested hypervisor,
 * clearing bits that are not valid on this host or for this guest MMU
 * mode.
 */
1634unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
1635{
	/* LPCR_TC only applies to HPT guests */
1637 if (kvm_is_radix(kvm))
1638 lpcr &= ~LPCR_TC;
1639
	/* AIL requires ISA v2.07 or later, and only AIL=0 or AIL=3 are supported */
1641 if (!cpu_has_feature(CPU_FTR_ARCH_207S))
1642 lpcr &= ~LPCR_AIL;
1643 if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
1644 lpcr &= ~LPCR_AIL;
1645
	/* The large decrementer (LD) is only available on POWER9 and later */
1650 if (!cpu_has_feature(CPU_FTR_ARCH_300))
1651 lpcr &= ~LPCR_LD;
1652
1653 return lpcr;
1654}
1655
1656static void verify_lpcr(struct kvm *kvm, unsigned long lpcr)
1657{
1658 if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) {
1659 WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n",
1660 lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr));
1661 }
1662}
1663
1664static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
1665 bool preserve_top32)
1666{
1667 struct kvm *kvm = vcpu->kvm;
1668 struct kvmppc_vcore *vc = vcpu->arch.vcore;
1669 u64 mask;
1670
1671 spin_lock(&vc->lock);
	/*
	 * Userspace is only allowed to modify these LPCR bits:
	 * DPFD (default prefetch depth), ILE (interrupt little-endian),
	 * TC (translation control), AIL (alternate interrupt location)
	 * and LD (large decrementer); everything else is preserved.
	 */
1680 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD;
1681
	/* Broken 32-bit version of LPCR must not clear top bits */
1683 if (preserve_top32)
1684 mask &= 0xFFFFFFFF;
1685
1686 new_lpcr = kvmppc_filter_lpcr_hv(kvm,
1687 (vc->lpcr & ~mask) | (new_lpcr & mask));
1688
	/*
	 * If ILE (interrupt little-endian) has changed, update the
	 * MSR_LE bit in the intr_msr for each vcpu in this vcore.
	 */
1693 if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
1694 struct kvm_vcpu *vcpu;
1695 int i;
1696
1697 kvm_for_each_vcpu(i, vcpu, kvm) {
1698 if (vcpu->arch.vcore != vc)
1699 continue;
1700 if (new_lpcr & LPCR_ILE)
1701 vcpu->arch.intr_msr |= MSR_LE;
1702 else
1703 vcpu->arch.intr_msr &= ~MSR_LE;
1704 }
1705 }
1706
1707 vc->lpcr = new_lpcr;
1708
1709 spin_unlock(&vc->lock);
1710}
1711
1712static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1713 union kvmppc_one_reg *val)
1714{
1715 int r = 0;
1716 long int i;
1717
1718 switch (id) {
1719 case KVM_REG_PPC_DEBUG_INST:
1720 *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
1721 break;
1722 case KVM_REG_PPC_HIOR:
1723 *val = get_reg_val(id, 0);
1724 break;
1725 case KVM_REG_PPC_DABR:
1726 *val = get_reg_val(id, vcpu->arch.dabr);
1727 break;
1728 case KVM_REG_PPC_DABRX:
1729 *val = get_reg_val(id, vcpu->arch.dabrx);
1730 break;
1731 case KVM_REG_PPC_DSCR:
1732 *val = get_reg_val(id, vcpu->arch.dscr);
1733 break;
1734 case KVM_REG_PPC_PURR:
1735 *val = get_reg_val(id, vcpu->arch.purr);
1736 break;
1737 case KVM_REG_PPC_SPURR:
1738 *val = get_reg_val(id, vcpu->arch.spurr);
1739 break;
1740 case KVM_REG_PPC_AMR:
1741 *val = get_reg_val(id, vcpu->arch.amr);
1742 break;
1743 case KVM_REG_PPC_UAMOR:
1744 *val = get_reg_val(id, vcpu->arch.uamor);
1745 break;
1746 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
1747 i = id - KVM_REG_PPC_MMCR0;
1748 *val = get_reg_val(id, vcpu->arch.mmcr[i]);
1749 break;
1750 case KVM_REG_PPC_MMCR2:
1751 *val = get_reg_val(id, vcpu->arch.mmcr[2]);
1752 break;
1753 case KVM_REG_PPC_MMCRA:
1754 *val = get_reg_val(id, vcpu->arch.mmcra);
1755 break;
1756 case KVM_REG_PPC_MMCRS:
1757 *val = get_reg_val(id, vcpu->arch.mmcrs);
1758 break;
1759 case KVM_REG_PPC_MMCR3:
1760 *val = get_reg_val(id, vcpu->arch.mmcr[3]);
1761 break;
1762 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
1763 i = id - KVM_REG_PPC_PMC1;
1764 *val = get_reg_val(id, vcpu->arch.pmc[i]);
1765 break;
1766 case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
1767 i = id - KVM_REG_PPC_SPMC1;
1768 *val = get_reg_val(id, vcpu->arch.spmc[i]);
1769 break;
1770 case KVM_REG_PPC_SIAR:
1771 *val = get_reg_val(id, vcpu->arch.siar);
1772 break;
1773 case KVM_REG_PPC_SDAR:
1774 *val = get_reg_val(id, vcpu->arch.sdar);
1775 break;
1776 case KVM_REG_PPC_SIER:
1777 *val = get_reg_val(id, vcpu->arch.sier[0]);
1778 break;
1779 case KVM_REG_PPC_SIER2:
1780 *val = get_reg_val(id, vcpu->arch.sier[1]);
1781 break;
1782 case KVM_REG_PPC_SIER3:
1783 *val = get_reg_val(id, vcpu->arch.sier[2]);
1784 break;
1785 case KVM_REG_PPC_IAMR:
1786 *val = get_reg_val(id, vcpu->arch.iamr);
1787 break;
1788 case KVM_REG_PPC_PSPB:
1789 *val = get_reg_val(id, vcpu->arch.pspb);
1790 break;
1791 case KVM_REG_PPC_DPDES:
		/*
		 * On POWER9, where we are emulating msgsndp etc., we return
		 * one bit for each vcpu, which can come from either the
		 * doorbell_request flag or the vcore's DPDES value.
		 */
1798 *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
1799 vcpu->arch.doorbell_request);
1800 break;
1801 case KVM_REG_PPC_VTB:
1802 *val = get_reg_val(id, vcpu->arch.vcore->vtb);
1803 break;
1804 case KVM_REG_PPC_DAWR:
1805 *val = get_reg_val(id, vcpu->arch.dawr);
1806 break;
1807 case KVM_REG_PPC_DAWRX:
1808 *val = get_reg_val(id, vcpu->arch.dawrx);
1809 break;
1810 case KVM_REG_PPC_CIABR:
1811 *val = get_reg_val(id, vcpu->arch.ciabr);
1812 break;
1813 case KVM_REG_PPC_CSIGR:
1814 *val = get_reg_val(id, vcpu->arch.csigr);
1815 break;
1816 case KVM_REG_PPC_TACR:
1817 *val = get_reg_val(id, vcpu->arch.tacr);
1818 break;
1819 case KVM_REG_PPC_TCSCR:
1820 *val = get_reg_val(id, vcpu->arch.tcscr);
1821 break;
1822 case KVM_REG_PPC_PID:
1823 *val = get_reg_val(id, vcpu->arch.pid);
1824 break;
1825 case KVM_REG_PPC_ACOP:
1826 *val = get_reg_val(id, vcpu->arch.acop);
1827 break;
1828 case KVM_REG_PPC_WORT:
1829 *val = get_reg_val(id, vcpu->arch.wort);
1830 break;
1831 case KVM_REG_PPC_TIDR:
1832 *val = get_reg_val(id, vcpu->arch.tid);
1833 break;
1834 case KVM_REG_PPC_PSSCR:
1835 *val = get_reg_val(id, vcpu->arch.psscr);
1836 break;
1837 case KVM_REG_PPC_VPA_ADDR:
1838 spin_lock(&vcpu->arch.vpa_update_lock);
1839 *val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
1840 spin_unlock(&vcpu->arch.vpa_update_lock);
1841 break;
1842 case KVM_REG_PPC_VPA_SLB:
1843 spin_lock(&vcpu->arch.vpa_update_lock);
1844 val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
1845 val->vpaval.length = vcpu->arch.slb_shadow.len;
1846 spin_unlock(&vcpu->arch.vpa_update_lock);
1847 break;
1848 case KVM_REG_PPC_VPA_DTL:
1849 spin_lock(&vcpu->arch.vpa_update_lock);
1850 val->vpaval.addr = vcpu->arch.dtl.next_gpa;
1851 val->vpaval.length = vcpu->arch.dtl.len;
1852 spin_unlock(&vcpu->arch.vpa_update_lock);
1853 break;
1854 case KVM_REG_PPC_TB_OFFSET:
1855 *val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
1856 break;
1857 case KVM_REG_PPC_LPCR:
1858 case KVM_REG_PPC_LPCR_64:
1859 *val = get_reg_val(id, vcpu->arch.vcore->lpcr);
1860 break;
1861 case KVM_REG_PPC_PPR:
1862 *val = get_reg_val(id, vcpu->arch.ppr);
1863 break;
1864#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1865 case KVM_REG_PPC_TFHAR:
1866 *val = get_reg_val(id, vcpu->arch.tfhar);
1867 break;
1868 case KVM_REG_PPC_TFIAR:
1869 *val = get_reg_val(id, vcpu->arch.tfiar);
1870 break;
1871 case KVM_REG_PPC_TEXASR:
1872 *val = get_reg_val(id, vcpu->arch.texasr);
1873 break;
1874 case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
1875 i = id - KVM_REG_PPC_TM_GPR0;
1876 *val = get_reg_val(id, vcpu->arch.gpr_tm[i]);
1877 break;
1878 case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
1879 {
1880 int j;
1881 i = id - KVM_REG_PPC_TM_VSR0;
1882 if (i < 32)
1883 for (j = 0; j < TS_FPRWIDTH; j++)
1884 val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
1885 else {
1886 if (cpu_has_feature(CPU_FTR_ALTIVEC))
1887 val->vval = vcpu->arch.vr_tm.vr[i-32];
1888 else
1889 r = -ENXIO;
1890 }
1891 break;
1892 }
1893 case KVM_REG_PPC_TM_CR:
1894 *val = get_reg_val(id, vcpu->arch.cr_tm);
1895 break;
1896 case KVM_REG_PPC_TM_XER:
1897 *val = get_reg_val(id, vcpu->arch.xer_tm);
1898 break;
1899 case KVM_REG_PPC_TM_LR:
1900 *val = get_reg_val(id, vcpu->arch.lr_tm);
1901 break;
1902 case KVM_REG_PPC_TM_CTR:
1903 *val = get_reg_val(id, vcpu->arch.ctr_tm);
1904 break;
1905 case KVM_REG_PPC_TM_FPSCR:
1906 *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
1907 break;
1908 case KVM_REG_PPC_TM_AMR:
1909 *val = get_reg_val(id, vcpu->arch.amr_tm);
1910 break;
1911 case KVM_REG_PPC_TM_PPR:
1912 *val = get_reg_val(id, vcpu->arch.ppr_tm);
1913 break;
1914 case KVM_REG_PPC_TM_VRSAVE:
1915 *val = get_reg_val(id, vcpu->arch.vrsave_tm);
1916 break;
1917 case KVM_REG_PPC_TM_VSCR:
1918 if (cpu_has_feature(CPU_FTR_ALTIVEC))
1919 *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
1920 else
1921 r = -ENXIO;
1922 break;
1923 case KVM_REG_PPC_TM_DSCR:
1924 *val = get_reg_val(id, vcpu->arch.dscr_tm);
1925 break;
1926 case KVM_REG_PPC_TM_TAR:
1927 *val = get_reg_val(id, vcpu->arch.tar_tm);
1928 break;
1929#endif
1930 case KVM_REG_PPC_ARCH_COMPAT:
1931 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
1932 break;
1933 case KVM_REG_PPC_DEC_EXPIRY:
1934 *val = get_reg_val(id, vcpu->arch.dec_expires +
1935 vcpu->arch.vcore->tb_offset);
1936 break;
1937 case KVM_REG_PPC_ONLINE:
1938 *val = get_reg_val(id, vcpu->arch.online);
1939 break;
1940 case KVM_REG_PPC_PTCR:
1941 *val = get_reg_val(id, vcpu->kvm->arch.l1_ptcr);
1942 break;
1943 default:
1944 r = -EINVAL;
1945 break;
1946 }
1947
1948 return r;
1949}
1950
1951static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1952 union kvmppc_one_reg *val)
1953{
1954 int r = 0;
1955 long int i;
1956 unsigned long addr, len;
1957
1958 switch (id) {
1959 case KVM_REG_PPC_HIOR:
		/* Only allow this to be set to zero */
1961 if (set_reg_val(id, *val))
1962 r = -EINVAL;
1963 break;
1964 case KVM_REG_PPC_DABR:
1965 vcpu->arch.dabr = set_reg_val(id, *val);
1966 break;
1967 case KVM_REG_PPC_DABRX:
1968 vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
1969 break;
1970 case KVM_REG_PPC_DSCR:
1971 vcpu->arch.dscr = set_reg_val(id, *val);
1972 break;
1973 case KVM_REG_PPC_PURR:
1974 vcpu->arch.purr = set_reg_val(id, *val);
1975 break;
1976 case KVM_REG_PPC_SPURR:
1977 vcpu->arch.spurr = set_reg_val(id, *val);
1978 break;
1979 case KVM_REG_PPC_AMR:
1980 vcpu->arch.amr = set_reg_val(id, *val);
1981 break;
1982 case KVM_REG_PPC_UAMOR:
1983 vcpu->arch.uamor = set_reg_val(id, *val);
1984 break;
1985 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
1986 i = id - KVM_REG_PPC_MMCR0;
1987 vcpu->arch.mmcr[i] = set_reg_val(id, *val);
1988 break;
1989 case KVM_REG_PPC_MMCR2:
1990 vcpu->arch.mmcr[2] = set_reg_val(id, *val);
1991 break;
1992 case KVM_REG_PPC_MMCRA:
1993 vcpu->arch.mmcra = set_reg_val(id, *val);
1994 break;
1995 case KVM_REG_PPC_MMCRS:
1996 vcpu->arch.mmcrs = set_reg_val(id, *val);
1997 break;
1998 case KVM_REG_PPC_MMCR3:
		vcpu->arch.mmcr[3] = set_reg_val(id, *val);
2000 break;
2001 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
2002 i = id - KVM_REG_PPC_PMC1;
2003 vcpu->arch.pmc[i] = set_reg_val(id, *val);
2004 break;
2005 case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
2006 i = id - KVM_REG_PPC_SPMC1;
2007 vcpu->arch.spmc[i] = set_reg_val(id, *val);
2008 break;
2009 case KVM_REG_PPC_SIAR:
2010 vcpu->arch.siar = set_reg_val(id, *val);
2011 break;
2012 case KVM_REG_PPC_SDAR:
2013 vcpu->arch.sdar = set_reg_val(id, *val);
2014 break;
2015 case KVM_REG_PPC_SIER:
2016 vcpu->arch.sier[0] = set_reg_val(id, *val);
2017 break;
2018 case KVM_REG_PPC_SIER2:
2019 vcpu->arch.sier[1] = set_reg_val(id, *val);
2020 break;
2021 case KVM_REG_PPC_SIER3:
2022 vcpu->arch.sier[2] = set_reg_val(id, *val);
2023 break;
2024 case KVM_REG_PPC_IAMR:
2025 vcpu->arch.iamr = set_reg_val(id, *val);
2026 break;
2027 case KVM_REG_PPC_PSPB:
2028 vcpu->arch.pspb = set_reg_val(id, *val);
2029 break;
2030 case KVM_REG_PPC_DPDES:
2031 vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
2032 break;
2033 case KVM_REG_PPC_VTB:
2034 vcpu->arch.vcore->vtb = set_reg_val(id, *val);
2035 break;
2036 case KVM_REG_PPC_DAWR:
2037 vcpu->arch.dawr = set_reg_val(id, *val);
2038 break;
2039 case KVM_REG_PPC_DAWRX:
2040 vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
2041 break;
2042 case KVM_REG_PPC_CIABR:
2043 vcpu->arch.ciabr = set_reg_val(id, *val);
		/* Don't allow setting breakpoints in hypervisor code */
2045 if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
2046 vcpu->arch.ciabr &= ~CIABR_PRIV;
2047 break;
2048 case KVM_REG_PPC_CSIGR:
2049 vcpu->arch.csigr = set_reg_val(id, *val);
2050 break;
2051 case KVM_REG_PPC_TACR:
2052 vcpu->arch.tacr = set_reg_val(id, *val);
2053 break;
2054 case KVM_REG_PPC_TCSCR:
2055 vcpu->arch.tcscr = set_reg_val(id, *val);
2056 break;
2057 case KVM_REG_PPC_PID:
2058 vcpu->arch.pid = set_reg_val(id, *val);
2059 break;
2060 case KVM_REG_PPC_ACOP:
2061 vcpu->arch.acop = set_reg_val(id, *val);
2062 break;
2063 case KVM_REG_PPC_WORT:
2064 vcpu->arch.wort = set_reg_val(id, *val);
2065 break;
2066 case KVM_REG_PPC_TIDR:
2067 vcpu->arch.tid = set_reg_val(id, *val);
2068 break;
2069 case KVM_REG_PPC_PSSCR:
2070 vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
2071 break;
2072 case KVM_REG_PPC_VPA_ADDR:
2073 addr = set_reg_val(id, *val);
2074 r = -EINVAL;
2075 if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
2076 vcpu->arch.dtl.next_gpa))
2077 break;
2078 r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
2079 break;
2080 case KVM_REG_PPC_VPA_SLB:
2081 addr = val->vpaval.addr;
2082 len = val->vpaval.length;
2083 r = -EINVAL;
2084 if (addr && !vcpu->arch.vpa.next_gpa)
2085 break;
2086 r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
2087 break;
2088 case KVM_REG_PPC_VPA_DTL:
2089 addr = val->vpaval.addr;
2090 len = val->vpaval.length;
2091 r = -EINVAL;
2092 if (addr && (len < sizeof(struct dtl_entry) ||
2093 !vcpu->arch.vpa.next_gpa))
2094 break;
2095 len -= len % sizeof(struct dtl_entry);
2096 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
2097 break;
2098 case KVM_REG_PPC_TB_OFFSET:
		/* round up to multiple of 2^24 */
2100 vcpu->arch.vcore->tb_offset =
2101 ALIGN(set_reg_val(id, *val), 1UL << 24);
2102 break;
2103 case KVM_REG_PPC_LPCR:
2104 kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
2105 break;
2106 case KVM_REG_PPC_LPCR_64:
2107 kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false);
2108 break;
2109 case KVM_REG_PPC_PPR:
2110 vcpu->arch.ppr = set_reg_val(id, *val);
2111 break;
2112#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2113 case KVM_REG_PPC_TFHAR:
2114 vcpu->arch.tfhar = set_reg_val(id, *val);
2115 break;
2116 case KVM_REG_PPC_TFIAR:
2117 vcpu->arch.tfiar = set_reg_val(id, *val);
2118 break;
2119 case KVM_REG_PPC_TEXASR:
2120 vcpu->arch.texasr = set_reg_val(id, *val);
2121 break;
2122 case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
2123 i = id - KVM_REG_PPC_TM_GPR0;
2124 vcpu->arch.gpr_tm[i] = set_reg_val(id, *val);
2125 break;
2126 case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
2127 {
2128 int j;
2129 i = id - KVM_REG_PPC_TM_VSR0;
2130 if (i < 32)
2131 for (j = 0; j < TS_FPRWIDTH; j++)
2132 vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
2133 else
2134 if (cpu_has_feature(CPU_FTR_ALTIVEC))
2135 vcpu->arch.vr_tm.vr[i-32] = val->vval;
2136 else
2137 r = -ENXIO;
2138 break;
2139 }
2140 case KVM_REG_PPC_TM_CR:
2141 vcpu->arch.cr_tm = set_reg_val(id, *val);
2142 break;
2143 case KVM_REG_PPC_TM_XER:
2144 vcpu->arch.xer_tm = set_reg_val(id, *val);
2145 break;
2146 case KVM_REG_PPC_TM_LR:
2147 vcpu->arch.lr_tm = set_reg_val(id, *val);
2148 break;
2149 case KVM_REG_PPC_TM_CTR:
2150 vcpu->arch.ctr_tm = set_reg_val(id, *val);
2151 break;
2152 case KVM_REG_PPC_TM_FPSCR:
2153 vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
2154 break;
2155 case KVM_REG_PPC_TM_AMR:
2156 vcpu->arch.amr_tm = set_reg_val(id, *val);
2157 break;
2158 case KVM_REG_PPC_TM_PPR:
2159 vcpu->arch.ppr_tm = set_reg_val(id, *val);
2160 break;
2161 case KVM_REG_PPC_TM_VRSAVE:
2162 vcpu->arch.vrsave_tm = set_reg_val(id, *val);
2163 break;
2164 case KVM_REG_PPC_TM_VSCR:
2165 if (cpu_has_feature(CPU_FTR_ALTIVEC))
2166 vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val);
2167 else
			r = -ENXIO;
2169 break;
2170 case KVM_REG_PPC_TM_DSCR:
2171 vcpu->arch.dscr_tm = set_reg_val(id, *val);
2172 break;
2173 case KVM_REG_PPC_TM_TAR:
2174 vcpu->arch.tar_tm = set_reg_val(id, *val);
2175 break;
2176#endif
2177 case KVM_REG_PPC_ARCH_COMPAT:
2178 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
2179 break;
2180 case KVM_REG_PPC_DEC_EXPIRY:
2181 vcpu->arch.dec_expires = set_reg_val(id, *val) -
2182 vcpu->arch.vcore->tb_offset;
2183 break;
2184 case KVM_REG_PPC_ONLINE:
2185 i = set_reg_val(id, *val);
2186 if (i && !vcpu->arch.online)
2187 atomic_inc(&vcpu->arch.vcore->online_count);
2188 else if (!i && vcpu->arch.online)
2189 atomic_dec(&vcpu->arch.vcore->online_count);
2190 vcpu->arch.online = i;
2191 break;
2192 case KVM_REG_PPC_PTCR:
2193 vcpu->kvm->arch.l1_ptcr = set_reg_val(id, *val);
2194 break;
2195 default:
2196 r = -EINVAL;
2197 break;
2198 }
2199
2200 return r;
2201}
2202
/*
 * How many hardware threads have to be gathered to run one virtual core:
 * 1 when threads are independent (POWER9 indep_threads_mode), otherwise
 * the host's threads_per_subcore.
 */
2210static int threads_per_vcore(struct kvm *kvm)
2211{
2212 if (kvm->arch.threads_indep)
2213 return 1;
2214 return threads_per_subcore;
2215}
2216
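/*
 * Allocate and initialize the virtual core structure shared by a group of
 * vcpus that are dispatched together; it inherits the VM-wide LPCR and
 * records the id of the first vcpu in the core.
 */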
2217static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
2218{
2219 struct kvmppc_vcore *vcore;
2220
2221 vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
2222
2223 if (vcore == NULL)
2224 return NULL;
2225
2226 spin_lock_init(&vcore->lock);
2227 spin_lock_init(&vcore->stoltb_lock);
2228 rcuwait_init(&vcore->wait);
2229 vcore->preempt_tb = TB_NIL;
2230 vcore->lpcr = kvm->arch.lpcr;
2231 vcore->first_vcpuid = id;
2232 vcore->kvm = kvm;
2233 INIT_LIST_HEAD(&vcore->preempt_list);
2234
2235 return vcore;
2236}
2237
2238#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
2239static struct debugfs_timings_element {
2240 const char *name;
2241 size_t offset;
2242} timings[] = {
2243 {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)},
2244 {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)},
2245 {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)},
2246 {"guest", offsetof(struct kvm_vcpu, arch.guest_time)},
2247 {"cede", offsetof(struct kvm_vcpu, arch.cede_time)},
2248};
2249
2250#define N_TIMINGS (ARRAY_SIZE(timings))
2251
2252struct debugfs_timings_state {
2253 struct kvm_vcpu *vcpu;
2254 unsigned int buflen;
2255 char buf[N_TIMINGS * 100];
2256};
2257
2258static int debugfs_timings_open(struct inode *inode, struct file *file)
2259{
2260 struct kvm_vcpu *vcpu = inode->i_private;
2261 struct debugfs_timings_state *p;
2262
2263 p = kzalloc(sizeof(*p), GFP_KERNEL);
2264 if (!p)
2265 return -ENOMEM;
2266
2267 kvm_get_kvm(vcpu->kvm);
2268 p->vcpu = vcpu;
2269 file->private_data = p;
2270
2271 return nonseekable_open(inode, file);
2272}
2273
2274static int debugfs_timings_release(struct inode *inode, struct file *file)
2275{
2276 struct debugfs_timings_state *p = file->private_data;
2277
2278 kvm_put_kvm(p->vcpu->kvm);
2279 kfree(p);
2280 return 0;
2281}
2282
2283static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
2284 size_t len, loff_t *ppos)
2285{
2286 struct debugfs_timings_state *p = file->private_data;
2287 struct kvm_vcpu *vcpu = p->vcpu;
2288 char *s, *buf_end;
2289 struct kvmhv_tb_accumulator tb;
2290 u64 count;
2291 loff_t pos;
2292 ssize_t n;
2293 int i, loops;
2294 bool ok;
2295
2296 if (!p->buflen) {
2297 s = p->buf;
2298 buf_end = s + sizeof(p->buf);
2299 for (i = 0; i < N_TIMINGS; ++i) {
2300 struct kvmhv_tb_accumulator *acc;
2301
2302 acc = (struct kvmhv_tb_accumulator *)
2303 ((unsigned long)vcpu + timings[i].offset);
2304 ok = false;
2305 for (loops = 0; loops < 1000; ++loops) {
2306 count = acc->seqcount;
2307 if (!(count & 1)) {
2308 smp_rmb();
2309 tb = *acc;
2310 smp_rmb();
2311 if (count == acc->seqcount) {
2312 ok = true;
2313 break;
2314 }
2315 }
2316 udelay(1);
2317 }
2318 if (!ok)
2319 snprintf(s, buf_end - s, "%s: stuck\n",
2320 timings[i].name);
2321 else
2322 snprintf(s, buf_end - s,
2323 "%s: %llu %llu %llu %llu\n",
2324 timings[i].name, count / 2,
2325 tb_to_ns(tb.tb_total),
2326 tb_to_ns(tb.tb_min),
2327 tb_to_ns(tb.tb_max));
2328 s += strlen(s);
2329 }
2330 p->buflen = s - p->buf;
2331 }
2332
2333 pos = *ppos;
2334 if (pos >= p->buflen)
2335 return 0;
2336 if (len > p->buflen - pos)
2337 len = p->buflen - pos;
2338 n = copy_to_user(buf, p->buf + pos, len);
2339 if (n) {
2340 if (n == len)
2341 return -EFAULT;
2342 len -= n;
2343 }
2344 *ppos = pos + len;
2345 return len;
2346}
2347
2348static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
2349 size_t len, loff_t *ppos)
2350{
2351 return -EACCES;
2352}
2353
2354static const struct file_operations debugfs_timings_ops = {
2355 .owner = THIS_MODULE,
2356 .open = debugfs_timings_open,
2357 .release = debugfs_timings_release,
2358 .read = debugfs_timings_read,
2359 .write = debugfs_timings_write,
2360 .llseek = generic_file_llseek,
2361};
2362
2363
2364static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
2365{
2366 char buf[16];
2367 struct kvm *kvm = vcpu->kvm;
2368
2369 snprintf(buf, sizeof(buf), "vcpu%u", id);
2370 vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
2371 debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu,
2372 &debugfs_timings_ops);
2373}
2374
2375#else
2376static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
2377{
2378}
2379#endif
2380
2381static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
2382{
2383 int err;
2384 int core;
2385 struct kvmppc_vcore *vcore;
2386 struct kvm *kvm;
2387 unsigned int id;
2388
2389 kvm = vcpu->kvm;
2390 id = vcpu->vcpu_id;
2391
2392 vcpu->arch.shared = &vcpu->arch.shregs;
2393#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
2394
2395
2396
2397
2398#ifdef __BIG_ENDIAN__
2399 vcpu->arch.shared_big_endian = true;
2400#else
2401 vcpu->arch.shared_big_endian = false;
2402#endif
2403#endif
2404 vcpu->arch.mmcr[0] = MMCR0_FC;
2405 vcpu->arch.ctrl = CTRL_RUNLATCH;
2406
2407 kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
2408 spin_lock_init(&vcpu->arch.vpa_update_lock);
2409 spin_lock_init(&vcpu->arch.tbacct_lock);
2410 vcpu->arch.busy_preempt = TB_NIL;
2411 vcpu->arch.intr_msr = MSR_SF | MSR_ME;
2412
2413
2414
2415
2416
2417
2418
2419
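/*
 * Default guest HFSCR: enable TAR, EBB, PM, BHRB, DSCR, VEC/VSX, FP and
 * prefixed instructions.  When running in HV mode this is masked by the
 * host's HFSCR value, and TM is only enabled if the CPU supports it.
 */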
2420 vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
2421 HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
2422 if (cpu_has_feature(CPU_FTR_HVMODE)) {
2423 vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
2424 if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
2425 vcpu->arch.hfscr |= HFSCR_TM;
2426 }
2427 if (cpu_has_feature(CPU_FTR_TM_COMP))
2428 vcpu->arch.hfscr |= HFSCR_TM;
2429
2430 kvmppc_mmu_book3s_hv_init(vcpu);
2431
2432 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
2433
2434 init_waitqueue_head(&vcpu->arch.cpu_run);
2435
2436 mutex_lock(&kvm->lock);
2437 vcore = NULL;
2438 err = -EINVAL;
2439 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
2440 if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
2441 pr_devel("KVM: VCPU ID too high\n");
2442 core = KVM_MAX_VCORES;
2443 } else {
2444 BUG_ON(kvm->arch.smt_mode != 1);
2445 core = kvmppc_pack_vcpu_id(kvm, id);
2446 }
2447 } else {
2448 core = id / kvm->arch.smt_mode;
2449 }
2450 if (core < KVM_MAX_VCORES) {
2451 vcore = kvm->arch.vcores[core];
2452 if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
2453 pr_devel("KVM: collision on id %u\n", id);
2454 vcore = NULL;
2455 } else if (!vcore) {
2456
2457
2458
2459
2460 err = -ENOMEM;
2461 vcore = kvmppc_vcore_create(kvm,
2462 id & ~(kvm->arch.smt_mode - 1));
2463 mutex_lock(&kvm->arch.mmu_setup_lock);
2464 kvm->arch.vcores[core] = vcore;
2465 kvm->arch.online_vcores++;
2466 mutex_unlock(&kvm->arch.mmu_setup_lock);
2467 }
2468 }
2469 mutex_unlock(&kvm->lock);
2470
2471 if (!vcore)
2472 return err;
2473
2474 spin_lock(&vcore->lock);
2475 ++vcore->num_threads;
2476 spin_unlock(&vcore->lock);
2477 vcpu->arch.vcore = vcore;
2478 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
2479 vcpu->arch.thread_cpu = -1;
2480 vcpu->arch.prev_cpu = -1;
2481
2482 vcpu->arch.cpu_type = KVM_CPU_3S_64;
2483 kvmppc_sanity_check(vcpu);
2484
2485 debugfs_vcpu_init(vcpu, id);
2486
2487 return 0;
2488}
2489
2490static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
2491 unsigned long flags)
2492{
2493 int err;
2494 int esmt = 0;
2495
2496 if (flags)
2497 return -EINVAL;
2498 if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
2499 return -EINVAL;
2500 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
2501
2502
2503
2504
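/* On POWER7/POWER8 the requested SMT mode must fit within a subcore. */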
2505 if (smt_mode > threads_per_subcore)
2506 return -EINVAL;
2507 } else {
2508
2509
2510
2511
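/*
 * On POWER9 (loose threading) each vcpu gets its own vcore, so record
 * the requested value as the emulated SMT mode and use smt_mode = 1.
 */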
2512 esmt = smt_mode;
2513 smt_mode = 1;
2514 }
2515 mutex_lock(&kvm->lock);
2516 err = -EBUSY;
2517 if (!kvm->arch.online_vcores) {
2518 kvm->arch.smt_mode = smt_mode;
2519 kvm->arch.emul_smt_mode = esmt;
2520 err = 0;
2521 }
2522 mutex_unlock(&kvm->lock);
2523
2524 return err;
2525}
2526
2527static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
2528{
2529 if (vpa->pinned_addr)
2530 kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
2531 vpa->dirty);
2532}
2533
2534static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
2535{
2536 spin_lock(&vcpu->arch.vpa_update_lock);
2537 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
2538 unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
2539 unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
2540 spin_unlock(&vcpu->arch.vpa_update_lock);
2541}
2542
2543static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
2544{
2545
2546 return 1;
2547}
2548
2549static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
2550{
2551 unsigned long dec_nsec, now;
2552
2553 now = get_tb();
2554 if (now > vcpu->arch.dec_expires) {
2555
2556 kvmppc_core_queue_dec(vcpu);
2557 kvmppc_core_prepare_to_enter(vcpu);
2558 return;
2559 }
2560 dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
2561 hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
2562 vcpu->arch.timer_running = 1;
2563}
2564
2565extern int __kvmppc_vcore_entry(void);
2566
2567static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
2568 struct kvm_vcpu *vcpu)
2569{
2570 u64 now;
2571
2572 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
2573 return;
2574 spin_lock_irq(&vcpu->arch.tbacct_lock);
2575 now = mftb();
2576 vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
2577 vcpu->arch.stolen_logged;
2578 vcpu->arch.busy_preempt = now;
2579 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
2580 spin_unlock_irq(&vcpu->arch.tbacct_lock);
2581 --vc->n_runnable;
2582 WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
2583}
2584
2585static int kvmppc_grab_hwthread(int cpu)
2586{
2587 struct paca_struct *tpaca;
2588 long timeout = 10000;
2589
2590 tpaca = paca_ptrs[cpu];
2591
2592
2593 tpaca->kvm_hstate.kvm_vcpu = NULL;
2594 tpaca->kvm_hstate.kvm_vcore = NULL;
2595 tpaca->kvm_hstate.napping = 0;
2596 smp_wmb();
2597 tpaca->kvm_hstate.hwthread_req = 1;
2598
2599
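/*
 * Make sure hwthread_req is visible before we read hwthread_state,
 * then wait for the target thread to leave the kernel and go to nap.
 */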
2608 smp_mb();
2609 while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
2610 if (--timeout <= 0) {
2611 pr_err("KVM: couldn't grab cpu %d\n", cpu);
2612 return -EBUSY;
2613 }
2614 udelay(1);
2615 }
2616 return 0;
2617}
2618
2619static void kvmppc_release_hwthread(int cpu)
2620{
2621 struct paca_struct *tpaca;
2622
2623 tpaca = paca_ptrs[cpu];
2624 tpaca->kvm_hstate.hwthread_req = 0;
2625 tpaca->kvm_hstate.kvm_vcpu = NULL;
2626 tpaca->kvm_hstate.kvm_vcore = NULL;
2627 tpaca->kvm_hstate.kvm_split_mode = NULL;
2628}
2629
2630static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
2631{
2632 struct kvm_nested_guest *nested = vcpu->arch.nested;
2633 cpumask_t *cpu_in_guest;
2634 int i;
2635
2636 cpu = cpu_first_thread_sibling(cpu);
2637 if (nested) {
2638 cpumask_set_cpu(cpu, &nested->need_tlb_flush);
2639 cpu_in_guest = &nested->cpu_in_guest;
2640 } else {
2641 cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
2642 cpu_in_guest = &kvm->arch.cpu_in_guest;
2643 }
2644
2645
2646
2647
2648
2649 smp_mb();
2650 for (i = 0; i < threads_per_core; ++i)
2651 if (cpumask_test_cpu(cpu + i, cpu_in_guest))
2652 smp_call_function_single(cpu + i, do_nothing, NULL, 1);
2653}
2654
2655static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
2656{
2657 struct kvm_nested_guest *nested = vcpu->arch.nested;
2658 struct kvm *kvm = vcpu->kvm;
2659 int prev_cpu;
2660
2661 if (!cpu_has_feature(CPU_FTR_HVMODE))
2662 return;
2663
2664 if (nested)
2665 prev_cpu = nested->prev_cpu[vcpu->arch.nested_vcpu_id];
2666 else
2667 prev_cpu = vcpu->arch.prev_cpu;
2668
2669
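/*
 * With radix, the guest can invalidate TLB entries itself and may use
 * tlbiel for translations it believes are local to one CPU.  When a vcpu
 * moves to a different physical core, ask the old core to flush its TLB
 * before it next runs this guest.
 */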
2681 if (prev_cpu != pcpu) {
2682 if (prev_cpu >= 0 &&
2683 cpu_first_thread_sibling(prev_cpu) !=
2684 cpu_first_thread_sibling(pcpu))
2685 radix_flush_cpu(kvm, prev_cpu, vcpu);
2686 if (nested)
2687 nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
2688 else
2689 vcpu->arch.prev_cpu = pcpu;
2690 }
2691}
2692
2693static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
2694{
2695 int cpu;
2696 struct paca_struct *tpaca;
2697 struct kvm *kvm = vc->kvm;
2698
2699 cpu = vc->pcpu;
2700 if (vcpu) {
2701 if (vcpu->arch.timer_running) {
2702 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
2703 vcpu->arch.timer_running = 0;
2704 }
2705 cpu += vcpu->arch.ptid;
2706 vcpu->cpu = vc->pcpu;
2707 vcpu->arch.thread_cpu = cpu;
2708 cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
2709 }
2710 tpaca = paca_ptrs[cpu];
2711 tpaca->kvm_hstate.kvm_vcpu = vcpu;
2712 tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
2713 tpaca->kvm_hstate.fake_suspend = 0;
2714
2715 smp_wmb();
2716 tpaca->kvm_hstate.kvm_vcore = vc;
2717 if (cpu != smp_processor_id())
2718 kvmppc_ipi_thread(cpu);
2719}
2720
2721static void kvmppc_wait_for_nap(int n_threads)
2722{
2723 int cpu = smp_processor_id();
2724 int i, loops;
2725
2726 if (n_threads <= 1)
2727 return;
2728 for (loops = 0; loops < 1000000; ++loops) {
2729
2730
2731
2732
2733
2734
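/* A secondary thread clears its kvm_vcore pointer when it goes to nap. */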
2735 for (i = 1; i < n_threads; ++i)
2736 if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
2737 break;
2738 if (i == n_threads) {
2739 HMT_medium();
2740 return;
2741 }
2742 HMT_low();
2743 }
2744 HMT_medium();
2745 for (i = 1; i < n_threads; ++i)
2746 if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
2747 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
2748}
2749
2750
2751
2752
2753
2754
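/*
 * Check that we are on thread 0 of the subcore and that the other
 * hardware threads are offline, then grab them so they stay out of the
 * kernel while we run the guest.
 */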
2755static int on_primary_thread(void)
2756{
2757 int cpu = smp_processor_id();
2758 int thr;
2759
2760
2761 if (cpu_thread_in_subcore(cpu))
2762 return 0;
2763
2764 thr = 0;
2765 while (++thr < threads_per_subcore)
2766 if (cpu_online(cpu + thr))
2767 return 0;
2768
2769
2770 for (thr = 1; thr < threads_per_subcore; ++thr) {
2771 if (kvmppc_grab_hwthread(cpu + thr)) {
2772
2773 do {
2774 kvmppc_release_hwthread(cpu + thr);
2775 } while (--thr > 0);
2776 return 0;
2777 }
2778 }
2779 return 1;
2780}
2781
2782
2783
2784
2785
2786
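/*
 * Per-CPU lists of preempted virtual cores: vcores that still have
 * runnable threads but whose runner task is not currently running.
 * These are candidates for piggybacking onto another vcore's run.
 */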
2787struct preempted_vcore_list {
2788 struct list_head list;
2789 spinlock_t lock;
2790};
2791
2792static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
2793
2794static void init_vcore_lists(void)
2795{
2796 int cpu;
2797
2798 for_each_possible_cpu(cpu) {
2799 struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
2800 spin_lock_init(&lp->lock);
2801 INIT_LIST_HEAD(&lp->list);
2802 }
2803}
2804
2805static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
2806{
2807 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
2808
2809 vc->vcore_state = VCORE_PREEMPT;
2810 vc->pcpu = smp_processor_id();
2811 if (vc->num_threads < threads_per_vcore(vc->kvm)) {
2812 spin_lock(&lp->lock);
2813 list_add_tail(&vc->preempt_list, &lp->list);
2814 spin_unlock(&lp->lock);
2815 }
2816
2817
2818 kvmppc_core_start_stolen(vc);
2819}
2820
2821static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
2822{
2823 struct preempted_vcore_list *lp;
2824
2825 kvmppc_core_end_stolen(vc);
2826 if (!list_empty(&vc->preempt_list)) {
2827 lp = &per_cpu(preempted_vcores, vc->pcpu);
2828 spin_lock(&lp->lock);
2829 list_del_init(&vc->preempt_list);
2830 spin_unlock(&lp->lock);
2831 }
2832 vc->vcore_state = VCORE_INACTIVE;
2833}
2834
2835
2836
2837
2838
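/* Describes the set of vcores (subcores) assembled to run on one physical core. */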
2839struct core_info {
2840 int n_subcores;
2841 int max_subcore_threads;
2842 int total_threads;
2843 int subcore_threads[MAX_SUBCORES];
2844 struct kvmppc_vcore *vc[MAX_SUBCORES];
2845};
2846
2847
2848
2849
2850
2851static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
2852
2853static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
2854{
2855 memset(cip, 0, sizeof(*cip));
2856 cip->n_subcores = 1;
2857 cip->max_subcore_threads = vc->num_threads;
2858 cip->total_threads = vc->num_threads;
2859 cip->subcore_threads[0] = vc->num_threads;
2860 cip->vc[0] = vc;
2861}
2862
2863static bool subcore_config_ok(int n_subcores, int n_threads)
2864{
2865
2866
2867
2868
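/* POWER9 "subcores" are really independent threads: at most 4, one thread each. */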
2869 if (cpu_has_feature(CPU_FTR_ARCH_300))
2870 return n_subcores <= 4 && n_threads == 1;
2871
2872
2873 if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
2874 return false;
2875 if (n_subcores > MAX_SUBCORES)
2876 return false;
2877 if (n_subcores > 1) {
2878 if (!(dynamic_mt_modes & 2))
2879 n_subcores = 4;
2880 if (n_subcores > 2 && !(dynamic_mt_modes & 4))
2881 return false;
2882 }
2883
2884 return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
2885}
2886
2887static void init_vcore_to_run(struct kvmppc_vcore *vc)
2888{
2889 vc->entry_exit_map = 0;
2890 vc->in_guest = 0;
2891 vc->napping_threads = 0;
2892 vc->conferring_threads = 0;
2893 vc->tb_offset_applied = 0;
2894}
2895
2896static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
2897{
2898 int n_threads = vc->num_threads;
2899 int sub;
2900
2901 if (!cpu_has_feature(CPU_FTR_ARCH_207S))
2902 return false;
2903
2904
2905 if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm)
2906 return false;
2907
2908 if (n_threads < cip->max_subcore_threads)
2909 n_threads = cip->max_subcore_threads;
2910 if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
2911 return false;
2912 cip->max_subcore_threads = n_threads;
2913
2914 sub = cip->n_subcores;
2915 ++cip->n_subcores;
2916 cip->total_threads += vc->num_threads;
2917 cip->subcore_threads[sub] = vc->num_threads;
2918 cip->vc[sub] = vc;
2919 init_vcore_to_run(vc);
2920 list_del_init(&vc->preempt_list);
2921
2922 return true;
2923}
2924
2925
2926
2927
2928
2929static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
2930 int target_threads)
2931{
2932 if (cip->total_threads + pvc->num_threads > target_threads)
2933 return false;
2934
2935 return can_dynamic_split(pvc, cip);
2936}
2937
2938static void prepare_threads(struct kvmppc_vcore *vc)
2939{
2940 int i;
2941 struct kvm_vcpu *vcpu;
2942
2943 for_each_runnable_thread(i, vcpu, vc) {
2944 if (signal_pending(vcpu->arch.run_task))
2945 vcpu->arch.ret = -EINTR;
2946 else if (no_mixing_hpt_and_radix &&
2947 kvm_is_radix(vc->kvm) != radix_enabled())
2948 vcpu->arch.ret = -EINVAL;
2949 else if (vcpu->arch.vpa.update_pending ||
2950 vcpu->arch.slb_shadow.update_pending ||
2951 vcpu->arch.dtl.update_pending)
2952 vcpu->arch.ret = RESUME_GUEST;
2953 else
2954 continue;
2955 kvmppc_remove_runnable(vc, vcpu);
2956 wake_up(&vcpu->arch.cpu_run);
2957 }
2958}
2959
2960static void collect_piggybacks(struct core_info *cip, int target_threads)
2961{
2962 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
2963 struct kvmppc_vcore *pvc, *vcnext;
2964
2965 spin_lock(&lp->lock);
2966 list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
2967 if (!spin_trylock(&pvc->lock))
2968 continue;
2969 prepare_threads(pvc);
2970 if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
2971 list_del_init(&pvc->preempt_list);
2972 if (pvc->runner == NULL) {
2973 pvc->vcore_state = VCORE_INACTIVE;
2974 kvmppc_core_end_stolen(pvc);
2975 }
2976 spin_unlock(&pvc->lock);
2977 continue;
2978 }
2979 if (!can_piggyback(pvc, cip, target_threads)) {
2980 spin_unlock(&pvc->lock);
2981 continue;
2982 }
2983 kvmppc_core_end_stolen(pvc);
2984 pvc->vcore_state = VCORE_PIGGYBACK;
2985 if (cip->total_threads >= target_threads)
2986 break;
2987 }
2988 spin_unlock(&lp->lock);
2989}
2990
2991static bool recheck_signals_and_mmu(struct core_info *cip)
2992{
2993 int sub, i;
2994 struct kvm_vcpu *vcpu;
2995 struct kvmppc_vcore *vc;
2996
2997 for (sub = 0; sub < cip->n_subcores; ++sub) {
2998 vc = cip->vc[sub];
2999 if (!vc->kvm->arch.mmu_ready)
3000 return true;
3001 for_each_runnable_thread(i, vcpu, vc)
3002 if (signal_pending(vcpu->arch.run_task))
3003 return true;
3004 }
3005 return false;
3006}
3007
3008static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
3009{
3010 int still_running = 0, i;
3011 u64 now;
3012 long ret;
3013 struct kvm_vcpu *vcpu;
3014
3015 spin_lock(&vc->lock);
3016 now = get_tb();
3017 for_each_runnable_thread(i, vcpu, vc) {
3018
3019
3020
3021
3022
3023
3024
3025 spin_unlock(&vc->lock);
3026
3027 if (now < vcpu->arch.dec_expires &&
3028 kvmppc_core_pending_dec(vcpu))
3029 kvmppc_core_dequeue_dec(vcpu);
3030
3031 trace_kvm_guest_exit(vcpu);
3032
3033 ret = RESUME_GUEST;
3034 if (vcpu->arch.trap)
3035 ret = kvmppc_handle_exit_hv(vcpu,
3036 vcpu->arch.run_task);
3037
3038 vcpu->arch.ret = ret;
3039 vcpu->arch.trap = 0;
3040
3041 spin_lock(&vc->lock);
3042 if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
3043 if (vcpu->arch.pending_exceptions)
3044 kvmppc_core_prepare_to_enter(vcpu);
3045 if (vcpu->arch.ceded)
3046 kvmppc_set_timer(vcpu);
3047 else
3048 ++still_running;
3049 } else {
3050 kvmppc_remove_runnable(vc, vcpu);
3051 wake_up(&vcpu->arch.cpu_run);
3052 }
3053 }
3054 if (!is_master) {
3055 if (still_running > 0) {
3056 kvmppc_vcore_preempt(vc);
3057 } else if (vc->runner) {
3058 vc->vcore_state = VCORE_PREEMPT;
3059 kvmppc_core_start_stolen(vc);
3060 } else {
3061 vc->vcore_state = VCORE_INACTIVE;
3062 }
3063 if (vc->n_runnable > 0 && vc->runner == NULL) {
3064
3065 i = -1;
3066 vcpu = next_runnable_thread(vc, &i);
3067 wake_up(&vcpu->arch.cpu_run);
3068 }
3069 }
3070 spin_unlock(&vc->lock);
3071}
3072
3073
3074
3075
3076
3077
3078static inline int kvmppc_clear_host_core(unsigned int cpu)
3079{
3080 int core;
3081
3082 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
3083 return 0;
3084
3085
3086
3087
3088
3089 core = cpu >> threads_shift;
3090 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
3091 return 0;
3092}
3093
3094
3095
3096
3097
3098
3099static inline int kvmppc_set_host_core(unsigned int cpu)
3100{
3101 int core;
3102
3103 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
3104 return 0;
3105
3106
3107
3108
3109
3110 core = cpu >> threads_shift;
3111 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
3112 return 0;
3113}
3114
3115static void set_irq_happened(int trap)
3116{
3117 switch (trap) {
3118 case BOOK3S_INTERRUPT_EXTERNAL:
3119 local_paca->irq_happened |= PACA_IRQ_EE;
3120 break;
3121 case BOOK3S_INTERRUPT_H_DOORBELL:
3122 local_paca->irq_happened |= PACA_IRQ_DBELL;
3123 break;
3124 case BOOK3S_INTERRUPT_HMI:
3125 local_paca->irq_happened |= PACA_IRQ_HMI;
3126 break;
3127 case BOOK3S_INTERRUPT_SYSTEM_RESET:
3128 replay_system_reset();
3129 break;
3130 }
3131}
3132
3133
3134
3135
3136
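/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */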
3137static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
3138{
3139 struct kvm_vcpu *vcpu;
3140 int i;
3141 int srcu_idx;
3142 struct core_info core_info;
3143 struct kvmppc_vcore *pvc;
3144 struct kvm_split_mode split_info, *sip;
3145 int split, subcore_size, active;
3146 int sub;
3147 bool thr0_done;
3148 unsigned long cmd_bit, stat_bit;
3149 int pcpu, thr;
3150 int target_threads;
3151 int controlled_threads;
3152 int trap;
3153 bool is_power8;
3154
3155
3156
3157
3158
3159 prepare_threads(vc);
3160
3161
3162 if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
3163 return;
3164
3165
3166
3167
3168 init_vcore_to_run(vc);
3169 vc->preempt_tb = TB_NIL;
3170
3171
3172
3173
3174
3175
3176 controlled_threads = threads_per_vcore(vc->kvm);
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186 if ((controlled_threads > 1) &&
3187 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
3188 for_each_runnable_thread(i, vcpu, vc) {
3189 vcpu->arch.ret = -EBUSY;
3190 kvmppc_remove_runnable(vc, vcpu);
3191 wake_up(&vcpu->arch.cpu_run);
3192 }
3193 goto out;
3194 }
3195
3196
3197
3198
3199
3200 init_core_info(&core_info, vc);
3201 pcpu = smp_processor_id();
3202 target_threads = controlled_threads;
3203 if (target_smt_mode && target_smt_mode < target_threads)
3204 target_threads = target_smt_mode;
3205 if (vc->num_threads < target_threads)
3206 collect_piggybacks(&core_info, target_threads);
3207
3208
3209
3210
3211
3212
3213 pcpu = smp_processor_id();
3214 if (kvm_is_radix(vc->kvm)) {
3215 for (sub = 0; sub < core_info.n_subcores; ++sub)
3216 for_each_runnable_thread(i, vcpu, core_info.vc[sub])
3217 kvmppc_prepare_radix_vcpu(vcpu, pcpu);
3218 }
3219
3220
3221
3222
3223
3224
3225
3226
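/*
 * Hard-disable interrupts, then recheck for signals, reschedule requests
 * and MMU readiness; if anything is pending, back out before committing
 * the hardware threads to the guest.
 */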
3227 local_irq_disable();
3228 hard_irq_disable();
3229 if (lazy_irq_pending() || need_resched() ||
3230 recheck_signals_and_mmu(&core_info)) {
3231 local_irq_enable();
3232 vc->vcore_state = VCORE_INACTIVE;
3233
3234 for (sub = 1; sub < core_info.n_subcores; ++sub) {
3235 pvc = core_info.vc[sub];
3236
3237 kvmppc_vcore_preempt(pvc);
3238 spin_unlock(&pvc->lock);
3239 }
3240 for (i = 0; i < controlled_threads; ++i)
3241 kvmppc_release_hwthread(pcpu + i);
3242 return;
3243 }
3244
3245 kvmppc_clear_host_core(pcpu);
3246
3247
3248 subcore_size = threads_per_subcore;
3249 cmd_bit = stat_bit = 0;
3250 split = core_info.n_subcores;
3251 sip = NULL;
3252 is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
3253 && !cpu_has_feature(CPU_FTR_ARCH_300);
3254
3255 if (split > 1) {
3256 sip = &split_info;
3257 memset(&split_info, 0, sizeof(split_info));
3258 for (sub = 0; sub < core_info.n_subcores; ++sub)
3259 split_info.vc[sub] = core_info.vc[sub];
3260
3261 if (is_power8) {
3262 if (split == 2 && (dynamic_mt_modes & 2)) {
3263 cmd_bit = HID0_POWER8_1TO2LPAR;
3264 stat_bit = HID0_POWER8_2LPARMODE;
3265 } else {
3266 split = 4;
3267 cmd_bit = HID0_POWER8_1TO4LPAR;
3268 stat_bit = HID0_POWER8_4LPARMODE;
3269 }
3270 subcore_size = MAX_SMT_THREADS / split;
3271 split_info.rpr = mfspr(SPRN_RPR);
3272 split_info.pmmar = mfspr(SPRN_PMMAR);
3273 split_info.ldbar = mfspr(SPRN_LDBAR);
3274 split_info.subcore_size = subcore_size;
3275 } else {
3276 split_info.subcore_size = 1;
3277 }
3278
3279
3280 smp_wmb();
3281 }
3282
3283 for (thr = 0; thr < controlled_threads; ++thr) {
3284 struct paca_struct *paca = paca_ptrs[pcpu + thr];
3285
3286 paca->kvm_hstate.napping = 0;
3287 paca->kvm_hstate.kvm_split_mode = sip;
3288 }
3289
3290
3291 if (cmd_bit) {
3292 unsigned long hid0 = mfspr(SPRN_HID0);
3293
3294 hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
3295 mb();
3296 mtspr(SPRN_HID0, hid0);
3297 isync();
3298 for (;;) {
3299 hid0 = mfspr(SPRN_HID0);
3300 if (hid0 & stat_bit)
3301 break;
3302 cpu_relax();
3303 }
3304 }
3305
3306
3307
3308
3309
3310
3311 if (is_power8) {
3312 unsigned long rwmr_val = RWMR_RPA_P8_8THREAD;
3313 int n_online = atomic_read(&vc->online_count);
3314
3315
3316
3317
3318
3319 if (split == 1 && threads_per_subcore == MAX_SMT_THREADS &&
3320 n_online >= 1 && n_online <= MAX_SMT_THREADS)
3321 rwmr_val = p8_rwmr_values[n_online];
3322 mtspr(SPRN_RWMR, rwmr_val);
3323 }
3324
3325
3326 active = 0;
3327 for (sub = 0; sub < core_info.n_subcores; ++sub) {
3328 thr = is_power8 ? subcore_thread_map[sub] : sub;
3329 thr0_done = false;
3330 active |= 1 << thr;
3331 pvc = core_info.vc[sub];
3332 pvc->pcpu = pcpu + thr;
3333 for_each_runnable_thread(i, vcpu, pvc) {
3334 kvmppc_start_thread(vcpu, pvc);
3335 kvmppc_create_dtl_entry(vcpu, pvc);
3336 trace_kvm_guest_enter(vcpu);
3337 if (!vcpu->arch.ptid)
3338 thr0_done = true;
3339 active |= 1 << (thr + vcpu->arch.ptid);
3340 }
3341
3342
3343
3344
3345 if (!thr0_done)
3346 kvmppc_start_thread(NULL, pvc);
3347 }
3348
3349
3350
3351
3352
3353 smp_mb();
3354
3355
3356
3357
3358
3359
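/*
 * In split-core mode, poke the unused hardware threads as well so that
 * they notice do_nap and go to nap with kvm_split_mode set.
 */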
3360 if (cmd_bit) {
3361 split_info.do_nap = 1;
3362 for (thr = 1; thr < threads_per_subcore; ++thr)
3363 if (!(active & (1 << thr)))
3364 kvmppc_ipi_thread(pcpu + thr);
3365 }
3366
3367 vc->vcore_state = VCORE_RUNNING;
3368 preempt_disable();
3369
3370 trace_kvmppc_run_core(vc, 0);
3371
3372 for (sub = 0; sub < core_info.n_subcores; ++sub)
3373 spin_unlock(&core_info.vc[sub]->lock);
3374
3375 guest_enter_irqoff();
3376
3377 srcu_idx = srcu_read_lock(&vc->kvm->srcu);
3378
3379 this_cpu_disable_ftrace();
3380
3381
3382
3383
3384
3385 trace_hardirqs_on();
3386
3387 trap = __kvmppc_vcore_entry();
3388
3389 trace_hardirqs_off();
3390
3391 this_cpu_enable_ftrace();
3392
3393 srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
3394
3395 set_irq_happened(trap);
3396
3397 spin_lock(&vc->lock);
3398
3399 vc->vcore_state = VCORE_EXITING;
3400
3401
3402 kvmppc_wait_for_nap(controlled_threads);
3403
3404
3405 if (cmd_bit) {
3406 unsigned long hid0 = mfspr(SPRN_HID0);
3407 unsigned long loops = 0;
3408
3409 hid0 &= ~HID0_POWER8_DYNLPARDIS;
3410 stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
3411 mb();
3412 mtspr(SPRN_HID0, hid0);
3413 isync();
3414 for (;;) {
3415 hid0 = mfspr(SPRN_HID0);
3416 if (!(hid0 & stat_bit))
3417 break;
3418 cpu_relax();
3419 ++loops;
3420 }
3421 split_info.do_nap = 0;
3422 }
3423
3424 kvmppc_set_host_core(pcpu);
3425
3426 context_tracking_guest_exit();
3427 if (!vtime_accounting_enabled_this_cpu()) {
3428 local_irq_enable();
3429
3430
3431
3432
3433
3434
3435
3436
3437 local_irq_disable();
3438 }
3439 vtime_account_guest_exit();
3440
3441 local_irq_enable();
3442
3443
3444 for (i = 0; i < controlled_threads; ++i) {
3445 kvmppc_release_hwthread(pcpu + i);
3446 if (sip && sip->napped[i])
3447 kvmppc_ipi_thread(pcpu + i);
3448 cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
3449 }
3450
3451 spin_unlock(&vc->lock);
3452
3453
3454 smp_mb();
3455
3456 preempt_enable();
3457
3458 for (sub = 0; sub < core_info.n_subcores; ++sub) {
3459 pvc = core_info.vc[sub];
3460 post_guest_process(pvc, pvc == vc);
3461 }
3462
3463 spin_lock(&vc->lock);
3464
3465 out:
3466 vc->vcore_state = VCORE_INACTIVE;
3467 trace_kvmppc_run_core(vc, 1);
3468}
3469
3470
3471
3472
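/*
 * Load up hypervisor-mode registers for the guest and enter it directly
 * (POWER9 path with the host running in HV mode).
 */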
3473static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
3474 unsigned long lpcr)
3475{
3476 struct kvmppc_vcore *vc = vcpu->arch.vcore;
3477 s64 hdec;
3478 u64 tb, purr, spurr;
3479 int trap;
3480 unsigned long host_hfscr = mfspr(SPRN_HFSCR);
3481 unsigned long host_ciabr = mfspr(SPRN_CIABR);
3482 unsigned long host_dawr = mfspr(SPRN_DAWR0);
3483 unsigned long host_dawrx = mfspr(SPRN_DAWRX0);
3484 unsigned long host_psscr = mfspr(SPRN_PSSCR);
3485 unsigned long host_pidr = mfspr(SPRN_PID);
3486
3487
3488
3489
3490
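/*
 * POWER9 suppresses the HDEC exception when LPCR[HDICE] = 0, so make
 * sure HDICE is set before writing HDEC below.
 */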
3491 mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE);
3492 isync();
3493
3494 hdec = time_limit - mftb();
3495 if (hdec < 0) {
3496 mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
3497 isync();
3498 return BOOK3S_INTERRUPT_HV_DECREMENTER;
3499 }
3500 mtspr(SPRN_HDEC, hdec);
3501
3502 if (vc->tb_offset) {
3503 u64 new_tb = mftb() + vc->tb_offset;
3504 mtspr(SPRN_TBU40, new_tb);
3505 tb = mftb();
3506 if ((tb & 0xffffff) < (new_tb & 0xffffff))
3507 mtspr(SPRN_TBU40, new_tb + 0x1000000);
3508 vc->tb_offset_applied = vc->tb_offset;
3509 }
3510
3511 if (vc->pcr)
3512 mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
3513 mtspr(SPRN_DPDES, vc->dpdes);
3514 mtspr(SPRN_VTB, vc->vtb);
3515
3516 local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
3517 local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
3518 mtspr(SPRN_PURR, vcpu->arch.purr);
3519 mtspr(SPRN_SPURR, vcpu->arch.spurr);
3520
3521 if (dawr_enabled()) {
3522 mtspr(SPRN_DAWR0, vcpu->arch.dawr);
3523 mtspr(SPRN_DAWRX0, vcpu->arch.dawrx);
3524 }
3525 mtspr(SPRN_CIABR, vcpu->arch.ciabr);
3526 mtspr(SPRN_IC, vcpu->arch.ic);
3527 mtspr(SPRN_PID, vcpu->arch.pid);
3528
3529 mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
3530 (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
3531
3532 mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
3533
3534 mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
3535 mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
3536 mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
3537 mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
3538
3539 mtspr(SPRN_AMOR, ~0UL);
3540
3541 mtspr(SPRN_LPCR, lpcr);
3542 isync();
3543
3544 kvmppc_xive_push_vcpu(vcpu);
3545
3546 mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
3547 mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
3548
3549 trap = __kvmhv_vcpu_entry_p9(vcpu);
3550
3551
3552 purr = mfspr(SPRN_PURR);
3553 spurr = mfspr(SPRN_SPURR);
3554 mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
3555 purr - vcpu->arch.purr);
3556 mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
3557 spurr - vcpu->arch.spurr);
3558 vcpu->arch.purr = purr;
3559 vcpu->arch.spurr = spurr;
3560
3561 vcpu->arch.ic = mfspr(SPRN_IC);
3562 vcpu->arch.pid = mfspr(SPRN_PID);
3563 vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
3564
3565 vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
3566 vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
3567 vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
3568 vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
3569
3570
3571 mtspr(SPRN_PSSCR, host_psscr |
3572 (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
3573 mtspr(SPRN_HFSCR, host_hfscr);
3574 mtspr(SPRN_CIABR, host_ciabr);
3575 mtspr(SPRN_DAWR0, host_dawr);
3576 mtspr(SPRN_DAWRX0, host_dawrx);
3577 mtspr(SPRN_PID, host_pidr);
3578
3579
3580
3581
3582
3583 asm volatile("eieio; tlbsync; ptesync");
3584
3585
3586
3587
3588
3589 if (cpu_has_feature(CPU_FTR_ARCH_31))
3590 asm volatile(PPC_CP_ABORT);
3591
3592 mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid);
3593 isync();
3594
3595 vc->dpdes = mfspr(SPRN_DPDES);
3596 vc->vtb = mfspr(SPRN_VTB);
3597 mtspr(SPRN_DPDES, 0);
3598 if (vc->pcr)
3599 mtspr(SPRN_PCR, PCR_MASK);
3600
3601 if (vc->tb_offset_applied) {
3602 u64 new_tb = mftb() - vc->tb_offset_applied;
3603 mtspr(SPRN_TBU40, new_tb);
3604 tb = mftb();
3605 if ((tb & 0xffffff) < (new_tb & 0xffffff))
3606 mtspr(SPRN_TBU40, new_tb + 0x1000000);
3607 vc->tb_offset_applied = 0;
3608 }
3609
3610 mtspr(SPRN_HDEC, 0x7fffffff);
3611 mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
3612
3613 return trap;
3614}
3615
3616
3617
3618
3619
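/*
 * Virtual-mode guest entry for POWER9 and later, when the host and guest
 * are both using the radix MMU.  The LPIDR has already been set.
 */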
3620static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
3621 unsigned long lpcr)
3622{
3623 struct kvmppc_vcore *vc = vcpu->arch.vcore;
3624 unsigned long host_dscr = mfspr(SPRN_DSCR);
3625 unsigned long host_tidr = mfspr(SPRN_TIDR);
3626 unsigned long host_iamr = mfspr(SPRN_IAMR);
3627 unsigned long host_amr = mfspr(SPRN_AMR);
3628 unsigned long host_fscr = mfspr(SPRN_FSCR);
3629 s64 dec;
3630 u64 tb;
3631 int trap, save_pmu;
3632
3633 dec = mfspr(SPRN_DEC);
3634 tb = mftb();
3635 if (dec < 0)
3636 return BOOK3S_INTERRUPT_HV_DECREMENTER;
3637 local_paca->kvm_hstate.dec_expires = dec + tb;
3638 if (local_paca->kvm_hstate.dec_expires < time_limit)
3639 time_limit = local_paca->kvm_hstate.dec_expires;
3640
3641 vcpu->arch.ceded = 0;
3642
3643 kvmhv_save_host_pmu();
3644
3645 kvmppc_subcore_enter_guest();
3646
3647 vc->entry_exit_map = 1;
3648 vc->in_guest = 1;
3649
3650 if (vcpu->arch.vpa.pinned_addr) {
3651 struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
3652 u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
3653 lp->yield_count = cpu_to_be32(yield_count);
3654 vcpu->arch.vpa.dirty = 1;
3655 }
3656
3657 if (cpu_has_feature(CPU_FTR_TM) ||
3658 cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
3659 kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
3660
3661 kvmhv_load_guest_pmu(vcpu);
3662
3663 msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
3664 load_fp_state(&vcpu->arch.fp);
3665#ifdef CONFIG_ALTIVEC
3666 load_vr_state(&vcpu->arch.vr);
3667#endif
3668 mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
3669
3670 mtspr(SPRN_DSCR, vcpu->arch.dscr);
3671 mtspr(SPRN_IAMR, vcpu->arch.iamr);
3672 mtspr(SPRN_PSPB, vcpu->arch.pspb);
3673 mtspr(SPRN_FSCR, vcpu->arch.fscr);
3674 mtspr(SPRN_TAR, vcpu->arch.tar);
3675 mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
3676 mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
3677 mtspr(SPRN_BESCR, vcpu->arch.bescr);
3678 mtspr(SPRN_WORT, vcpu->arch.wort);
3679 mtspr(SPRN_TIDR, vcpu->arch.tid);
3680 mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
3681 mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
3682 mtspr(SPRN_AMR, vcpu->arch.amr);
3683 mtspr(SPRN_UAMOR, vcpu->arch.uamor);
3684
3685 if (!(vcpu->arch.ctrl & 1))
3686 mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
3687
3688 mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
3689
3690 if (kvmhv_on_pseries()) {
3691
3692
3693
3694
3695
3696
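/*
 * We are a nested hypervisor: ask the real (L0) hypervisor to run the
 * guest for us via the H_ENTER_NESTED hcall.  The guest-visible PSSCR
 * is not part of hv_guest_state, so save and restore it by hand.
 */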
3697 unsigned long host_psscr;
3698
3699 struct hv_guest_state hvregs;
3700
3701 host_psscr = mfspr(SPRN_PSSCR_PR);
3702 mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
3703 kvmhv_save_hv_regs(vcpu, &hvregs);
3704 hvregs.lpcr = lpcr;
3705 vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
3706 hvregs.version = HV_GUEST_STATE_VERSION;
3707 if (vcpu->arch.nested) {
3708 hvregs.lpid = vcpu->arch.nested->shadow_lpid;
3709 hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
3710 } else {
3711 hvregs.lpid = vcpu->kvm->arch.lpid;
3712 hvregs.vcpu_token = vcpu->vcpu_id;
3713 }
3714 hvregs.hdec_expiry = time_limit;
3715 trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
3716 __pa(&vcpu->arch.regs));
3717 kvmhv_restore_hv_return_state(vcpu, &hvregs);
3718 vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
3719 vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
3720 vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
3721 vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
3722 mtspr(SPRN_PSSCR_PR, host_psscr);
3723
3724
3725 if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
3726 kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
3727 kvmppc_nested_cede(vcpu);
3728 kvmppc_set_gpr(vcpu, 3, 0);
3729 trap = 0;
3730 }
3731 } else {
3732 trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
3733 }
3734
3735 vcpu->arch.slb_max = 0;
3736 dec = mfspr(SPRN_DEC);
3737 if (!(lpcr & LPCR_LD))
3738 dec = (s32) dec;
3739 tb = mftb();
3740 vcpu->arch.dec_expires = dec + tb;
3741 vcpu->cpu = -1;
3742 vcpu->arch.thread_cpu = -1;
3743
3744 vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
3745 if (!(vcpu->arch.ctrl & 1))
3746 mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
3747
3748 vcpu->arch.iamr = mfspr(SPRN_IAMR);
3749 vcpu->arch.pspb = mfspr(SPRN_PSPB);
3750 vcpu->arch.fscr = mfspr(SPRN_FSCR);
3751 vcpu->arch.tar = mfspr(SPRN_TAR);
3752 vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
3753 vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
3754 vcpu->arch.bescr = mfspr(SPRN_BESCR);
3755 vcpu->arch.wort = mfspr(SPRN_WORT);
3756 vcpu->arch.tid = mfspr(SPRN_TIDR);
3757 vcpu->arch.amr = mfspr(SPRN_AMR);
3758 vcpu->arch.uamor = mfspr(SPRN_UAMOR);
3759 vcpu->arch.dscr = mfspr(SPRN_DSCR);
3760
3761 mtspr(SPRN_PSPB, 0);
3762 mtspr(SPRN_WORT, 0);
3763 mtspr(SPRN_UAMOR, 0);
3764 mtspr(SPRN_DSCR, host_dscr);
3765 mtspr(SPRN_TIDR, host_tidr);
3766 mtspr(SPRN_IAMR, host_iamr);
3767
3768 if (host_amr != vcpu->arch.amr)
3769 mtspr(SPRN_AMR, host_amr);
3770
3771 if (host_fscr != vcpu->arch.fscr)
3772 mtspr(SPRN_FSCR, host_fscr);
3773
3774 msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
3775 store_fp_state(&vcpu->arch.fp);
3776#ifdef CONFIG_ALTIVEC
3777 store_vr_state(&vcpu->arch.vr);
3778#endif
3779 vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
3780
3781 if (cpu_has_feature(CPU_FTR_TM) ||
3782 cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
3783 kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
3784
3785 save_pmu = 1;
3786 if (vcpu->arch.vpa.pinned_addr) {
3787 struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
3788 u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
3789 lp->yield_count = cpu_to_be32(yield_count);
3790 vcpu->arch.vpa.dirty = 1;
3791 save_pmu = lp->pmcregs_in_use;
3792 }
3793
3794 save_pmu |= nesting_enabled(vcpu->kvm);
3795
3796 kvmhv_save_guest_pmu(vcpu, save_pmu);
3797
3798 vc->entry_exit_map = 0x101;
3799 vc->in_guest = 0;
3800
3801 mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
3802 mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
3803
3804 kvmhv_load_host_pmu();
3805
3806 kvmppc_subcore_exit_guest();
3807
3808 return trap;
3809}
3810
3811
3812
3813
3814
3815static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
3816 struct kvm_vcpu *vcpu, int wait_state)
3817{
3818 DEFINE_WAIT(wait);
3819
3820 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
3821 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
3822 spin_unlock(&vc->lock);
3823 schedule();
3824 spin_lock(&vc->lock);
3825 }
3826 finish_wait(&vcpu->arch.cpu_run, &wait);
3827}
3828
3829static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
3830{
3831 if (!halt_poll_ns_grow)
3832 return;
3833
3834 vc->halt_poll_ns *= halt_poll_ns_grow;
3835 if (vc->halt_poll_ns < halt_poll_ns_grow_start)
3836 vc->halt_poll_ns = halt_poll_ns_grow_start;
3837}
3838
3839static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
3840{
3841 if (halt_poll_ns_shrink == 0)
3842 vc->halt_poll_ns = 0;
3843 else
3844 vc->halt_poll_ns /= halt_poll_ns_shrink;
3845}
3846
3847#ifdef CONFIG_KVM_XICS
3848static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
3849{
3850 if (!xics_on_xive())
3851 return false;
3852 return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
3853 vcpu->arch.xive_saved_state.cppr;
3854}
3855#else
3856static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
3857{
3858 return false;
3859}
3860#endif
3861
3862static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
3863{
3864 if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
3865 kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
3866 return true;
3867
3868 return false;
3869}
3870
3871
3872
3873
3874
3875static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
3876{
3877 struct kvm_vcpu *vcpu;
3878 int i;
3879
3880 for_each_runnable_thread(i, vcpu, vc) {
3881 if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
3882 return 1;
3883 }
3884
3885 return 0;
3886}
3887
3888
3889
3890
3891
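/*
 * All the vcpus in this vcore are idle, so wait for a decrementer or
 * external interrupt, polling first if halt-polling is enabled.
 */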
3892static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
3893{
3894 ktime_t cur, start_poll, start_wait;
3895 int do_sleep = 1;
3896 u64 block_ns;
3897
3898
3899 cur = start_poll = ktime_get();
3900 if (vc->halt_poll_ns) {
3901 ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
3902 ++vc->runner->stat.generic.halt_attempted_poll;
3903
3904 vc->vcore_state = VCORE_POLLING;
3905 spin_unlock(&vc->lock);
3906
3907 do {
3908 if (kvmppc_vcore_check_block(vc)) {
3909 do_sleep = 0;
3910 break;
3911 }
3912 cur = ktime_get();
3913 } while (kvm_vcpu_can_poll(cur, stop));
3914
3915 spin_lock(&vc->lock);
3916 vc->vcore_state = VCORE_INACTIVE;
3917
3918 if (!do_sleep) {
3919 ++vc->runner->stat.generic.halt_successful_poll;
3920 goto out;
3921 }
3922 }
3923
3924 prepare_to_rcuwait(&vc->wait);
3925 set_current_state(TASK_INTERRUPTIBLE);
3926 if (kvmppc_vcore_check_block(vc)) {
3927 finish_rcuwait(&vc->wait);
3928 do_sleep = 0;
3929
3930 if (vc->halt_poll_ns)
3931 ++vc->runner->stat.generic.halt_successful_poll;
3932 goto out;
3933 }
3934
3935 start_wait = ktime_get();
3936
3937 vc->vcore_state = VCORE_SLEEPING;
3938 trace_kvmppc_vcore_blocked(vc, 0);
3939 spin_unlock(&vc->lock);
3940 schedule();
3941 finish_rcuwait(&vc->wait);
3942 spin_lock(&vc->lock);
3943 vc->vcore_state = VCORE_INACTIVE;
3944 trace_kvmppc_vcore_blocked(vc, 1);
3945 ++vc->runner->stat.halt_successful_wait;
3946
3947 cur = ktime_get();
3948
3949out:
3950 block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
3951
3952
3953 if (do_sleep) {
3954 vc->runner->stat.generic.halt_wait_ns +=
3955 ktime_to_ns(cur) - ktime_to_ns(start_wait);
3956 KVM_STATS_LOG_HIST_UPDATE(
3957 vc->runner->stat.generic.halt_wait_hist,
3958 ktime_to_ns(cur) - ktime_to_ns(start_wait));
3959
3960 if (vc->halt_poll_ns) {
3961 vc->runner->stat.generic.halt_poll_fail_ns +=
3962 ktime_to_ns(start_wait) -
3963 ktime_to_ns(start_poll);
3964 KVM_STATS_LOG_HIST_UPDATE(
3965 vc->runner->stat.generic.halt_poll_fail_hist,
3966 ktime_to_ns(start_wait) -
3967 ktime_to_ns(start_poll));
3968 }
3969 } else {
3970
3971 if (vc->halt_poll_ns) {
3972 vc->runner->stat.generic.halt_poll_success_ns +=
3973 ktime_to_ns(cur) -
3974 ktime_to_ns(start_poll);
3975 KVM_STATS_LOG_HIST_UPDATE(
3976 vc->runner->stat.generic.halt_poll_success_hist,
3977 ktime_to_ns(cur) - ktime_to_ns(start_poll));
3978 }
3979 }
3980
3981
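/* Adjust the vcore's halt-polling window based on how long we blocked. */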
3982 if (halt_poll_ns) {
3983 if (block_ns <= vc->halt_poll_ns)
3984 ;
3985
3986 else if (vc->halt_poll_ns && block_ns > halt_poll_ns)
3987 shrink_halt_poll_ns(vc);
3988
3989 else if (vc->halt_poll_ns < halt_poll_ns &&
3990 block_ns < halt_poll_ns)
3991 grow_halt_poll_ns(vc);
3992 if (vc->halt_poll_ns > halt_poll_ns)
3993 vc->halt_poll_ns = halt_poll_ns;
3994 } else
3995 vc->halt_poll_ns = 0;
3996
3997 trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
3998}
3999
4000
4001
4002
4003
4004
4005static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
4006{
4007 int r = 0;
4008 struct kvm *kvm = vcpu->kvm;
4009
4010 mutex_lock(&kvm->arch.mmu_setup_lock);
4011 if (!kvm->arch.mmu_ready) {
4012 if (!kvm_is_radix(kvm))
4013 r = kvmppc_hv_setup_htab_rma(vcpu);
4014 if (!r) {
4015 if (cpu_has_feature(CPU_FTR_ARCH_300))
4016 kvmppc_setup_partition_table(kvm);
4017 kvm->arch.mmu_ready = 1;
4018 }
4019 }
4020 mutex_unlock(&kvm->arch.mmu_setup_lock);
4021 return r;
4022}
4023
4024static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
4025{
4026 struct kvm_run *run = vcpu->run;
4027 int n_ceded, i, r;
4028 struct kvmppc_vcore *vc;
4029 struct kvm_vcpu *v;
4030
4031 trace_kvmppc_run_vcpu_enter(vcpu);
4032
4033 run->exit_reason = 0;
4034 vcpu->arch.ret = RESUME_GUEST;
4035 vcpu->arch.trap = 0;
4036 kvmppc_update_vpas(vcpu);
4037
4038
4039
4040
4041 vc = vcpu->arch.vcore;
4042 spin_lock(&vc->lock);
4043 vcpu->arch.ceded = 0;
4044 vcpu->arch.run_task = current;
4045 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
4046 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
4047 vcpu->arch.busy_preempt = TB_NIL;
4048 WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
4049 ++vc->n_runnable;
4050
4051
4052
4053
4054
4055
4056 if (!signal_pending(current)) {
4057 if ((vc->vcore_state == VCORE_PIGGYBACK ||
4058 vc->vcore_state == VCORE_RUNNING) &&
4059 !VCORE_IS_EXITING(vc)) {
4060 kvmppc_create_dtl_entry(vcpu, vc);
4061 kvmppc_start_thread(vcpu, vc);
4062 trace_kvm_guest_enter(vcpu);
4063 } else if (vc->vcore_state == VCORE_SLEEPING) {
4064 rcuwait_wake_up(&vc->wait);
4065 }
4066
4067 }
4068
4069 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
4070 !signal_pending(current)) {
4071
4072 if (!vcpu->kvm->arch.mmu_ready) {
4073 spin_unlock(&vc->lock);
4074 r = kvmhv_setup_mmu(vcpu);
4075 spin_lock(&vc->lock);
4076 if (r) {
4077 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
4078 run->fail_entry.
4079 hardware_entry_failure_reason = 0;
4080 vcpu->arch.ret = r;
4081 break;
4082 }
4083 }
4084
4085 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
4086 kvmppc_vcore_end_preempt(vc);
4087
4088 if (vc->vcore_state != VCORE_INACTIVE) {
4089 kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
4090 continue;
4091 }
4092 for_each_runnable_thread(i, v, vc) {
4093 kvmppc_core_prepare_to_enter(v);
4094 if (signal_pending(v->arch.run_task)) {
4095 kvmppc_remove_runnable(vc, v);
4096 v->stat.signal_exits++;
4097 v->run->exit_reason = KVM_EXIT_INTR;
4098 v->arch.ret = -EINTR;
4099 wake_up(&v->arch.cpu_run);
4100 }
4101 }
4102 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
4103 break;
4104 n_ceded = 0;
4105 for_each_runnable_thread(i, v, vc) {
4106 if (!kvmppc_vcpu_woken(v))
4107 n_ceded += v->arch.ceded;
4108 else
4109 v->arch.ceded = 0;
4110 }
4111 vc->runner = vcpu;
4112 if (n_ceded == vc->n_runnable) {
4113 kvmppc_vcore_blocked(vc);
4114 } else if (need_resched()) {
4115 kvmppc_vcore_preempt(vc);
4116
4117 cond_resched_lock(&vc->lock);
4118 if (vc->vcore_state == VCORE_PREEMPT)
4119 kvmppc_vcore_end_preempt(vc);
4120 } else {
4121 kvmppc_run_core(vc);
4122 }
4123 vc->runner = NULL;
4124 }
4125
4126 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
4127 (vc->vcore_state == VCORE_RUNNING ||
4128 vc->vcore_state == VCORE_EXITING ||
4129 vc->vcore_state == VCORE_PIGGYBACK))
4130 kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
4131
4132 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
4133 kvmppc_vcore_end_preempt(vc);
4134
4135 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
4136 kvmppc_remove_runnable(vc, vcpu);
4137 vcpu->stat.signal_exits++;
4138 run->exit_reason = KVM_EXIT_INTR;
4139 vcpu->arch.ret = -EINTR;
4140 }
4141
4142 if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
4143
4144 i = -1;
4145 v = next_runnable_thread(vc, &i);
4146 wake_up(&v->arch.cpu_run);
4147 }
4148
4149 trace_kvmppc_run_vcpu_exit(vcpu);
4150 spin_unlock(&vc->lock);
4151 return vcpu->arch.ret;
4152}
4153
4154int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
4155 unsigned long lpcr)
4156{
4157 struct kvm_run *run = vcpu->run;
4158 int trap, r, pcpu;
4159 int srcu_idx, lpid;
4160 struct kvmppc_vcore *vc;
4161 struct kvm *kvm = vcpu->kvm;
4162 struct kvm_nested_guest *nested = vcpu->arch.nested;
4163
4164 trace_kvmppc_run_vcpu_enter(vcpu);
4165
4166 run->exit_reason = 0;
4167 vcpu->arch.ret = RESUME_GUEST;
4168 vcpu->arch.trap = 0;
4169
4170 vc = vcpu->arch.vcore;
4171 vcpu->arch.ceded = 0;
4172 vcpu->arch.run_task = current;
4173 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
4174 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
4175 vcpu->arch.busy_preempt = TB_NIL;
4176 vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
4177 vc->runnable_threads[0] = vcpu;
4178 vc->n_runnable = 1;
4179 vc->runner = vcpu;
4180
4181
4182 if (!kvm->arch.mmu_ready)
4183 kvmhv_setup_mmu(vcpu);
4184
4185 if (need_resched())
4186 cond_resched();
4187
4188 kvmppc_update_vpas(vcpu);
4189
4190 init_vcore_to_run(vc);
4191 vc->preempt_tb = TB_NIL;
4192
4193 preempt_disable();
4194 pcpu = smp_processor_id();
4195 vc->pcpu = pcpu;
4196 kvmppc_prepare_radix_vcpu(vcpu, pcpu);
4197
4198 local_irq_disable();
4199 hard_irq_disable();
4200 if (signal_pending(current))
4201 goto sigpend;
4202 if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
4203 goto out;
4204
4205 if (!nested) {
4206 kvmppc_core_prepare_to_enter(vcpu);
4207 if (vcpu->arch.doorbell_request) {
4208 vc->dpdes = 1;
4209 smp_wmb();
4210 vcpu->arch.doorbell_request = 0;
4211 }
4212 if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
4213 &vcpu->arch.pending_exceptions))
4214 lpcr |= LPCR_MER;
4215 } else if (vcpu->arch.pending_exceptions ||
4216 vcpu->arch.doorbell_request ||
4217 xive_interrupt_pending(vcpu)) {
4218 vcpu->arch.ret = RESUME_HOST;
4219 goto out;
4220 }
4221
4222 kvmppc_clear_host_core(pcpu);
4223
4224 local_paca->kvm_hstate.napping = 0;
4225 local_paca->kvm_hstate.kvm_split_mode = NULL;
4226 kvmppc_start_thread(vcpu, vc);
4227 kvmppc_create_dtl_entry(vcpu, vc);
4228 trace_kvm_guest_enter(vcpu);
4229
4230 vc->vcore_state = VCORE_RUNNING;
4231 trace_kvmppc_run_core(vc, 0);
4232
4233 if (cpu_has_feature(CPU_FTR_HVMODE)) {
4234 lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
4235 mtspr(SPRN_LPID, lpid);
4236 isync();
4237 kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
4238 }
4239
4240 guest_enter_irqoff();
4241
4242 srcu_idx = srcu_read_lock(&kvm->srcu);
4243
4244 this_cpu_disable_ftrace();
4245
4246
4247 trace_hardirqs_on();
4248
4249 trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr);
4250 vcpu->arch.trap = trap;
4251
4252 trace_hardirqs_off();
4253
4254 this_cpu_enable_ftrace();
4255
4256 srcu_read_unlock(&kvm->srcu, srcu_idx);
4257
4258 if (cpu_has_feature(CPU_FTR_HVMODE)) {
4259 mtspr(SPRN_LPID, kvm->arch.host_lpid);
4260 isync();
4261 }
4262
4263 set_irq_happened(trap);
4264
4265 kvmppc_set_host_core(pcpu);
4266
4267 context_tracking_guest_exit();
4268 if (!vtime_accounting_enabled_this_cpu()) {
4269 local_irq_enable();
4270
4271
4272
4273
4274
4275
4276
4277
4278 local_irq_disable();
4279 }
4280 vtime_account_guest_exit();
4281
4282 local_irq_enable();
4283
4284 cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
4285
4286 preempt_enable();
4287
4288
4289
4290
4291
4292
4293 if (kvmppc_core_pending_dec(vcpu) &&
4294 ((get_tb() < vcpu->arch.dec_expires) ||
4295 (trap == BOOK3S_INTERRUPT_SYSCALL &&
4296 kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
4297 kvmppc_core_dequeue_dec(vcpu);
4298
4299 trace_kvm_guest_exit(vcpu);
4300 r = RESUME_GUEST;
4301 if (trap) {
4302 if (!nested)
4303 r = kvmppc_handle_exit_hv(vcpu, current);
4304 else
4305 r = kvmppc_handle_nested_exit(vcpu);
4306 }
4307 vcpu->arch.ret = r;
4308
4309 if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
4310 !kvmppc_vcpu_woken(vcpu)) {
4311 kvmppc_set_timer(vcpu);
4312 while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
4313 if (signal_pending(current)) {
4314 vcpu->stat.signal_exits++;
4315 run->exit_reason = KVM_EXIT_INTR;
4316 vcpu->arch.ret = -EINTR;
4317 break;
4318 }
4319 spin_lock(&vc->lock);
4320 kvmppc_vcore_blocked(vc);
4321 spin_unlock(&vc->lock);
4322 }
4323 }
4324 vcpu->arch.ceded = 0;
4325
4326 vc->vcore_state = VCORE_INACTIVE;
4327 trace_kvmppc_run_core(vc, 1);
4328
4329 done:
4330 kvmppc_remove_runnable(vc, vcpu);
4331 trace_kvmppc_run_vcpu_exit(vcpu);
4332
4333 return vcpu->arch.ret;
4334
4335 sigpend:
4336 vcpu->stat.signal_exits++;
4337 run->exit_reason = KVM_EXIT_INTR;
4338 vcpu->arch.ret = -EINTR;
4339 out:
4340 local_irq_enable();
4341 preempt_enable();
4342 goto done;
4343}
4344
4345static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
4346{
4347 struct kvm_run *run = vcpu->run;
4348 int r;
4349 int srcu_idx;
4350 unsigned long ebb_regs[3] = {};
4351 unsigned long user_tar = 0;
4352 unsigned int user_vrsave;
4353 struct kvm *kvm;
4354
4355 if (!vcpu->arch.sane) {
4356 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4357 return -EINVAL;
4358 }
4359
4360
4361
4362
4363
4364
4365
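/*
 * Don't allow entry with a suspended transaction, because the guest
 * entry/exit code would lose it.  If the task has TM enabled, stash its
 * TM SPRs and clear MSR_TM; they are restored via the TM-unavailable
 * interrupt when the task next uses TM.
 */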
4366#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
4367 if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
4368 (current->thread.regs->msr & MSR_TM)) {
4369 if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
4370 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
4371 run->fail_entry.hardware_entry_failure_reason = 0;
4372 return -EINVAL;
4373 }
4374
4375 mtmsr(mfmsr() | MSR_TM);
4376 current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
4377 current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
4378 current->thread.tm_texasr = mfspr(SPRN_TEXASR);
4379 current->thread.regs->msr &= ~MSR_TM;
4380 }
4381#endif
4382
4383
4384
4385
4386
4387 if (!vcpu->arch.online) {
4388 atomic_inc(&vcpu->arch.vcore->online_count);
4389 vcpu->arch.online = 1;
4390 }
4391
4392 kvmppc_core_prepare_to_enter(vcpu);
4393
4394
4395 if (signal_pending(current)) {
4396 run->exit_reason = KVM_EXIT_INTR;
4397 return -EINTR;
4398 }
4399
4400 kvm = vcpu->kvm;
4401 atomic_inc(&kvm->arch.vcpus_running);
4402
4403 smp_mb();
4404
4405 flush_all_to_thread(current);
4406
4407
4408 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
4409 ebb_regs[0] = mfspr(SPRN_EBBHR);
4410 ebb_regs[1] = mfspr(SPRN_EBBRR);
4411 ebb_regs[2] = mfspr(SPRN_BESCR);
4412 user_tar = mfspr(SPRN_TAR);
4413 }
4414 user_vrsave = mfspr(SPRN_VRSAVE);
4415
4416 vcpu->arch.waitp = &vcpu->arch.vcore->wait;
4417 vcpu->arch.pgdir = kvm->mm->pgd;
4418 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
4419
4420 do {
4421
4422
4423
4424
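/*
 * The TLB prefetch bug workaround is only implemented in the full
 * kvmppc_run_vcpu() path, so only use the single-vcpu P9 entry path for
 * radix guests in independent-threads mode on unaffected CPUs.
 */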
4425 if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
4426 !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
4427 r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
4428 vcpu->arch.vcore->lpcr);
4429 else
4430 r = kvmppc_run_vcpu(vcpu);
4431
4432 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
4433 !(vcpu->arch.shregs.msr & MSR_PR)) {
4434 trace_kvm_hcall_enter(vcpu);
4435 r = kvmppc_pseries_do_hcall(vcpu);
4436 trace_kvm_hcall_exit(vcpu, r);
4437 kvmppc_core_prepare_to_enter(vcpu);
4438 } else if (r == RESUME_PAGE_FAULT) {
4439 srcu_idx = srcu_read_lock(&kvm->srcu);
4440 r = kvmppc_book3s_hv_page_fault(vcpu,
4441 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
4442 srcu_read_unlock(&kvm->srcu, srcu_idx);
4443 } else if (r == RESUME_PASSTHROUGH) {
4444 if (WARN_ON(xics_on_xive()))
4445 r = H_SUCCESS;
4446 else
4447 r = kvmppc_xics_rm_complete(vcpu, 0);
4448 }
4449 } while (is_kvmppc_resume_guest(r));
4450
4451
4452 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
4453 mtspr(SPRN_EBBHR, ebb_regs[0]);
4454 mtspr(SPRN_EBBRR, ebb_regs[1]);
4455 mtspr(SPRN_BESCR, ebb_regs[2]);
4456 mtspr(SPRN_TAR, user_tar);
4457 mtspr(SPRN_FSCR, current->thread.fscr);
4458 }
4459 mtspr(SPRN_VRSAVE, user_vrsave);
4460
4461 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
4462 atomic_dec(&kvm->arch.vcpus_running);
4463 return r;
4464}
4465
4466static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
4467 int shift, int sllp)
4468{
4469 (*sps)->page_shift = shift;
4470 (*sps)->slb_enc = sllp;
4471 (*sps)->enc[0].page_shift = shift;
4472 (*sps)->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
4473
4474
4475
4476 if (shift != 24) {
4477 int penc = kvmppc_pgsize_lp_encoding(shift, 24);
4478 if (penc != -1) {
4479 (*sps)->enc[1].page_shift = 24;
4480 (*sps)->enc[1].pte_enc = penc;
4481 }
4482 }
4483 (*sps)++;
4484}
4485
4486static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
4487 struct kvm_ppc_smmu_info *info)
4488{
4489 struct kvm_ppc_one_seg_page_size *sps;
4490
4491
4492
4493
4494
4495
4496 info->data_keys = 32;
4497 info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
4498
4499
4500 info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
4501 info->slb_size = 32;
4502
4503
4504 sps = &info->sps[0];
4505 kvmppc_add_seg_page_size(&sps, 12, 0);
4506 kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
4507 kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
4508
4509
4510 if (kvmhv_on_pseries())
4511 info->flags |= KVM_PPC_NO_HASH;
4512
4513 return 0;
4514}
4515
4516
4517
4518
4519static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
4520 struct kvm_dirty_log *log)
4521{
4522 struct kvm_memslots *slots;
4523 struct kvm_memory_slot *memslot;
4524 int i, r;
4525 unsigned long n;
4526 unsigned long *buf, *p;
4527 struct kvm_vcpu *vcpu;
4528
4529 mutex_lock(&kvm->slots_lock);
4530
4531 r = -EINVAL;
4532 if (log->slot >= KVM_USER_MEM_SLOTS)
4533 goto out;
4534
4535 slots = kvm_memslots(kvm);
4536 memslot = id_to_memslot(slots, log->slot);
4537 r = -ENOENT;
4538 if (!memslot || !memslot->dirty_bitmap)
4539 goto out;
4540
4541
4542
4543
4544
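/*
 * Use the second half of the dirty_bitmap area as a scratch buffer in
 * which to accumulate dirty bits from the HPT/radix scan, the first-half
 * bitmap and the VPA/DTL areas, before copying the result to userspace.
 */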
4545 n = kvm_dirty_bitmap_bytes(memslot);
4546 buf = memslot->dirty_bitmap + n / sizeof(long);
4547 memset(buf, 0, n);
4548
4549 if (kvm_is_radix(kvm))
4550 r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
4551 else
4552 r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
4553 if (r)
4554 goto out;
4555
4556
4557
4558
4559
4560
4561
4562 p = memslot->dirty_bitmap;
4563 for (i = 0; i < n / sizeof(long); ++i)
4564 buf[i] |= xchg(&p[i], 0);
4565
4566
4567
4568 kvm_for_each_vcpu(i, vcpu, kvm) {
4569 spin_lock(&vcpu->arch.vpa_update_lock);
4570 kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
4571 kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
4572 spin_unlock(&vcpu->arch.vpa_update_lock);
4573 }
4574
4575 r = -EFAULT;
4576 if (copy_to_user(log->dirty_bitmap, buf, n))
4577 goto out;
4578
4579 r = 0;
4580out:
4581 mutex_unlock(&kvm->slots_lock);
4582 return r;
4583}
4584
4585static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *slot)
4586{
4587 vfree(slot->arch.rmap);
4588 slot->arch.rmap = NULL;
4589}
4590
4591static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
4592 struct kvm_memory_slot *slot,
4593 const struct kvm_userspace_memory_region *mem,
4594 enum kvm_mr_change change)
4595{
4596 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
4597
4598 if (change == KVM_MR_CREATE) {
4599 slot->arch.rmap = vzalloc(array_size(npages,
4600 sizeof(*slot->arch.rmap)));
4601 if (!slot->arch.rmap)
4602 return -ENOMEM;
4603 }
4604
4605 return 0;
4606}
4607
4608static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
4609 const struct kvm_userspace_memory_region *mem,
4610 const struct kvm_memory_slot *old,
4611 const struct kvm_memory_slot *new,
4612 enum kvm_mr_change change)
4613{
4614 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
4615
4616	/*
4617	 * If we are creating a memslot, it might make
4618	 * some address that was previously cached as emulated
4619	 * MMIO be no longer emulated MMIO, so invalidate
4620	 * all the caches of emulated MMIO translations.
4621	 */
4622 if (npages)
4623 atomic64_inc(&kvm->arch.mmio_update);
4624
4625	/*
4626	 * For change == KVM_MR_MOVE or KVM_MR_DELETE, higher levels
4627	 * have already called kvm_arch_flush_shadow_memslot() to
4628	 * flush shadow mappings.  For KVM_MR_CREATE we have no
4629	 * previous mappings.  So the only case to handle is
4630	 * KVM_MR_FLAGS_ONLY when the KVM_MEM_LOG_DIRTY_PAGES bit
4631	 * has been changed.
4632	 * For radix guests, we flush on setting KVM_MEM_LOG_DIRTY_PAGES
4633	 * to get rid of any THP PTEs in the partition-scoped page tables
4634	 * so we can track dirtiness at the page level; we flush when
4635	 * clearing KVM_MEM_LOG_DIRTY_PAGES so that we can go back to
4636	 * using THP PTEs.
4637	 */
4638 if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
4639 ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
4640 kvmppc_radix_flush_memslot(kvm, old);
4641
4642
4643	/* If the UV hasn't yet started H_SVM_INIT_START, don't register memslots */
4644 if (!kvm->arch.secure_guest)
4645 return;
4646
4647 switch (change) {
4648 case KVM_MR_CREATE:
4649		/*
4650		 * Register the new memslot with the ultravisor; note that
4651		 * a failure here is currently not propagated to the caller.
4652		 */
4653 kvmppc_uvmem_memslot_create(kvm, new);
4654 break;
4655 case KVM_MR_DELETE:
4656 kvmppc_uvmem_memslot_delete(kvm, old);
4657 break;
4658 default:
4659
4660 break;
4661 }
4662}
4663
4664/*
4665 * Update LPCR values in kvm->arch and in vcores.
4666 * Caller must hold kvm->arch.mmu_setup_lock (for mutual exclusion
4667 * of kvm->arch.lpcr updates).
4668 */
4669void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
4670{
4671 long int i;
4672 u32 cores_done = 0;
4673
4674 if ((kvm->arch.lpcr & mask) == lpcr)
4675 return;
4676
4677 kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
4678
4679 for (i = 0; i < KVM_MAX_VCORES; ++i) {
4680 struct kvmppc_vcore *vc = kvm->arch.vcores[i];
4681 if (!vc)
4682 continue;
4683
4684 spin_lock(&vc->lock);
4685 vc->lpcr = (vc->lpcr & ~mask) | lpcr;
4686 verify_lpcr(kvm, vc->lpcr);
4687 spin_unlock(&vc->lock);
4688 if (++cores_done >= kvm->arch.online_vcores)
4689 break;
4690 }
4691}
4692
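/*
 * Write this guest's partition-table entry: dword 0 describes the
 * HPT or radix tree, dword 1 the process table.
 */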
4693void kvmppc_setup_partition_table(struct kvm *kvm)
4694{
4695 unsigned long dw0, dw1;
4696
4697 if (!kvm_is_radix(kvm)) {
4698		/* PS field - page size for VRMA */
4699 dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
4700 ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
4701		/* HTABSIZE and HTABORG fields */
4702 dw0 |= kvm->arch.sdr1;
4703
4704		/* Second dword as set by userspace */
4705 dw1 = kvm->arch.process_table;
4706 } else {
4707 dw0 = PATB_HR | radix__get_tree_size() |
4708 __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
4709 dw1 = PATB_GR | kvm->arch.process_table;
4710 }
4711 kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1);
4712}
4713
4714/*
4715 * Set up the HPT (hashed page table) and RMA (real-mode area)
4716 * for a HPT guest, if not already done.
4717 */
4718static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
4719{
4720 int err = 0;
4721 struct kvm *kvm = vcpu->kvm;
4722 unsigned long hva;
4723 struct kvm_memory_slot *memslot;
4724 struct vm_area_struct *vma;
4725 unsigned long lpcr = 0, senc;
4726 unsigned long psize, porder;
4727 int srcu_idx;
4728
4729
4730 if (!kvm->arch.hpt.virt) {
4731 int order = KVM_DEFAULT_HPT_ORDER;
4732 struct kvm_hpt_info info;
4733
4734 err = kvmppc_allocate_hpt(&info, order);
4735
4736
4737		/* If we get -ENOMEM, retry with a smaller HPT size */
4738 while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER)
4739 err = kvmppc_allocate_hpt(&info, order);
4740
4741 if (err < 0) {
4742 pr_err("KVM: Couldn't alloc HPT\n");
4743 goto out;
4744 }
4745
4746 kvmppc_set_hpt(kvm, &info);
4747 }
4748
4749
4750 srcu_idx = srcu_read_lock(&kvm->srcu);
4751 memslot = gfn_to_memslot(kvm, 0);
4752
4753
4754 err = -EINVAL;
4755 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
4756 goto out_srcu;
4757
4758
4759 hva = memslot->userspace_addr;
4760 mmap_read_lock(kvm->mm);
4761 vma = vma_lookup(kvm->mm, hva);
4762 if (!vma || (vma->vm_flags & VM_IO))
4763 goto up_out;
4764
4765 psize = vma_kernel_pagesize(vma);
4766
4767 mmap_read_unlock(kvm->mm);
4768
4769	/* We can handle 4k, 64k or 16M pages in the VRMA */
4770 if (psize >= 0x1000000)
4771 psize = 0x1000000;
4772 else if (psize >= 0x10000)
4773 psize = 0x10000;
4774 else
4775 psize = 0x1000;
4776 porder = __ilog2(psize);
4777
4778 senc = slb_pgsize_encoding(psize);
4779 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
4780 (VRMA_VSID << SLB_VSID_SHIFT_1T);
4781
4782 kvmppc_map_vrma(vcpu, memslot, porder);
4783
4784
4785 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
4786		/* the -4 is to account for senc values starting at 0x10 */
4787 lpcr = senc << (LPCR_VRMASD_SH - 4);
4788 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
4789 }
4790
4791	/* Order updates to kvm->arch.lpcr etc. vs. mmu_ready */
4792 smp_wmb();
4793 err = 0;
4794 out_srcu:
4795 srcu_read_unlock(&kvm->srcu, srcu_idx);
4796 out:
4797 return err;
4798
4799 up_out:
4800 mmap_read_unlock(kvm->mm);
4801 goto out_srcu;
4802}
4803
4804/*
4805 * Must be called with kvm->arch.mmu_setup_lock held and
4806 * mmu_ready = 0 and no vcpus running.
4807 */
4808int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
4809{
4810 if (nesting_enabled(kvm))
4811 kvmhv_release_all_nested(kvm);
4812 kvmppc_rmap_reset(kvm);
4813 kvm->arch.process_table = 0;
4814	/* Mutual exclusion with kvm_unmap_hva_range etc. */
4815 spin_lock(&kvm->mmu_lock);
4816 kvm->arch.radix = 0;
4817 spin_unlock(&kvm->mmu_lock);
4818 kvmppc_free_radix(kvm);
4819 kvmppc_update_lpcr(kvm, LPCR_VPM1,
4820 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
4821 return 0;
4822}
4823
4824/*
4825 * Must be called with kvm->arch.mmu_setup_lock held and
4826 * mmu_ready = 0 and no vcpus running.
4827 */
4828int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
4829{
4830 int err;
4831
4832 err = kvmppc_init_vm_radix(kvm);
4833 if (err)
4834 return err;
4835 kvmppc_rmap_reset(kvm);
4836	/* Mutual exclusion with kvm_unmap_hva_range etc. */
4837 spin_lock(&kvm->mmu_lock);
4838 kvm->arch.radix = 1;
4839 spin_unlock(&kvm->mmu_lock);
4840 kvmppc_free_hpt(&kvm->arch.hpt);
4841 kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
4842 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
4843 return 0;
4844}
4845
4846#ifdef CONFIG_KVM_XICS
4847/*
4848 * Allocate a per-core structure for managing state about which cores are
4849 * running in the host versus the guest and for exchanging data between
4850 * real mode KVM and CPUs running in the host.
4851 * This is only done for the first VM.
4852 * The allocated structure stays even if all VMs have stopped.
4853 * It is only freed when the kvm-hv module is unloaded.
4854 * It's OK for this routine to fail, we just don't support host
4855 * core operations like redirecting H_IPI wakeups.
4856 */
4857void kvmppc_alloc_host_rm_ops(void)
4858{
4859 struct kvmppc_host_rm_ops *ops;
4860 unsigned long l_ops;
4861 int cpu, core;
4862 int size;
4863
4864
4865 if (kvmppc_host_rm_ops_hv != NULL)
4866 return;
4867
4868 ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
4869 if (!ops)
4870 return;
4871
4872 size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
4873 ops->rm_core = kzalloc(size, GFP_KERNEL);
4874
4875 if (!ops->rm_core) {
4876 kfree(ops);
4877 return;
4878 }
4879
4880 cpus_read_lock();
4881
4882 for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
4883 if (!cpu_online(cpu))
4884 continue;
4885
4886 core = cpu >> threads_shift;
4887 ops->rm_core[core].rm_state.in_host = 1;
4888 }
4889
4890 ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
4891
4892	/*
4893	 * Make the contents of the kvmppc_host_rm_ops structure visible
4894	 * to other CPUs before we assign it to the global variable.
4895	 * Do an atomic assignment (no locks used here), but if someone
4896	 * beats us to it, just free our copy and return.
4897	 */
4898 smp_wmb();
4899 l_ops = (unsigned long) ops;
4900
4901 if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
4902 cpus_read_unlock();
4903 kfree(ops->rm_core);
4904 kfree(ops);
4905 return;
4906 }
4907
4908 cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
4909 "ppc/kvm_book3s:prepare",
4910 kvmppc_set_host_core,
4911 kvmppc_clear_host_core);
4912 cpus_read_unlock();
4913}
4914
4915void kvmppc_free_host_rm_ops(void)
4916{
4917 if (kvmppc_host_rm_ops_hv) {
4918 cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE);
4919 kfree(kvmppc_host_rm_ops_hv->rm_core);
4920 kfree(kvmppc_host_rm_ops_hv);
4921 kvmppc_host_rm_ops_hv = NULL;
4922 }
4923}
4924#endif
4925
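/*
 * Set up a new virtual machine: allocate an LPID, choose the initial
 * LPCR value and MMU mode, and create the per-VM debugfs directory.
 */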
4926static int kvmppc_core_init_vm_hv(struct kvm *kvm)
4927{
4928 unsigned long lpcr, lpid;
4929 char buf[32];
4930 int ret;
4931
4932 mutex_init(&kvm->arch.uvmem_lock);
4933 INIT_LIST_HEAD(&kvm->arch.uvmem_pfns);
4934 mutex_init(&kvm->arch.mmu_setup_lock);
4935
4936
4937
4938 lpid = kvmppc_alloc_lpid();
4939 if ((long)lpid < 0)
4940 return -ENOMEM;
4941 kvm->arch.lpid = lpid;
4942
4943 kvmppc_alloc_host_rm_ops();
4944
4945 kvmhv_vm_nested_init(kvm);
4946
4947	/*
4948	 * Since we don't flush the TLB when tearing down a VM,
4949	 * and this lpid might have previously been used,
4950	 * make sure we flush on each core before running the new VM.
4951	 * On POWER9, the tlbie in mmu_partition_table_set_entry()
4952	 * does this flush for us.
4953	 */
4954 if (!cpu_has_feature(CPU_FTR_ARCH_300))
4955 cpumask_setall(&kvm->arch.need_tlb_flush);
4956
4957
4958 memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
4959 sizeof(kvm->arch.enabled_hcalls));
4960
4961 if (!cpu_has_feature(CPU_FTR_ARCH_300))
4962 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
4963
4964
4965 if (cpu_has_feature(CPU_FTR_HVMODE)) {
4966 kvm->arch.host_lpid = mfspr(SPRN_LPID);
4967 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
4968 lpcr &= LPCR_PECE | LPCR_LPES;
4969 } else {
4970 lpcr = 0;
4971 }
4972 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
4973 LPCR_VPM0 | LPCR_VPM1;
4974 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
4975 (VRMA_VSID << SLB_VSID_SHIFT_1T);
4976
4977 if (cpu_has_feature(CPU_FTR_ARCH_207S))
4978 lpcr |= LPCR_ONL;
4979
4980	/*
4981	 * On POWER9, the VPM0 bit is reserved (VPM0=1 behaviour is assumed).
4982	 * Set HVICE to enable hypervisor virtualization interrupts.
4983	 * Set HEIC to prevent OS interrupts from going to the hypervisor
4984	 * (should be unnecessary, but better safe than sorry).
4985	 */
4986 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
4987 lpcr &= ~LPCR_VPM0;
4988 lpcr |= LPCR_HVICE | LPCR_HEIC;
4989
4990		/*
4991		 * If XIVE is enabled, we route 0x500 interrupts directly
4992		 * to the guest.
4993		 */
4994 if (xics_on_xive())
4995 lpcr |= LPCR_LPES;
4996 }
4997
4998
4999
5000
5001 if (radix_enabled()) {
5002 kvm->arch.radix = 1;
5003 kvm->arch.mmu_ready = 1;
5004 lpcr &= ~LPCR_VPM1;
5005 lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
5006 ret = kvmppc_init_vm_radix(kvm);
5007 if (ret) {
5008 kvmppc_free_lpid(kvm->arch.lpid);
5009 return ret;
5010 }
5011 kvmppc_setup_partition_table(kvm);
5012 }
5013
5014 verify_lpcr(kvm, lpcr);
5015 kvm->arch.lpcr = lpcr;
5016
5017
5018 kvm->arch.resize_hpt = NULL;
5019
5020	/*
5021	 * Work out how many sets the TLB has, for the use of
5022	 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
5023	 */
5024 if (cpu_has_feature(CPU_FTR_ARCH_31)) {
5025		/*
5026		 * P10 will flush all the congruence classes with a single tlbiel
5027		 */
5028 kvm->arch.tlb_sets = 1;
5029 } else if (radix_enabled())
5030 kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX;
5031 else if (cpu_has_feature(CPU_FTR_ARCH_300))
5032 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH;
5033 else if (cpu_has_feature(CPU_FTR_ARCH_207S))
5034 kvm->arch.tlb_sets = POWER8_TLB_SETS;
5035 else
5036 kvm->arch.tlb_sets = POWER7_TLB_SETS;
5037
5038	/*
5039	 * Track that we now have an HV mode VM active. This blocks secondary
5040	 * CPU threads from coming online.
5041	 * On POWER9, we only need to do this if the "indep_threads_mode"
5042	 * module parameter has been set to N.
5043	 */
5044 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
5045 if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
5046 pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
5047 kvm->arch.threads_indep = true;
5048 } else {
5049 kvm->arch.threads_indep = indep_threads_mode;
5050 }
5051 }
5052 if (!kvm->arch.threads_indep)
5053 kvm_hv_vm_activated();
5054
5055	/*
5056	 * Initialize smt_mode depending on processor.
5057	 * POWER8 and earlier have to use "strict" threading, where
5058	 * all vCPUs in a vcore have to run on the same (sub)core,
5059	 * whereas on POWER9 the threads can each run a different
5060	 * guest.
5061	 */
5062 if (!cpu_has_feature(CPU_FTR_ARCH_300))
5063 kvm->arch.smt_mode = threads_per_subcore;
5064 else
5065 kvm->arch.smt_mode = 1;
5066 kvm->arch.emul_smt_mode = 1;
5067
5068	/*
5069	 * Create a debugfs directory for the VM
5070	 */
5071 snprintf(buf, sizeof(buf), "vm%d", current->pid);
5072 kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
5073 kvmppc_mmu_debugfs_init(kvm);
5074 if (radix_enabled())
5075 kvmhv_radix_debugfs_init(kvm);
5076
5077 return 0;
5078}
5079
5080static void kvmppc_free_vcores(struct kvm *kvm)
5081{
5082 long int i;
5083
5084 for (i = 0; i < KVM_MAX_VCORES; ++i)
5085 kfree(kvm->arch.vcores[i]);
5086 kvm->arch.online_vcores = 0;
5087}
5088
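/*
 * Tear down a virtual machine: free the vcores, the guest MMU
 * structures and the LPID, and clear the partition-table entry.
 */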
5089static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
5090{
5091 debugfs_remove_recursive(kvm->arch.debugfs_dir);
5092
5093 if (!kvm->arch.threads_indep)
5094 kvm_hv_vm_deactivated();
5095
5096 kvmppc_free_vcores(kvm);
5097
5098
5099 if (kvm_is_radix(kvm))
5100 kvmppc_free_radix(kvm);
5101 else
5102 kvmppc_free_hpt(&kvm->arch.hpt);
5103
5104	/* Perform global invalidation and return the lpid to the pool */
5105 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
5106 if (nesting_enabled(kvm))
5107 kvmhv_release_all_nested(kvm);
5108 kvm->arch.process_table = 0;
5109 if (kvm->arch.secure_guest)
5110 uv_svm_terminate(kvm->arch.lpid);
5111 kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
5112 }
5113
5114 kvmppc_free_lpid(kvm->arch.lpid);
5115
5116 kvmppc_free_pimap(kvm);
5117}
5118
5119/* We don't need to emulate any privileged instructions or dcbz */
5120static int kvmppc_core_emulate_op_hv(struct kvm_vcpu *vcpu,
5121 unsigned int inst, int *advance)
5122{
5123 return EMULATE_FAIL;
5124}
5125
5126static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
5127 ulong spr_val)
5128{
5129 return EMULATE_FAIL;
5130}
5131
5132static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
5133 ulong *spr_val)
5134{
5135 return EMULATE_FAIL;
5136}
5137
5138static int kvmppc_core_check_processor_compat_hv(void)
5139{
5140 if (cpu_has_feature(CPU_FTR_HVMODE) &&
5141 cpu_has_feature(CPU_FTR_ARCH_206))
5142 return 0;
5143
5144	/* POWER9 in radix mode is capable of being a nested hypervisor. */
5145 if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
5146 return 0;
5147
5148 return -EIO;
5149}
5150
5151#ifdef CONFIG_KVM_XICS
5152
5153void kvmppc_free_pimap(struct kvm *kvm)
5154{
5155 kfree(kvm->arch.pimap);
5156}
5157
5158static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
5159{
5160 return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
5161}
5162
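/*
 * Record a mapping from a guest interrupt number (GSI) to the host
 * hardware IRQ backing a passed-through device, so the interrupt can
 * be handled and EOI'd without exiting to the host.
 */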
5163static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
5164{
5165 struct irq_desc *desc;
5166 struct kvmppc_irq_map *irq_map;
5167 struct kvmppc_passthru_irqmap *pimap;
5168 struct irq_chip *chip;
5169 int i, rc = 0;
5170
5171 if (!kvm_irq_bypass)
5172 return 1;
5173
5174 desc = irq_to_desc(host_irq);
5175 if (!desc)
5176 return -EIO;
5177
5178 mutex_lock(&kvm->lock);
5179
5180 pimap = kvm->arch.pimap;
5181 if (pimap == NULL) {
5182
5183 pimap = kvmppc_alloc_pimap();
5184 if (pimap == NULL) {
5185 mutex_unlock(&kvm->lock);
5186 return -ENOMEM;
5187 }
5188 kvm->arch.pimap = pimap;
5189 }
5190
5191	/*
5192	 * For now, we only support interrupts for which the EOI operation
5193	 * is an OPAL call followed by a write to XIRR, since that's
5194	 * what our real-mode EOI code does, or a XIVE interrupt
5195	 */
5196 chip = irq_data_get_irq_chip(&desc->irq_data);
5197 if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
5198 pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
5199 host_irq, guest_gsi);
5200 mutex_unlock(&kvm->lock);
5201 return -ENOENT;
5202 }
5203
5204	/*
5205	 * See if we already have an entry for this guest IRQ number.
5206	 * If it's mapped to a hardware IRQ number, that's an error,
5207	 * otherwise re-use this entry.
5208	 */
5209 for (i = 0; i < pimap->n_mapped; i++) {
5210 if (guest_gsi == pimap->mapped[i].v_hwirq) {
5211 if (pimap->mapped[i].r_hwirq) {
5212 mutex_unlock(&kvm->lock);
5213 return -EINVAL;
5214 }
5215 break;
5216 }
5217 }
5218
5219 if (i == KVMPPC_PIRQ_MAPPED) {
5220 mutex_unlock(&kvm->lock);
5221 return -EAGAIN;
5222 }
5223
5224 irq_map = &pimap->mapped[i];
5225
5226 irq_map->v_hwirq = guest_gsi;
5227 irq_map->desc = desc;
5228
5229	/*
5230	 * Order the above two stores before the next to serialize with
5231	 * the KVM real mode handler.
5232	 */
5233 smp_wmb();
5234 irq_map->r_hwirq = desc->irq_data.hwirq;
5235
5236 if (i == pimap->n_mapped)
5237 pimap->n_mapped++;
5238
5239 if (xics_on_xive())
5240 rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
5241 else
5242 kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
5243 if (rc)
5244 irq_map->r_hwirq = 0;
5245
5246 mutex_unlock(&kvm->lock);
5247
5248 return 0;
5249}
5250
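/*
 * Remove a passthrough interrupt mapping and revert the interrupt to
 * the default host handling path.
 */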
5251static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
5252{
5253 struct irq_desc *desc;
5254 struct kvmppc_passthru_irqmap *pimap;
5255 int i, rc = 0;
5256
5257 if (!kvm_irq_bypass)
5258 return 0;
5259
5260 desc = irq_to_desc(host_irq);
5261 if (!desc)
5262 return -EIO;
5263
5264 mutex_lock(&kvm->lock);
5265 if (!kvm->arch.pimap)
5266 goto unlock;
5267
5268 pimap = kvm->arch.pimap;
5269
5270 for (i = 0; i < pimap->n_mapped; i++) {
5271 if (guest_gsi == pimap->mapped[i].v_hwirq)
5272 break;
5273 }
5274
5275 if (i == pimap->n_mapped) {
5276 mutex_unlock(&kvm->lock);
5277 return -ENODEV;
5278 }
5279
5280 if (xics_on_xive())
5281 rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
5282 else
5283 kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
5284
5285	/* Invalidate the entry (nothing more we can do on error from the above) */
5286 pimap->mapped[i].r_hwirq = 0;
5287
5288	/*
5289	 * We don't free this structure even when the count goes to
5290	 * zero. The structure is freed when we destroy the VM.
5291	 */
5292 unlock:
5293 mutex_unlock(&kvm->lock);
5294 return rc;
5295}
5296
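/*
 * irqbypass callbacks: connect or disconnect an irqfd consumer and an
 * interrupt producer (e.g. a VFIO device) for direct delivery.
 */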
5297static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
5298 struct irq_bypass_producer *prod)
5299{
5300 int ret = 0;
5301 struct kvm_kernel_irqfd *irqfd =
5302 container_of(cons, struct kvm_kernel_irqfd, consumer);
5303
5304 irqfd->producer = prod;
5305
5306 ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
5307 if (ret)
5308 pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
5309 prod->irq, irqfd->gsi, ret);
5310
5311 return ret;
5312}
5313
5314static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
5315 struct irq_bypass_producer *prod)
5316{
5317 int ret;
5318 struct kvm_kernel_irqfd *irqfd =
5319 container_of(cons, struct kvm_kernel_irqfd, consumer);
5320
5321 irqfd->producer = NULL;
5322
5323	/*
5324	 * When the producer of a consumer is unregistered, we change back
5325	 * to default external interrupt handling mode - KVM real mode
5326	 * will switch back to host.
5327	 */
5328 ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
5329 if (ret)
5330 pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
5331 prod->irq, irqfd->gsi, ret);
5332}
5333#endif
5334
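/* HV-specific VM ioctls: HPT allocation, dumping and resizing */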
5335static long kvm_arch_vm_ioctl_hv(struct file *filp,
5336 unsigned int ioctl, unsigned long arg)
5337{
5338 struct kvm *kvm __maybe_unused = filp->private_data;
5339 void __user *argp = (void __user *)arg;
5340 long r;
5341
5342 switch (ioctl) {
5343
5344 case KVM_PPC_ALLOCATE_HTAB: {
5345 u32 htab_order;
5346
5347		/* If we're a nested hypervisor, we don't support HPT guests */
5348 if (kvmhv_on_pseries()) {
5349 r = -EOPNOTSUPP;
5350 break;
5351 }
5352
5353 r = -EFAULT;
5354 if (get_user(htab_order, (u32 __user *)argp))
5355 break;
5356 r = kvmppc_alloc_reset_hpt(kvm, htab_order);
5357 if (r)
5358 break;
5359 r = 0;
5360 break;
5361 }
5362
5363 case KVM_PPC_GET_HTAB_FD: {
5364 struct kvm_get_htab_fd ghf;
5365
5366 r = -EFAULT;
5367 if (copy_from_user(&ghf, argp, sizeof(ghf)))
5368 break;
5369 r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
5370 break;
5371 }
5372
5373 case KVM_PPC_RESIZE_HPT_PREPARE: {
5374 struct kvm_ppc_resize_hpt rhpt;
5375
5376 r = -EFAULT;
5377 if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
5378 break;
5379
5380 r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt);
5381 break;
5382 }
5383
5384 case KVM_PPC_RESIZE_HPT_COMMIT: {
5385 struct kvm_ppc_resize_hpt rhpt;
5386
5387 r = -EFAULT;
5388 if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
5389 break;
5390
5391 r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt);
5392 break;
5393 }
5394
5395 default:
5396 r = -ENOTTY;
5397 }
5398
5399 return r;
5400}
5401
5402/*
5403 * List of hcall numbers to enable by default.
5404 * For compatibility with old userspace, we enable by default
5405 * all hcalls that were implemented before the hcall-enabling
5406 * facility was added.  Note this list should not include H_RTAS.
5407 */
5408static unsigned int default_hcall_list[] = {
5409 H_REMOVE,
5410 H_ENTER,
5411 H_READ,
5412 H_PROTECT,
5413 H_BULK_REMOVE,
5414#ifdef CONFIG_SPAPR_TCE_IOMMU
5415 H_GET_TCE,
5416 H_PUT_TCE,
5417#endif
5418 H_SET_DABR,
5419 H_SET_XDABR,
5420 H_CEDE,
5421 H_PROD,
5422 H_CONFER,
5423 H_REGISTER_VPA,
5424#ifdef CONFIG_KVM_XICS
5425 H_EOI,
5426 H_CPPR,
5427 H_IPI,
5428 H_IPOLL,
5429 H_XIRR,
5430 H_XIRR_X,
5431#endif
5432 0
5433};
5434
5435static void init_default_hcalls(void)
5436{
5437 int i;
5438 unsigned int hcall;
5439
5440 for (i = 0; default_hcall_list[i]; ++i) {
5441 hcall = default_hcall_list[i];
5442 WARN_ON(!kvmppc_hcall_impl_hv(hcall));
5443 __set_bit(hcall / 4, default_enabled_hcalls);
5444 }
5445}
5446
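/*
 * Handle KVM_PPC_CONFIGURE_V3_MMU: validate the requested MMU mode and
 * process table, switching the guest between HPT and radix if needed.
 */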
5447static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
5448{
5449 unsigned long lpcr;
5450 int radix;
5451 int err;
5452
5453	/* The MMUv3 API only applies to ISA v3.00 (POWER9) and later hosts */
5454 if (!cpu_has_feature(CPU_FTR_ARCH_300))
5455 return -ENODEV;
5456
5457	/* If any unknown flags are set, reject it */
5458 if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
5459 return -EINVAL;
5460
5461	/* The GR (guest radix) bit in the process_table field must match */
5462 radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
5463 if (!!(cfg->process_table & PATB_GR) != radix)
5464 return -EINVAL;
5465
5466	/* Process table size field must be reasonable, i.e. <= 24 */
5467 if ((cfg->process_table & PRTS_MASK) > 24)
5468 return -EINVAL;
5469
5470	/* We can change a guest to/from radix only if the host is radix */
5471 if (radix && !radix_enabled())
5472 return -EINVAL;
5473
5474	/* If we're a nested hypervisor, we currently only support radix */
5475 if (kvmhv_on_pseries() && !radix)
5476 return -EINVAL;
5477
5478 mutex_lock(&kvm->arch.mmu_setup_lock);
5479 if (radix != kvm_is_radix(kvm)) {
5480 if (kvm->arch.mmu_ready) {
5481 kvm->arch.mmu_ready = 0;
5482			/* order mmu_ready vs. vcpus_running */
5483 smp_mb();
5484 if (atomic_read(&kvm->arch.vcpus_running)) {
5485 kvm->arch.mmu_ready = 1;
5486 err = -EBUSY;
5487 goto out_unlock;
5488 }
5489 }
5490 if (radix)
5491 err = kvmppc_switch_mmu_to_radix(kvm);
5492 else
5493 err = kvmppc_switch_mmu_to_hpt(kvm);
5494 if (err)
5495 goto out_unlock;
5496 }
5497
5498 kvm->arch.process_table = cfg->process_table;
5499 kvmppc_setup_partition_table(kvm);
5500
5501 lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
5502 kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
5503 err = 0;
5504
5505 out_unlock:
5506 mutex_unlock(&kvm->arch.mmu_setup_lock);
5507 return err;
5508}
5509
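/*
 * Enable the guest to act as a nested hypervisor, or test whether
 * that is possible (kvm == NULL).
 */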
5510static int kvmhv_enable_nested(struct kvm *kvm)
5511{
5512 if (!nested)
5513 return -EPERM;
5514 if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
5515 return -ENODEV;
5516
5517	/* kvm == NULL means the caller is only testing the capability */
5518 if (kvm)
5519 kvm->arch.nested_enable = true;
5520 return 0;
5521}
5522
5523static int kvmhv_load_from_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
5524 int size)
5525{
5526 int rc = -EINVAL;
5527
5528 if (kvmhv_vcpu_is_radix(vcpu)) {
5529 rc = kvmhv_copy_from_guest_radix(vcpu, *eaddr, ptr, size);
5530
5531 if (rc > 0)
5532 rc = -EINVAL;
5533 }
5534
5535	/* For now quadrants are the only way to access nested guest memory */
5536 if (rc && vcpu->arch.nested)
5537 rc = -EAGAIN;
5538
5539 return rc;
5540}
5541
5542static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
5543 int size)
5544{
5545 int rc = -EINVAL;
5546
5547 if (kvmhv_vcpu_is_radix(vcpu)) {
5548 rc = kvmhv_copy_to_guest_radix(vcpu, *eaddr, ptr, size);
5549
5550 if (rc > 0)
5551 rc = -EINVAL;
5552 }
5553
5554	/* For now quadrants are the only way to access nested guest memory */
5555 if (rc && vcpu->arch.nested)
5556 rc = -EAGAIN;
5557
5558 return rc;
5559}
5560
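/* Unpin a VPA-backed page and forget its registration state */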
5561static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa)
5562{
5563 unpin_vpa(kvm, vpa);
5564 vpa->gpa = 0;
5565 vpa->pinned_addr = NULL;
5566 vpa->dirty = false;
5567 vpa->update_pending = 0;
5568}
5569
5570/*
5571 * Enable a guest to become a secure VM, or test whether
5572 * that could be enabled.
5573 * Called when the KVM_CAP_PPC_SECURE_GUEST capability is
5574 * tested (kvm == NULL) or enabled (kvm != NULL).
5575 */
5576static int kvmhv_enable_svm(struct kvm *kvm)
5577{
5578 if (!kvmppc_uvmem_available())
5579 return -EINVAL;
5580 if (kvm)
5581 kvm->arch.svm_enabled = 1;
5582 return 0;
5583}
5584
5585/*
5586 * IOCTL handler to turn off secure mode of guest
5587 *
5588 * - Release all device pages
5589 * - Issue ucall to terminate the guest on the UV side
5590 * - Unpin the VPA pages
5591 * - Reinit the partition-scoped page tables
5592 */
5593static int kvmhv_svm_off(struct kvm *kvm)
5594{
5595 struct kvm_vcpu *vcpu;
5596 int mmu_was_ready;
5597 int srcu_idx;
5598 int ret = 0;
5599 int i;
5600
5601 if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
5602 return ret;
5603
5604 mutex_lock(&kvm->arch.mmu_setup_lock);
5605 mmu_was_ready = kvm->arch.mmu_ready;
5606 if (kvm->arch.mmu_ready) {
5607 kvm->arch.mmu_ready = 0;
5608		/* order mmu_ready vs. vcpus_running */
5609 smp_mb();
5610 if (atomic_read(&kvm->arch.vcpus_running)) {
5611 kvm->arch.mmu_ready = 1;
5612 ret = -EBUSY;
5613 goto out;
5614 }
5615 }
5616
5617 srcu_idx = srcu_read_lock(&kvm->srcu);
5618 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
5619 struct kvm_memory_slot *memslot;
5620 struct kvm_memslots *slots = __kvm_memslots(kvm, i);
5621
5622 if (!slots)
5623 continue;
5624
5625 kvm_for_each_memslot(memslot, slots) {
5626 kvmppc_uvmem_drop_pages(memslot, kvm, true);
5627 uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
5628 }
5629 }
5630 srcu_read_unlock(&kvm->srcu, srcu_idx);
5631
5632 ret = uv_svm_terminate(kvm->arch.lpid);
5633 if (ret != U_SUCCESS) {
5634 ret = -EINVAL;
5635 goto out;
5636 }
5637
5638	/*
5639	 * When a secure guest is reset, all the guest pages are sent
5640	 * to the UV via UV_PAGE_IN before the non-boot vcpus get a
5641	 * chance to run and unpin their VPA pages. Unpinning of all
5642	 * VPA pages is done here explicitly so that VPA pages
5643	 * can be migrated to the secure side.
5644	 *
5645	 * This is required for the secure SMP guest to reboot
5646	 * correctly.
5647	 */
5648 kvm_for_each_vcpu(i, vcpu, kvm) {
5649 spin_lock(&vcpu->arch.vpa_update_lock);
5650 unpin_vpa_reset(kvm, &vcpu->arch.dtl);
5651 unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
5652 unpin_vpa_reset(kvm, &vcpu->arch.vpa);
5653 spin_unlock(&vcpu->arch.vpa_update_lock);
5654 }
5655
5656 kvmppc_setup_partition_table(kvm);
5657 kvm->arch.secure_guest = 0;
5658 kvm->arch.mmu_ready = mmu_was_ready;
5659out:
5660 mutex_unlock(&kvm->arch.mmu_setup_lock);
5661 return ret;
5662}
5663
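/* Can this host run HPT guests on an ISA v3.0 (POWER9) CPU? */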
5664static bool kvmppc_hash_v3_possible(void)
5665{
5666 if (radix_enabled() && no_mixing_hpt_and_radix)
5667 return false;
5668
5669 return cpu_has_feature(CPU_FTR_ARCH_300) &&
5670 cpu_has_feature(CPU_FTR_HVMODE);
5671}
5672
5673static struct kvmppc_ops kvm_ops_hv = {
5674 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
5675 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
5676 .get_one_reg = kvmppc_get_one_reg_hv,
5677 .set_one_reg = kvmppc_set_one_reg_hv,
5678 .vcpu_load = kvmppc_core_vcpu_load_hv,
5679 .vcpu_put = kvmppc_core_vcpu_put_hv,
5680 .inject_interrupt = kvmppc_inject_interrupt_hv,
5681 .set_msr = kvmppc_set_msr_hv,
5682 .vcpu_run = kvmppc_vcpu_run_hv,
5683 .vcpu_create = kvmppc_core_vcpu_create_hv,
5684 .vcpu_free = kvmppc_core_vcpu_free_hv,
5685 .check_requests = kvmppc_core_check_requests_hv,
5686 .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv,
5687 .flush_memslot = kvmppc_core_flush_memslot_hv,
5688 .prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
5689 .commit_memory_region = kvmppc_core_commit_memory_region_hv,
5690 .unmap_hva_range = kvm_unmap_hva_range_hv,
5691 .age_hva = kvm_age_hva_hv,
5692 .test_age_hva = kvm_test_age_hva_hv,
5693 .set_spte_hva = kvm_set_spte_hva_hv,
5694 .free_memslot = kvmppc_core_free_memslot_hv,
5695 .init_vm = kvmppc_core_init_vm_hv,
5696 .destroy_vm = kvmppc_core_destroy_vm_hv,
5697 .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
5698 .emulate_op = kvmppc_core_emulate_op_hv,
5699 .emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
5700 .emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
5701 .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
5702 .arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
5703 .hcall_implemented = kvmppc_hcall_impl_hv,
5704#ifdef CONFIG_KVM_XICS
5705 .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
5706 .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
5707#endif
5708 .configure_mmu = kvmhv_configure_mmu,
5709 .get_rmmu_info = kvmhv_get_rmmu_info,
5710 .set_smt_mode = kvmhv_set_smt_mode,
5711 .enable_nested = kvmhv_enable_nested,
5712 .load_from_eaddr = kvmhv_load_from_eaddr,
5713 .store_to_eaddr = kvmhv_store_to_eaddr,
5714 .enable_svm = kvmhv_enable_svm,
5715 .svm_off = kvmhv_svm_off,
5716 .hash_v3_possible = kvmppc_hash_v3_possible,
5717};
5718
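/*
 * Allocate a sibling_subcore_state structure per core and point each
 * thread's paca at it; used to coordinate the threads of a core while
 * guests are running.
 */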
5719static int kvm_init_subcore_bitmap(void)
5720{
5721 int i, j;
5722 int nr_cores = cpu_nr_cores();
5723 struct sibling_subcore_state *sibling_subcore_state;
5724
5725 for (i = 0; i < nr_cores; i++) {
5726 int first_cpu = i * threads_per_core;
5727 int node = cpu_to_node(first_cpu);
5728
5729
5730 if (paca_ptrs[first_cpu]->sibling_subcore_state)
5731 continue;
5732
5733 sibling_subcore_state =
5734 kzalloc_node(sizeof(struct sibling_subcore_state),
5735 GFP_KERNEL, node);
5736 if (!sibling_subcore_state)
5737 return -ENOMEM;
5738
5739
5740 for (j = 0; j < threads_per_core; j++) {
5741 int cpu = first_cpu + j;
5742
5743 paca_ptrs[cpu]->sibling_subcore_state =
5744 sibling_subcore_state;
5745 }
5746 }
5747 return 0;
5748}
5749
5750static int kvmppc_radix_possible(void)
5751{
5752 return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled();
5753}
5754
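/* Module init: check host support, set up global state and register kvm_ops_hv */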
5755static int kvmppc_book3s_init_hv(void)
5756{
5757 int r;
5758
5759 if (!tlbie_capable) {
5760 pr_err("KVM-HV: Host does not support TLBIE\n");
5761 return -ENODEV;
5762 }
5763
5764
5765
5766
5767 r = kvmppc_core_check_processor_compat_hv();
5768 if (r < 0)
5769 return -ENODEV;
5770
5771 r = kvmhv_nested_init();
5772 if (r)
5773 return r;
5774
5775 r = kvm_init_subcore_bitmap();
5776 if (r)
5777 return r;
5778
5779	/*
5780	 * We need a way of accessing the XICS interrupt controller,
5781	 * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or
5782	 * indirectly, via OPAL.
5783	 */
5784#ifdef CONFIG_SMP
5785 if (!xics_on_xive() && !kvmhv_on_pseries() &&
5786 !local_paca->kvm_hstate.xics_phys) {
5787 struct device_node *np;
5788
5789 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
5790 if (!np) {
5791 pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
5792 return -ENODEV;
5793 }
5794		/* presence of intc confirmed - node can be dropped again */
5795 of_node_put(np);
5796 }
5797#endif
5798
5799 kvm_ops_hv.owner = THIS_MODULE;
5800 kvmppc_hv_ops = &kvm_ops_hv;
5801
5802 init_default_hcalls();
5803
5804 init_vcore_lists();
5805
5806 r = kvmppc_mmu_hv_init();
5807 if (r)
5808 return r;
5809
5810 if (kvmppc_radix_possible())
5811 r = kvmppc_radix_init();
5812
5813	/*
5814	 * POWER9 chips before version 2.02 can't have some threads in
5815	 * HPT mode and some in radix mode on the same core.
5816	 */
5817 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
5818 unsigned int pvr = mfspr(SPRN_PVR);
5819 if ((pvr >> 16) == PVR_POWER9 &&
5820 (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
5821 ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
5822 no_mixing_hpt_and_radix = true;
5823 }
5824
5825 r = kvmppc_uvmem_init();
5826 if (r < 0)
5827 pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
5828
5829 return r;
5830}
5831
5832static void kvmppc_book3s_exit_hv(void)
5833{
5834 kvmppc_uvmem_free();
5835 kvmppc_free_host_rm_ops();
5836 if (kvmppc_radix_possible())
5837 kvmppc_radix_exit();
5838 kvmppc_hv_ops = NULL;
5839 kvmhv_nested_exit();
5840}
5841
5842module_init(kvmppc_book3s_init_hv);
5843module_exit(kvmppc_book3s_exit_hv);
5844MODULE_LICENSE("GPL");
5845MODULE_ALIAS_MISCDEV(KVM_MINOR);
5846MODULE_ALIAS("devname:kvm");
5847