/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched/signal.h>
#include <linux/sched/stat.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>
#include <linux/miscdevice.h>
#include <linux/debugfs.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/of.h>

#include <asm/reg.h>
#include <asm/ppc-opcode.h>
#include <asm/asm-prototypes.h>
#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <asm/smp.h>
#include <asm/dbell.h>
#include <asm/hmi.h>
#include <asm/pnv-pci.h>
#include <asm/mmu.h>
#include <asm/opal.h>
#include <asm/xics.h>
#include <asm/xive.h>

#include "book3s.h"

#define CREATE_TRACE_POINTS
#include "trace_hv.h"
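
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */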

/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)
/* Used to indicate that a guest passthrough interrupt needs to be handled */
#define RESUME_PASSTHROUGH	(RESUME_GUEST | RESUME_FLAG_ARCH2)

/* Used as a "null" value for timebase values */
#define TB_NIL	(~(u64)0)

static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);

static int dynamic_mt_modes = 6;
module_param(dynamic_mt_modes, int, 0644);
MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
static int target_smt_mode;
module_param(target_smt_mode, int, 0644);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");

static bool indep_threads_mode = true;
module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");

#ifdef CONFIG_KVM_XICS
static struct kernel_param_ops module_param_ops = {
	.set = param_set_int,
	.get = param_get_int,
};

module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644);
MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");

module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif

/* If set, the threads on each CPU core have to be in the same MMU mode */
static bool no_mixing_hpt_and_radix;

static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);

static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
						    int *ip)
{
	int i = *ip;
	struct kvm_vcpu *vcpu;

	while (++i < MAX_SMT_THREADS) {
		vcpu = READ_ONCE(vc->runnable_threads[i]);
		if (vcpu) {
			*ip = i;
			return vcpu;
		}
	}
	return NULL;
}

/* Used to traverse the list of runnable threads for a given vcore */
#define for_each_runnable_thread(i, vcpu, vc) \
	for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )

static bool kvmppc_ipi_thread(int cpu)
{
	unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);

	/* On POWER9 we can use msgsnd to IPI any cpu */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		msg |= get_hard_smp_processor_id(cpu);
		smp_mb();
		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
		return true;
	}

	/* On POWER8 for IPIs to threads in the same core, use msgsnd */
	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		preempt_disable();
		if (cpu_first_thread_sibling(cpu) ==
		    cpu_first_thread_sibling(smp_processor_id())) {
			msg |= cpu_thread_in_core(cpu);
			smp_mb();
			__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
			preempt_enable();
			return true;
		}
		preempt_enable();
	}

#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
	if (cpu >= 0 && cpu < nr_cpu_ids) {
		if (paca[cpu].kvm_hstate.xics_phys) {
			xics_wake_cpu(cpu);
			return true;
		}
		opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
		return true;
	}
#endif

	return false;
}

static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
	int cpu;
	struct swait_queue_head *wqp;

	wqp = kvm_arch_vcpu_wq(vcpu);
	if (swq_has_sleeper(wqp)) {
		swake_up(wqp);
		++vcpu->stat.halt_wakeup;
	}

	cpu = READ_ONCE(vcpu->arch.thread_cpu);
	if (cpu >= 0 && kvmppc_ipi_thread(cpu))
		return;

	/* CPU points to the first thread of the core */
	cpu = vcpu->cpu;
	if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
		smp_send_reschedule(cpu);
}
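
/*
 * Stolen-time accounting.
 *
 * Stolen time is counted as time when either the vcpu is able to
 * run as part of a virtual core, but the task running the vcore
 * is preempted or sleeping, or when the vcpu needs something done
 * in the kernel by the task running the vcpu, but that task is
 * preempted or sleeping.  Those two things have to be counted
 * separately, since one of the vcpu tasks will take on the job
 * of running the core, and the other vcpu tasks in the vcore will
 * sleep waiting for it to do that, but that sleep shouldn't count
 * as stolen time.
 *
 * Hence we accumulate stolen time when the vcpu can run as part of
 * a vcore using vc->stolen_tb, and the stolen time when the vcpu
 * needs its task to do something on its behalf (e.g. fetch a page)
 * using vcpu->arch.busy_stolen.  vc->stolen_tb is protected by
 * vc->stoltb_lock, and vcpu->arch.busy_stolen by
 * vcpu->arch.tbacct_lock.  All quantities are in timebase ticks.
 */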
static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
{
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	vc->preempt_tb = mftb();
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}

static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
{
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	if (vc->preempt_tb != TB_NIL) {
		vc->stolen_tb += mftb() - vc->preempt_tb;
		vc->preempt_tb = TB_NIL;
	}
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}

static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	unsigned long flags;

	/*
	 * We can test vc->runner without taking the vcore lock,
	 * because only this task ever sets vc->runner to this
	 * vcpu, and once it is set to this vcpu, only this task
	 * ever sets it to NULL.
	 */
	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
		kvmppc_core_end_stolen(vc);

	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
	    vcpu->arch.busy_preempt != TB_NIL) {
		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
		vcpu->arch.busy_preempt = TB_NIL;
	}
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}

static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	unsigned long flags;

	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
		kvmppc_core_start_stolen(vc);

	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
		vcpu->arch.busy_preempt = mftb();
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}

static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
{
	/*
	 * Check for illegal transactional state bit combination
	 * (both TS bits set) and if found, force the TS field
	 * to a safe state (all zero).
	 */
	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
		msr &= ~MSR_TS_MASK;
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}

static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}
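
/* Dummy value used in computing PCR value below */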
#define PCR_ARCH_300	(PCR_ARCH_207 << 1)

static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
	unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	/* We can (emulate) our own architecture version and anything older */
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		host_pcr_bit = PCR_ARCH_300;
	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
		host_pcr_bit = PCR_ARCH_207;
	else if (cpu_has_feature(CPU_FTR_ARCH_206))
		host_pcr_bit = PCR_ARCH_206;
	else
		host_pcr_bit = PCR_ARCH_205;

	/* Determine lowest PCR bit needed to run guest in given PVR level */
	guest_pcr_bit = host_pcr_bit;
	if (arch_compat) {
		switch (arch_compat) {
		case PVR_ARCH_205:
			guest_pcr_bit = PCR_ARCH_205;
			break;
		case PVR_ARCH_206:
		case PVR_ARCH_206p:
			guest_pcr_bit = PCR_ARCH_206;
			break;
		case PVR_ARCH_207:
			guest_pcr_bit = PCR_ARCH_207;
			break;
		case PVR_ARCH_300:
			guest_pcr_bit = PCR_ARCH_300;
			break;
		default:
			return -EINVAL;
		}
	}

	/* Check requested PCR bits don't exceed our capabilities */
	if (guest_pcr_bit > host_pcr_bit)
		return -EINVAL;

	spin_lock(&vc->lock);
	vc->arch_compat = arch_compat;
	/* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
	vc->pcr = host_pcr_bit - guest_pcr_bit;
	spin_unlock(&vc->lock);

	return 0;
}

static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err("  ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	struct kvm_vcpu *ret;

	mutex_lock(&kvm->lock);
	ret = kvm_get_vcpu_by_id(kvm, id);
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->__old_status |= LPPACA_OLD_SHARED_PROC;
	vpa->yield_count = cpu_to_be32(1);
}

static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
		   unsigned long addr, unsigned long len)
{
	/* check address is cacheline aligned */
	if (addr & (L1_CACHE_BYTES - 1))
		return -EINVAL;
	spin_lock(&vcpu->arch.vpa_update_lock);
	if (v->next_gpa != addr || v->len != len) {
		v->next_gpa = addr;
		v->len = addr ? len : 0;
		v->update_pending = 1;
	}
	spin_unlock(&vcpu->arch.vpa_update_lock);
	return 0;
}
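
/* Length for a per-processor buffer is passed in at offset 4 in the buffer */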
struct reg_vpa {
	u32 dummy;
	union {
		__be16 hword;
		__be32 word;
	} length;
};

static int vpa_is_registered(struct kvmppc_vpa *vpap)
{
	if (vpap->update_pending)
		return vpap->next_gpa != 0;
	return vpap->pinned_addr != NULL;
}

static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long len, nb;
	void *va;
	struct kvm_vcpu *tvcpu;
	int err;
	int subfunc;
	struct kvmppc_vpa *vpap;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

	subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
	if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
	    subfunc == H_VPA_REG_SLB) {
		/* Registering new area - address must be cache-line aligned */
		if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
			return H_PARAMETER;

		/* convert logical addr to kernel addr and read length */
		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
		if (va == NULL)
			return H_PARAMETER;
		if (subfunc == H_VPA_REG_VPA)
			len = be16_to_cpu(((struct reg_vpa *)va)->length.hword);
		else
			len = be32_to_cpu(((struct reg_vpa *)va)->length.word);
		kvmppc_unpin_guest_page(kvm, va, vpa, false);

		/* Check length */
		if (len > nb || len < sizeof(struct reg_vpa))
			return H_PARAMETER;
	} else {
		vpa = 0;
		len = 0;
	}

	err = H_PARAMETER;
	vpap = NULL;
	spin_lock(&tvcpu->arch.vpa_update_lock);

	switch (subfunc) {
	case H_VPA_REG_VPA:		/* register VPA */
		/*
		 * The size of our lppaca is 1kB because of the way we align
		 * it for the guest to avoid crossing a 4kB boundary. We only
		 * use 640 bytes of the structure though, so we should accept
		 * clients that set a size of 640.
		 */
		if (len < 640)
			break;
		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_REG_DTL:		/* register DTL */
		if (len < sizeof(struct dtl_entry))
			break;
		len -= len % sizeof(struct dtl_entry);

		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_REG_SLB:		/* register SLB shadow buffer */
		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;

	case H_VPA_DEREG_VPA:		/* deregister VPA */
		/* Check they don't still have a DTL or SLB buf registered */
		err = H_RESOURCE;
		if (vpa_is_registered(&tvcpu->arch.dtl) ||
		    vpa_is_registered(&tvcpu->arch.slb_shadow))
			break;

		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_DEREG_DTL:		/* deregister DTL */
		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_DEREG_SLB:		/* deregister SLB shadow buffer */
		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;
	}

	if (vpap) {
		vpap->next_gpa = vpa;
		vpap->len = len;
		vpap->update_pending = 1;
	}

	spin_unlock(&tvcpu->arch.vpa_update_lock);

	return err;
}

static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
{
	struct kvm *kvm = vcpu->kvm;
	void *va;
	unsigned long nb;
	unsigned long gpa;

	/*
	 * We need to pin the page pointed to by vpap->next_gpa,
	 * but we can't call kvmppc_pin_guest_page under the lock
	 * as it does get_user_pages() and could sleep.  So we
	 * have to drop the lock, pin the page, then get the lock
	 * again and check that a new area didn't get registered
	 * in the meantime.
	 */
	for (;;) {
		gpa = vpap->next_gpa;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		va = NULL;
		nb = 0;
		if (gpa)
			va = kvmppc_pin_guest_page(kvm, gpa, &nb);
		spin_lock(&vcpu->arch.vpa_update_lock);
		if (gpa == vpap->next_gpa)
			break;
		/* sigh... unpin that one and try again */
		if (va)
			kvmppc_unpin_guest_page(kvm, va, gpa, false);
	}

	vpap->update_pending = 0;
	if (va && nb < vpap->len) {
		/*
		 * If it's now too short, it must be that userspace
		 * has changed the mappings underlying guest memory,
		 * so unregister the region.
		 */
		kvmppc_unpin_guest_page(kvm, va, gpa, false);
		va = NULL;
	}
	if (vpap->pinned_addr)
		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
					vpap->dirty);
	vpap->gpa = gpa;
	vpap->pinned_addr = va;
	vpap->dirty = false;
	if (va)
		vpap->pinned_end = va + vpap->len;
}

static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.vpa.update_pending ||
	      vcpu->arch.slb_shadow.update_pending ||
	      vcpu->arch.dtl.update_pending))
		return;

	spin_lock(&vcpu->arch.vpa_update_lock);
	if (vcpu->arch.vpa.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
		if (vcpu->arch.vpa.pinned_addr)
			init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
	}
	if (vcpu->arch.dtl.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
		vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
		vcpu->arch.dtl_index = 0;
	}
	if (vcpu->arch.slb_shadow.update_pending)
		kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
	spin_unlock(&vcpu->arch.vpa_update_lock);
}
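
/*
 * Return the accumulated stolen time for the vcore up until `now'.
 * The caller should hold the vcore lock.
 */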
static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
{
	u64 p;
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	p = vc->stolen_tb;
	if (vc->vcore_state != VCORE_INACTIVE &&
	    vc->preempt_tb != TB_NIL)
		p += now - vc->preempt_tb;
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
	return p;
}

static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
				    struct kvmppc_vcore *vc)
{
	struct dtl_entry *dt;
	struct lppaca *vpa;
	unsigned long stolen;
	unsigned long core_stolen;
	u64 now;
	unsigned long flags;

	dt = vcpu->arch.dtl_ptr;
	vpa = vcpu->arch.vpa.pinned_addr;
	now = mftb();
	core_stolen = vcore_stolen_time(vc, now);
	stolen = core_stolen - vcpu->arch.stolen_logged;
	vcpu->arch.stolen_logged = core_stolen;
	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	stolen += vcpu->arch.busy_stolen;
	vcpu->arch.busy_stolen = 0;
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
	if (!dt || !vpa)
		return;
	memset(dt, 0, sizeof(struct dtl_entry));
	dt->dispatch_reason = 7;
	dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
	dt->timebase = cpu_to_be64(now + vc->tb_offset);
	dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
	dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
	dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
	++dt;
	if (dt == vcpu->arch.dtl.pinned_end)
		dt = vcpu->arch.dtl.pinned_addr;
	vcpu->arch.dtl_ptr = dt;
	/* order writing *dt vs. writing vpa->dtl_idx */
	smp_wmb();
	vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
	vcpu->arch.dtl.dirty = true;
}
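
/* See if there is a doorbell interrupt pending for a vcpu */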
static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
{
	int thr;
	struct kvmppc_vcore *vc;

	if (vcpu->arch.doorbell_request)
		return true;
	/*
	 * Ensure that the read of vcore->dpdes comes after the read
	 * of vcpu->doorbell_request; pairs with the write side that
	 * sets doorbell_request and updates vcore->dpdes.
	 */
	smp_rmb();
	vc = vcpu->arch.vcore;
	thr = vcpu->vcpu_id - vc->first_vcpuid;
	return !!(vc->dpdes & (1 << thr));
}

static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
		return true;
	if ((!vcpu->arch.vcore->arch_compat) &&
	    cpu_has_feature(CPU_FTR_ARCH_207S))
		return true;
	return false;
}

static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
			     unsigned long resource, unsigned long value1,
			     unsigned long value2)
{
	switch (resource) {
	case H_SET_MODE_RESOURCE_SET_CIABR:
		if (!kvmppc_power8_compatible(vcpu))
			return H_P2;
		if (value2)
			return H_P4;
		if (mflags)
			return H_UNSUPPORTED_FLAG_START;
		/* Guests can't breakpoint the hypervisor */
		if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER)
			return H_P3;
		vcpu->arch.ciabr = value1;
		return H_SUCCESS;
	case H_SET_MODE_RESOURCE_SET_DAWR:
		if (!kvmppc_power8_compatible(vcpu))
			return H_P2;
		if (mflags)
			return H_UNSUPPORTED_FLAG_START;
		if (value2 & DABRX_HYP)
			return H_P4;
		vcpu->arch.dawr = value1;
		vcpu->arch.dawrx = value2;
		return H_SUCCESS;
	default:
		return H_TOO_HARD;
	}
}

static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
{
	struct kvmppc_vcore *vcore = target->arch.vcore;

	/*
	 * We expect to have been called by the real mode handler
	 * (kvmppc_rm_h_confer()) which would have directly returned
	 * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
	 * have useful work to do and should not confer) so we don't
	 * recheck that here.
	 */

	spin_lock(&vcore->lock);
	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
	    vcore->vcore_state != VCORE_INACTIVE &&
	    vcore->runner)
		target = vcore->runner;
	spin_unlock(&vcore->lock);

	return kvm_vcpu_yield_to(target);
}

static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
{
	int yield_count = 0;
	struct lppaca *lppaca;

	spin_lock(&vcpu->arch.vpa_update_lock);
	lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
	if (lppaca)
		yield_count = be32_to_cpu(lppaca->yield_count);
	spin_unlock(&vcpu->arch.vpa_update_lock);
	return yield_count;
}

int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	int yield_count;
	struct kvm_vcpu *tvcpu;
	int idx, rc;

	if (req <= MAX_HCALL_OPCODE &&
	    !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls))
		return RESUME_HOST;

	switch (req) {
	case H_CEDE:
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (tvcpu->arch.ceded)
			kvmppc_fast_vcpu_kick_hv(tvcpu);
		break;
	case H_CONFER:
		target = kvmppc_get_gpr(vcpu, 4);
		if (target == -1)
			break;
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		yield_count = kvmppc_get_gpr(vcpu, 5);
		if (kvmppc_get_yield_count(tvcpu) != yield_count)
			break;
		kvm_arch_vcpu_yield_to(tvcpu);
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	case H_RTAS:
		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
			return RESUME_HOST;

		idx = srcu_read_lock(&vcpu->kvm->srcu);
		rc = kvmppc_rtas_hcall(vcpu);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);

		if (rc == -ENOENT)
			return RESUME_HOST;
		else if (rc == 0)
			break;

		/* Send the error out to userspace via KVM_RUN */
		return rc;
	case H_LOGICAL_CI_LOAD:
		ret = kvmppc_h_logical_ci_load(vcpu);
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_LOGICAL_CI_STORE:
		ret = kvmppc_h_logical_ci_store(vcpu);
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_SET_MODE:
		ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6),
					kvmppc_get_gpr(vcpu, 7));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_XIRR:
	case H_CPPR:
	case H_EOI:
	case H_IPI:
	case H_IPOLL:
	case H_XIRR_X:
		if (kvmppc_xics_enabled(vcpu)) {
			if (xive_enabled()) {
				ret = H_NOT_AVAILABLE;
				return RESUME_GUEST;
			}
			ret = kvmppc_xics_hcall(vcpu, req);
			break;
		}
		return RESUME_HOST;
	case H_PUT_TCE:
		ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
				       kvmppc_get_gpr(vcpu, 5),
				       kvmppc_get_gpr(vcpu, 6));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_PUT_TCE_INDIRECT:
		ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
						kvmppc_get_gpr(vcpu, 5),
						kvmppc_get_gpr(vcpu, 6),
						kvmppc_get_gpr(vcpu, 7));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_STUFF_TCE:
		ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
					 kvmppc_get_gpr(vcpu, 5),
					 kvmppc_get_gpr(vcpu, 6),
					 kvmppc_get_gpr(vcpu, 7));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

static int kvmppc_hcall_impl_hv(unsigned long cmd)
{
	switch (cmd) {
	case H_CEDE:
	case H_PROD:
	case H_CONFER:
	case H_REGISTER_VPA:
	case H_SET_MODE:
	case H_LOGICAL_CI_LOAD:
	case H_LOGICAL_CI_STORE:
#ifdef CONFIG_KVM_XICS
	case H_XIRR:
	case H_CPPR:
	case H_EOI:
	case H_IPI:
	case H_IPOLL:
	case H_XIRR_X:
#endif
		return 1;
	}

	/* See if it's in the real-mode table */
	return kvmppc_hcall_impl_hv_realmode(cmd);
}

static int kvmppc_emulate_debug_inst(struct kvm_run *run,
				     struct kvm_vcpu *vcpu)
{
	u32 last_inst;

	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
					EMULATE_DONE) {
		/*
		 * Fetch failed when we tried to fetch the instruction.
		 * Allow guest to retry fetch.
		 */
		return RESUME_GUEST;
	}

	if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
		run->exit_reason = KVM_EXIT_DEBUG;
		run->debug.arch.address = kvmppc_get_pc(vcpu);
		return RESUME_HOST;
	} else {
		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
		return RESUME_GUEST;
	}
}

static void do_nothing(void *x)
{
}

static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
{
	int thr, cpu, pcpu, nthreads;
	struct kvm_vcpu *v;
	unsigned long dpdes;

	nthreads = vcpu->kvm->arch.emul_smt_mode;
	dpdes = 0;
	cpu = vcpu->vcpu_id & ~(nthreads - 1);
	for (thr = 0; thr < nthreads; ++thr, ++cpu) {
		v = kvmppc_find_vcpu(vcpu->kvm, cpu);
		if (!v)
			continue;
		/*
		 * If the vcpu is currently running on a physical cpu thread,
		 * interrupt it in order to pull it out of the guest briefly,
		 * which will update its vcore->dpdes value.
		 */
		pcpu = READ_ONCE(v->cpu);
		if (pcpu >= 0)
			smp_call_function_single(pcpu, do_nothing, NULL, 1);
		if (kvmppc_doorbell_pending(v))
			dpdes |= 1 << thr;
	}
	return dpdes;
}
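
/*
 * On POWER9, emulate doorbell-related instructions in order to
 * give the guest the illusion of running on a multi-threaded core.
 * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
 * and mfspr DPDES.
 */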
static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
{
	u32 inst, rb, thr;
	unsigned long arg;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu *tvcpu;

	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
		return RESUME_GUEST;
	if (get_op(inst) != 31)
		return EMULATE_FAIL;
	rb = get_rb(inst);
	thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
	switch (get_xop(inst)) {
	case OP_31_XOP_MSGSNDP:
		arg = kvmppc_get_gpr(vcpu, rb);
		if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
			break;
		arg &= 0x3f;
		if (arg >= kvm->arch.emul_smt_mode)
			break;
		tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
		if (!tvcpu)
			break;
		if (!tvcpu->arch.doorbell_request) {
			tvcpu->arch.doorbell_request = 1;
			kvmppc_fast_vcpu_kick_hv(tvcpu);
		}
		break;
	case OP_31_XOP_MSGCLRP:
		arg = kvmppc_get_gpr(vcpu, rb);
		if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
			break;
		vcpu->arch.vcore->dpdes = 0;
		vcpu->arch.doorbell_request = 0;
		break;
	case OP_31_XOP_MFSPR:
		switch (get_sprn(inst)) {
		case SPRN_TIR:
			arg = thr;
			break;
		case SPRN_DPDES:
			arg = kvmppc_read_dpdes(vcpu);
			break;
		default:
			return EMULATE_FAIL;
		}
		kvmppc_set_gpr(vcpu, get_rt(inst), arg);
		break;
	default:
		return EMULATE_FAIL;
	}
	kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
	return RESUME_GUEST;
}

/* Called with vcpu->arch.vcore->lock held */
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
				 struct task_struct *tsk)
{
	int r = RESUME_HOST;

	vcpu->stat.sum_exits++;

	/*
	 * This can happen if an interrupt occurs in the last stages
	 * of guest entry or the first stages of guest exit (i.e. after
	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
	 * That can happen due to a bug, or due to a machine check
	 * occurring at just the wrong time.
	 */
	if (vcpu->arch.shregs.msr & MSR_HV) {
		printk(KERN_EMERG "KVM trap in HV mode!\n");
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		kvmppc_dump_regs(vcpu);
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		return RESUME_HOST;
	}
	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
	case BOOK3S_INTERRUPT_H_DOORBELL:
	case BOOK3S_INTERRUPT_H_VIRT:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
	case BOOK3S_INTERRUPT_HMI:
	case BOOK3S_INTERRUPT_PERFMON:
	case BOOK3S_INTERRUPT_SYSTEM_RESET:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_MACHINE_CHECK:
		/* Exit to guest with KVM_EXIT_NMI as exit reason */
		run->exit_reason = KVM_EXIT_NMI;
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		/* Clear out the old NMI status from run->flags */
		run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
		/* Now set the NMI status */
		if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
			run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
		else
			run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;

		r = RESUME_HOST;
		/* Print the MCE event to host console. */
		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		/* hypercall with MSR_PR has already been handled in rmode,
		 * and never reaches here.
		 */

		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		r = RESUME_PAGE_FAULT;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
		vcpu->arch.fault_dsisr = 0;
		r = RESUME_PAGE_FAULT;
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * If the guest debug is disabled, generate a program interrupt
	 * to the guest. If guest debug is enabled, we need to check
	 * whether the instruction is a software breakpoint instruction.
	 * Accordingly return to Guest or Host.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
			vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
				swab32(vcpu->arch.emul_inst) :
				vcpu->arch.emul_inst;
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
			/* Need vcore unlocked to call kvmppc_get_last_inst */
			spin_unlock(&vcpu->arch.vcore->lock);
			r = kvmppc_emulate_debug_inst(run, vcpu);
			spin_lock(&vcpu->arch.vcore->lock);
		} else {
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			r = RESUME_GUEST;
		}
		break;
	/*
	 * This occurs if the guest (kernel or userspace) does something
	 * prohibited by HFSCR.
	 * On POWER9, this could be a doorbell instruction that we need
	 * to emulate.
	 * Otherwise, we just generate a program interrupt to the guest.
	 */
	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
		r = EMULATE_FAIL;
		if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
		    cpu_has_feature(CPU_FTR_ARCH_300)) {
			/* Need vcore unlocked to call kvmppc_get_last_inst */
			spin_unlock(&vcpu->arch.vcore->lock);
			r = kvmppc_emulate_doorbell_instr(vcpu);
			spin_lock(&vcpu->arch.vcore->lock);
		}
		if (r == EMULATE_FAIL) {
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			r = RESUME_GUEST;
		}
		break;
	case BOOK3S_INTERRUPT_HV_RM_HARD:
		r = RESUME_PASSTHROUGH;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		r = RESUME_HOST;
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	int i;

	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;
	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	int i, j;

	/* Only accept the same PVR as the host's, since we can't spoof it */
	if (sregs->pvr != vcpu->arch.pvr)
		return -EINVAL;

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
		bool preserve_top32)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 mask;

	mutex_lock(&kvm->lock);
	spin_lock(&vc->lock);
	/*
	 * If ILE (interrupt little-endian) has changed, update the
	 * MSR_LE bit in the intr_msr for each vcpu in this vcore.
	 */
	if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
		struct kvm_vcpu *vcpu;
		int i;

		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (vcpu->arch.vcore != vc)
				continue;
			if (new_lpcr & LPCR_ILE)
				vcpu->arch.intr_msr |= MSR_LE;
			else
				vcpu->arch.intr_msr &= ~MSR_LE;
		}
	}

	/*
	 * Userspace can only modify DPFD (default prefetch depth),
	 * ILE (interrupt little-endian) and TC (translation control).
	 * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.).
	 */
	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
	if (cpu_has_feature(CPU_FTR_ARCH_207S))
		mask |= LPCR_AIL;
	/*
	 * On POWER9, allow userspace to enable large decrementer for the
	 * guest, whether or not the host has it enabled.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		mask |= LPCR_LD;

	/* Broken 32-bit version of LPCR must not clear top bits */
	if (preserve_top32)
		mask &= 0xFFFFFFFF;
	vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
	spin_unlock(&vc->lock);
	mutex_unlock(&kvm->lock);
}

static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
				 union kvmppc_one_reg *val)
{
	int r = 0;
	long int i;

	switch (id) {
	case KVM_REG_PPC_DEBUG_INST:
		*val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
		break;
	case KVM_REG_PPC_HIOR:
		*val = get_reg_val(id, 0);
		break;
	case KVM_REG_PPC_DABR:
		*val = get_reg_val(id, vcpu->arch.dabr);
		break;
	case KVM_REG_PPC_DABRX:
		*val = get_reg_val(id, vcpu->arch.dabrx);
		break;
	case KVM_REG_PPC_DSCR:
		*val = get_reg_val(id, vcpu->arch.dscr);
		break;
	case KVM_REG_PPC_PURR:
		*val = get_reg_val(id, vcpu->arch.purr);
		break;
	case KVM_REG_PPC_SPURR:
		*val = get_reg_val(id, vcpu->arch.spurr);
		break;
	case KVM_REG_PPC_AMR:
		*val = get_reg_val(id, vcpu->arch.amr);
		break;
	case KVM_REG_PPC_UAMOR:
		*val = get_reg_val(id, vcpu->arch.uamor);
		break;
	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
		i = id - KVM_REG_PPC_MMCR0;
		*val = get_reg_val(id, vcpu->arch.mmcr[i]);
		break;
	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
		i = id - KVM_REG_PPC_PMC1;
		*val = get_reg_val(id, vcpu->arch.pmc[i]);
		break;
	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
		i = id - KVM_REG_PPC_SPMC1;
		*val = get_reg_val(id, vcpu->arch.spmc[i]);
		break;
	case KVM_REG_PPC_SIAR:
		*val = get_reg_val(id, vcpu->arch.siar);
		break;
	case KVM_REG_PPC_SDAR:
		*val = get_reg_val(id, vcpu->arch.sdar);
		break;
	case KVM_REG_PPC_SIER:
		*val = get_reg_val(id, vcpu->arch.sier);
		break;
	case KVM_REG_PPC_IAMR:
		*val = get_reg_val(id, vcpu->arch.iamr);
		break;
	case KVM_REG_PPC_PSPB:
		*val = get_reg_val(id, vcpu->arch.pspb);
		break;
	case KVM_REG_PPC_DPDES:
		*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
		break;
	case KVM_REG_PPC_VTB:
		*val = get_reg_val(id, vcpu->arch.vcore->vtb);
		break;
	case KVM_REG_PPC_DAWR:
		*val = get_reg_val(id, vcpu->arch.dawr);
		break;
	case KVM_REG_PPC_DAWRX:
		*val = get_reg_val(id, vcpu->arch.dawrx);
		break;
	case KVM_REG_PPC_CIABR:
		*val = get_reg_val(id, vcpu->arch.ciabr);
		break;
	case KVM_REG_PPC_CSIGR:
		*val = get_reg_val(id, vcpu->arch.csigr);
		break;
	case KVM_REG_PPC_TACR:
		*val = get_reg_val(id, vcpu->arch.tacr);
		break;
	case KVM_REG_PPC_TCSCR:
		*val = get_reg_val(id, vcpu->arch.tcscr);
		break;
	case KVM_REG_PPC_PID:
		*val = get_reg_val(id, vcpu->arch.pid);
		break;
	case KVM_REG_PPC_ACOP:
		*val = get_reg_val(id, vcpu->arch.acop);
		break;
	case KVM_REG_PPC_WORT:
		*val = get_reg_val(id, vcpu->arch.wort);
		break;
	case KVM_REG_PPC_TIDR:
		*val = get_reg_val(id, vcpu->arch.tid);
		break;
	case KVM_REG_PPC_PSSCR:
		*val = get_reg_val(id, vcpu->arch.psscr);
		break;
	case KVM_REG_PPC_VPA_ADDR:
		spin_lock(&vcpu->arch.vpa_update_lock);
		*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_SLB:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
		val->vpaval.length = vcpu->arch.slb_shadow.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_DTL:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.dtl.next_gpa;
		val->vpaval.length = vcpu->arch.dtl.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_TB_OFFSET:
		*val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
		break;
	case KVM_REG_PPC_LPCR:
	case KVM_REG_PPC_LPCR_64:
		*val = get_reg_val(id, vcpu->arch.vcore->lpcr);
		break;
	case KVM_REG_PPC_PPR:
		*val = get_reg_val(id, vcpu->arch.ppr);
		break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	case KVM_REG_PPC_TFHAR:
		*val = get_reg_val(id, vcpu->arch.tfhar);
		break;
	case KVM_REG_PPC_TFIAR:
		*val = get_reg_val(id, vcpu->arch.tfiar);
		break;
	case KVM_REG_PPC_TEXASR:
		*val = get_reg_val(id, vcpu->arch.texasr);
		break;
	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
		i = id - KVM_REG_PPC_TM_GPR0;
		*val = get_reg_val(id, vcpu->arch.gpr_tm[i]);
		break;
	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
	{
		int j;
		i = id - KVM_REG_PPC_TM_VSR0;
		if (i < 32)
			for (j = 0; j < TS_FPRWIDTH; j++)
				val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
		else {
			if (cpu_has_feature(CPU_FTR_ALTIVEC))
				val->vval = vcpu->arch.vr_tm.vr[i-32];
			else
				r = -ENXIO;
		}
		break;
	}
	case KVM_REG_PPC_TM_CR:
		*val = get_reg_val(id, vcpu->arch.cr_tm);
		break;
	case KVM_REG_PPC_TM_XER:
		*val = get_reg_val(id, vcpu->arch.xer_tm);
		break;
	case KVM_REG_PPC_TM_LR:
		*val = get_reg_val(id, vcpu->arch.lr_tm);
		break;
	case KVM_REG_PPC_TM_CTR:
		*val = get_reg_val(id, vcpu->arch.ctr_tm);
		break;
	case KVM_REG_PPC_TM_FPSCR:
		*val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
		break;
	case KVM_REG_PPC_TM_AMR:
		*val = get_reg_val(id, vcpu->arch.amr_tm);
		break;
	case KVM_REG_PPC_TM_PPR:
		*val = get_reg_val(id, vcpu->arch.ppr_tm);
		break;
	case KVM_REG_PPC_TM_VRSAVE:
		*val = get_reg_val(id, vcpu->arch.vrsave_tm);
		break;
	case KVM_REG_PPC_TM_VSCR:
		if (cpu_has_feature(CPU_FTR_ALTIVEC))
			*val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
		else
			r = -ENXIO;
		break;
	case KVM_REG_PPC_TM_DSCR:
		*val = get_reg_val(id, vcpu->arch.dscr_tm);
		break;
	case KVM_REG_PPC_TM_TAR:
		*val = get_reg_val(id, vcpu->arch.tar_tm);
		break;
#endif
	case KVM_REG_PPC_ARCH_COMPAT:
		*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
		break;
	case KVM_REG_PPC_DEC_EXPIRY:
		*val = get_reg_val(id, vcpu->arch.dec_expires +
				   vcpu->arch.vcore->tb_offset);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
				 union kvmppc_one_reg *val)
{
	int r = 0;
	long int i;
	unsigned long addr, len;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		/* Only allow this to be set to zero */
		if (set_reg_val(id, *val))
			r = -EINVAL;
		break;
	case KVM_REG_PPC_DABR:
		vcpu->arch.dabr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DABRX:
		vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
		break;
	case KVM_REG_PPC_DSCR:
		vcpu->arch.dscr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PURR:
		vcpu->arch.purr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SPURR:
		vcpu->arch.spurr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_AMR:
		vcpu->arch.amr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_UAMOR:
		vcpu->arch.uamor = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
		i = id - KVM_REG_PPC_MMCR0;
		vcpu->arch.mmcr[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
		i = id - KVM_REG_PPC_PMC1;
		vcpu->arch.pmc[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
		i = id - KVM_REG_PPC_SPMC1;
		vcpu->arch.spmc[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SIAR:
		vcpu->arch.siar = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SDAR:
		vcpu->arch.sdar = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SIER:
		vcpu->arch.sier = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_IAMR:
		vcpu->arch.iamr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PSPB:
		vcpu->arch.pspb = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DPDES:
		vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_VTB:
		vcpu->arch.vcore->vtb = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DAWR:
		vcpu->arch.dawr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DAWRX:
		vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
		break;
	case KVM_REG_PPC_CIABR:
		vcpu->arch.ciabr = set_reg_val(id, *val);
		/* Don't allow setting breakpoints in hypervisor code */
		if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
			vcpu->arch.ciabr &= ~CIABR_PRIV;	/* disable */
		break;
	case KVM_REG_PPC_CSIGR:
		vcpu->arch.csigr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TACR:
		vcpu->arch.tacr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TCSCR:
		vcpu->arch.tcscr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PID:
		vcpu->arch.pid = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_ACOP:
		vcpu->arch.acop = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_WORT:
		vcpu->arch.wort = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TIDR:
		vcpu->arch.tid = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PSSCR:
		vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
		break;
	case KVM_REG_PPC_VPA_ADDR:
		addr = set_reg_val(id, *val);
		r = -EINVAL;
		if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
			      vcpu->arch.dtl.next_gpa))
			break;
		r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
		break;
	case KVM_REG_PPC_VPA_SLB:
		addr = val->vpaval.addr;
		len = val->vpaval.length;
		r = -EINVAL;
		if (addr && !vcpu->arch.vpa.next_gpa)
			break;
		r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
		break;
	case KVM_REG_PPC_VPA_DTL:
		addr = val->vpaval.addr;
		len = val->vpaval.length;
		r = -EINVAL;
		if (addr && (len < sizeof(struct dtl_entry) ||
			     !vcpu->arch.vpa.next_gpa))
			break;
		len -= len % sizeof(struct dtl_entry);
		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
		break;
	case KVM_REG_PPC_TB_OFFSET:
		/*
		 * POWER9 DD1 has an erratum where writing TBU40 causes
		 * the timebase to lose ticks.  So we don't let the
		 * timebase offset be nonzero on POWER9 DD1.
		 */
		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
			break;
		/* round up to multiple of 2^24 */
		vcpu->arch.vcore->tb_offset =
			ALIGN(set_reg_val(id, *val), 1UL << 24);
		break;
	case KVM_REG_PPC_LPCR:
		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
		break;
	case KVM_REG_PPC_LPCR_64:
		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false);
		break;
	case KVM_REG_PPC_PPR:
		vcpu->arch.ppr = set_reg_val(id, *val);
		break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	case KVM_REG_PPC_TFHAR:
		vcpu->arch.tfhar = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TFIAR:
		vcpu->arch.tfiar = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TEXASR:
		vcpu->arch.texasr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
		i = id - KVM_REG_PPC_TM_GPR0;
		vcpu->arch.gpr_tm[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
	{
		int j;
		i = id - KVM_REG_PPC_TM_VSR0;
		if (i < 32)
			for (j = 0; j < TS_FPRWIDTH; j++)
				vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
		else
			if (cpu_has_feature(CPU_FTR_ALTIVEC))
				vcpu->arch.vr_tm.vr[i-32] = val->vval;
			else
				r = -ENXIO;
		break;
	}
	case KVM_REG_PPC_TM_CR:
		vcpu->arch.cr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_XER:
		vcpu->arch.xer_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_LR:
		vcpu->arch.lr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_CTR:
		vcpu->arch.ctr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_FPSCR:
		vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_AMR:
		vcpu->arch.amr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_PPR:
		vcpu->arch.ppr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_VRSAVE:
		vcpu->arch.vrsave_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_VSCR:
		if (cpu_has_feature(CPU_FTR_ALTIVEC))
			vcpu->arch.vr_tm.vscr.u[3] = set_reg_val(id, *val);
		else
			r = -ENXIO;
		break;
	case KVM_REG_PPC_TM_DSCR:
		vcpu->arch.dscr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_TAR:
		vcpu->arch.tar_tm = set_reg_val(id, *val);
		break;
#endif
	case KVM_REG_PPC_ARCH_COMPAT:
		r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
		break;
	case KVM_REG_PPC_DEC_EXPIRY:
		vcpu->arch.dec_expires = set_reg_val(id, *val) -
			vcpu->arch.vcore->tb_offset;
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}
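
/*
 * On POWER9 in independent-threads mode, the threads of a core are
 * independent and can be in different partitions, so we treat each
 * thread as its own (sub)core; otherwise a virtual core spans
 * threads_per_subcore hardware threads.
 */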
static int threads_per_vcore(struct kvm *kvm)
{
	if (kvm->arch.threads_indep)
		return 1;
	return threads_per_subcore;
}

static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
{
	struct kvmppc_vcore *vcore;

	vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);

	if (vcore == NULL)
		return NULL;

	spin_lock_init(&vcore->lock);
	spin_lock_init(&vcore->stoltb_lock);
	init_swait_queue_head(&vcore->wq);
	vcore->preempt_tb = TB_NIL;
	vcore->lpcr = kvm->arch.lpcr;
	vcore->first_vcpuid = core * kvm->arch.smt_mode;
	vcore->kvm = kvm;
	INIT_LIST_HEAD(&vcore->preempt_list);

	return vcore;
}

#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
static struct debugfs_timings_element {
	const char *name;
	size_t offset;
} timings[] = {
	{"rm_entry",	offsetof(struct kvm_vcpu, arch.rm_entry)},
	{"rm_intr",	offsetof(struct kvm_vcpu, arch.rm_intr)},
	{"rm_exit",	offsetof(struct kvm_vcpu, arch.rm_exit)},
	{"guest",	offsetof(struct kvm_vcpu, arch.guest_time)},
	{"cede",	offsetof(struct kvm_vcpu, arch.cede_time)},
};

#define N_TIMINGS	(ARRAY_SIZE(timings))

struct debugfs_timings_state {
	struct kvm_vcpu	*vcpu;
	unsigned int	buflen;
	char		buf[N_TIMINGS * 100];
};

static int debugfs_timings_open(struct inode *inode, struct file *file)
{
	struct kvm_vcpu *vcpu = inode->i_private;
	struct debugfs_timings_state *p;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	kvm_get_kvm(vcpu->kvm);
	p->vcpu = vcpu;
	file->private_data = p;

	return nonseekable_open(inode, file);
}

static int debugfs_timings_release(struct inode *inode, struct file *file)
{
	struct debugfs_timings_state *p = file->private_data;

	kvm_put_kvm(p->vcpu->kvm);
	kfree(p);
	return 0;
}

static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
				    size_t len, loff_t *ppos)
{
	struct debugfs_timings_state *p = file->private_data;
	struct kvm_vcpu *vcpu = p->vcpu;
	char *s, *buf_end;
	struct kvmhv_tb_accumulator tb;
	u64 count;
	loff_t pos;
	ssize_t n;
	int i, loops;
	bool ok;

	if (!p->buflen) {
		s = p->buf;
		buf_end = s + sizeof(p->buf);
		for (i = 0; i < N_TIMINGS; ++i) {
			struct kvmhv_tb_accumulator *acc;

			acc = (struct kvmhv_tb_accumulator *)
				((unsigned long)vcpu + timings[i].offset);
			ok = false;
			for (loops = 0; loops < 1000; ++loops) {
				count = acc->seqcount;
				if (!(count & 1)) {
					smp_rmb();
					tb = *acc;
					smp_rmb();
					if (count == acc->seqcount) {
						ok = true;
						break;
					}
				}
				udelay(1);
			}
			if (!ok)
				snprintf(s, buf_end - s, "%s: stuck\n",
					 timings[i].name);
			else
				snprintf(s, buf_end - s,
					 "%s: %llu %llu %llu %llu\n",
					 timings[i].name, count / 2,
					 tb_to_ns(tb.tb_total),
					 tb_to_ns(tb.tb_min),
					 tb_to_ns(tb.tb_max));
			s += strlen(s);
		}
		p->buflen = s - p->buf;
	}

	pos = *ppos;
	if (pos >= p->buflen)
		return 0;
	if (len > p->buflen - pos)
		len = p->buflen - pos;
	n = copy_to_user(buf, p->buf + pos, len);
	if (n) {
		if (n == len)
			return -EFAULT;
		len -= n;
	}
	*ppos = pos + len;
	return len;
}

static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
				     size_t len, loff_t *ppos)
{
	return -EACCES;
}

static const struct file_operations debugfs_timings_ops = {
	.owner	 = THIS_MODULE,
	.open	 = debugfs_timings_open,
	.release = debugfs_timings_release,
	.read	 = debugfs_timings_read,
	.write	 = debugfs_timings_write,
	.llseek	 = generic_file_llseek,
};

/* Create a debugfs directory for the vcpu */
static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
{
	char buf[16];
	struct kvm *kvm = vcpu->kvm;

	snprintf(buf, sizeof(buf), "vcpu%u", id);
	if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
		return;
	vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
	if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
		return;
	vcpu->arch.debugfs_timings =
		debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
				    vcpu, &debugfs_timings_ops);
}

#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
{
}
#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */

static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
						   unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int err;
	int core;
	struct kvmppc_vcore *vcore;

	err = -ENOMEM;
	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	vcpu->arch.shared = &vcpu->arch.shregs;
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
	/*
	 * The shared struct is never shared on HV,
	 * so we can always use host endianness
	 */
#ifdef __BIG_ENDIAN__
	vcpu->arch.shared_big_endian = true;
#else
	vcpu->arch.shared_big_endian = false;
#endif
#endif
	vcpu->arch.mmcr[0] = MMCR0_FC;
	vcpu->arch.ctrl = CTRL_RUNLATCH;
	/* default to host PVR, since we can't spoof it */
	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
	spin_lock_init(&vcpu->arch.vpa_update_lock);
	spin_lock_init(&vcpu->arch.tbacct_lock);
	vcpu->arch.busy_preempt = TB_NIL;
	vcpu->arch.intr_msr = MSR_SF | MSR_ME;

	/*
	 * Set the default HFSCR for the guest from the host value.
	 * This value is only used on POWER9.
	 * On that platform, if TM is not usable on the host we make
	 * sure to prevent the guest from using it too, and we turn
	 * off the HFSCR bit for the doorbell facility so that those
	 * instructions trap and can be virtualized.
	 */
	vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
	if (!cpu_has_feature(CPU_FTR_TM))
		vcpu->arch.hfscr &= ~HFSCR_TM;
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		vcpu->arch.hfscr &= ~HFSCR_MSGP;

	kvmppc_mmu_book3s_hv_init(vcpu);

	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;

	init_waitqueue_head(&vcpu->arch.cpu_run);

	mutex_lock(&kvm->lock);
	vcore = NULL;
	err = -EINVAL;
	core = id / kvm->arch.smt_mode;
	if (core < KVM_MAX_VCORES) {
		vcore = kvm->arch.vcores[core];
		if (!vcore) {
			err = -ENOMEM;
			vcore = kvmppc_vcore_create(kvm, core);
			kvm->arch.vcores[core] = vcore;
			kvm->arch.online_vcores++;
		}
	}
	mutex_unlock(&kvm->lock);

	if (!vcore)
		goto free_vcpu;

	spin_lock(&vcore->lock);
	++vcore->num_threads;
	spin_unlock(&vcore->lock);
	vcpu->arch.vcore = vcore;
	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
	vcpu->arch.thread_cpu = -1;
	vcpu->arch.prev_cpu = -1;

	vcpu->arch.cpu_type = KVM_CPU_3S_64;
	kvmppc_sanity_check(vcpu);

	debugfs_vcpu_init(vcpu, id);

	return vcpu;

free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
			      unsigned long flags)
{
	int err;
	int esmt = 0;

	if (flags)
		return -EINVAL;
	if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
		return -EINVAL;
	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
		/*
		 * On POWER8 (or POWER7), the threading mode is "strict",
		 * so we pack smt_mode vcpus per vcore.
		 */
		if (smt_mode > threads_per_subcore)
			return -EINVAL;
	} else {
		/*
		 * On POWER9, the threading mode is "loose",
		 * so each vcpu gets its own vcore.
		 */
		esmt = smt_mode;
		smt_mode = 1;
	}
	mutex_lock(&kvm->lock);
	err = -EBUSY;
	if (!kvm->arch.online_vcores) {
		kvm->arch.smt_mode = smt_mode;
		kvm->arch.emul_smt_mode = esmt;
		err = 0;
	}
	mutex_unlock(&kvm->lock);

	return err;
}

static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
	if (vpa->pinned_addr)
		kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
					vpa->dirty);
}

static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.vpa_update_lock);
	unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
	unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
	unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
	spin_unlock(&vcpu->arch.vpa_update_lock);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
{
	/* Indicate we want to get back into the guest */
	return 1;
}

static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
	unsigned long dec_nsec, now;

	now = get_tb();
	if (now > vcpu->arch.dec_expires) {
		/* decrementer has already gone negative */
		kvmppc_core_queue_dec(vcpu);
		kvmppc_core_prepare_to_enter(vcpu);
		return;
	}
	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
		   / tb_ticks_per_sec;
	hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
	vcpu->arch.timer_running = 1;
}

static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
	vcpu->arch.ceded = 0;
	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
}

extern int __kvmppc_vcore_entry(void);

static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
				   struct kvm_vcpu *vcpu)
{
	u64 now;

	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
		return;
	spin_lock_irq(&vcpu->arch.tbacct_lock);
	now = mftb();
	vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
		vcpu->arch.stolen_logged;
	vcpu->arch.busy_preempt = now;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
	spin_unlock_irq(&vcpu->arch.tbacct_lock);
	--vc->n_runnable;
	WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
}

static int kvmppc_grab_hwthread(int cpu)
{
	struct paca_struct *tpaca;
	long timeout = 10000;

	tpaca = &paca[cpu];

	/* Ensure the thread won't go into the kernel if it wakes */
	tpaca->kvm_hstate.kvm_vcpu = NULL;
	tpaca->kvm_hstate.kvm_vcore = NULL;
	tpaca->kvm_hstate.napping = 0;
	smp_wmb();
	tpaca->kvm_hstate.hwthread_req = 1;

	/*
	 * If the thread is already executing in the kernel (e.g. handling
	 * a stray interrupt), wait for it to get back to nap mode.
	 * The smp_mb() is to ensure that our setting of hwthread_req
	 * is visible before we look at hwthread_state, so if this
	 * races with the code at system_reset_pSeries and the thread
	 * misses our setting of hwthread_req, we are sure to see its
	 * setting of hwthread_state, and vice versa.
	 */
	smp_mb();
	while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
		if (--timeout <= 0) {
			pr_err("KVM: couldn't grab cpu %d\n", cpu);
			return -EBUSY;
		}
		udelay(1);
	}
	return 0;
}

static void kvmppc_release_hwthread(int cpu)
{
	struct paca_struct *tpaca;

	tpaca = &paca[cpu];
	tpaca->kvm_hstate.hwthread_req = 0;
	tpaca->kvm_hstate.kvm_vcpu = NULL;
	tpaca->kvm_hstate.kvm_vcore = NULL;
	tpaca->kvm_hstate.kvm_split_mode = NULL;
}

static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
{
	int i;

	cpu = cpu_first_thread_sibling(cpu);
	cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
	/*
	 * Make sure setting of bit in need_tlb_flush precedes
	 * testing of cpu_in_guest bits.  The matching barrier on
	 * the other side is the first smp_mb() in kvmppc_run_core().
	 */
	smp_mb();
	for (i = 0; i < threads_per_core; ++i)
		if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest))
			smp_call_function_single(cpu + i, do_nothing, NULL, 1);
}

static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
{
	struct kvm *kvm = vcpu->kvm;

	/*
	 * With radix, the guest can do TLB invalidations itself,
	 * and it could choose to use the local form (tlbiel) if
	 * it is invalidating a translation that has only ever been
	 * used on one vcpu.  However, that doesn't mean it has
	 * only ever been used on one physical cpu, since vcpus
	 * can move around between pcpus.  To cope with this, when
	 * a vcpu moves from one pcpu to another, we need to tell
	 * any vcpus running on the same core as this vcpu previously
	 * ran to flush the TLB.  The TLB is shared between threads,
	 * so we use a single bit in .need_tlb_flush for all 4 threads.
	 */
	if (vcpu->arch.prev_cpu != pcpu) {
		if (vcpu->arch.prev_cpu >= 0 &&
		    cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
		    cpu_first_thread_sibling(pcpu))
			radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
		vcpu->arch.prev_cpu = pcpu;
	}
}

static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
	int cpu;
	struct paca_struct *tpaca;
	struct kvm *kvm = vc->kvm;

	cpu = vc->pcpu;
	if (vcpu) {
		if (vcpu->arch.timer_running) {
			hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
			vcpu->arch.timer_running = 0;
		}
		cpu += vcpu->arch.ptid;
		vcpu->cpu = vc->pcpu;
		vcpu->arch.thread_cpu = cpu;
		cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
	}
	tpaca = &paca[cpu];
	tpaca->kvm_hstate.kvm_vcpu = vcpu;
	tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
	/* Order stores to hstate.kvm_vcpu etc. vs. store to kvm_vcore */
	smp_wmb();
	tpaca->kvm_hstate.kvm_vcore = vc;
	if (cpu != smp_processor_id())
		kvmppc_ipi_thread(cpu);
}

static void kvmppc_wait_for_nap(int n_threads)
{
	int cpu = smp_processor_id();
	int i, loops;

	if (n_threads <= 1)
		return;
	for (loops = 0; loops < 1000000; ++loops) {
		/*
		 * Check if all threads are finished.
		 * We set the vcore pointer when starting a thread
		 * and the thread clears it when finished, so we look
		 * for any threads that still have a non-NULL vcore ptr.
		 */
		for (i = 1; i < n_threads; ++i)
			if (paca[cpu + i].kvm_hstate.kvm_vcore)
				break;
		if (i == n_threads) {
			HMT_medium();
			return;
		}
		HMT_low();
	}
	HMT_medium();
	for (i = 1; i < n_threads; ++i)
		if (paca[cpu + i].kvm_hstate.kvm_vcore)
			pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
}
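
/*
 * Check that we are on thread 0 and that any other threads in
 * this core are off-line.  Then grab the threads so they can't
 * enter the kernel.
 */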
static int on_primary_thread(void)
{
	int cpu = smp_processor_id();
	int thr;

	/* Are we on a primary subcore? */
	if (cpu_thread_in_subcore(cpu))
		return 0;

	thr = 0;
	while (++thr < threads_per_subcore)
		if (cpu_online(cpu + thr))
			return 0;

	/* Grab all hw threads so they can't go into the kernel */
	for (thr = 1; thr < threads_per_subcore; ++thr) {
		if (kvmppc_grab_hwthread(cpu + thr)) {
			/* Couldn't grab one; let the others go */
			do {
				kvmppc_release_hwthread(cpu + thr);
			} while (--thr > 0);
			return 0;
		}
	}
	return 1;
}
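
/*
 * A list of virtual cores for each physical CPU.
 * These are vcores that could run but their runner VCPU tasks are
 * (or may be) preempted.
 */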
struct preempted_vcore_list {
	struct list_head	list;
	spinlock_t		lock;
};

static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);

static void init_vcore_lists(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
		spin_lock_init(&lp->lock);
		INIT_LIST_HEAD(&lp->list);
	}
}
2335
2336static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
2337{
2338 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
2339
2340 vc->vcore_state = VCORE_PREEMPT;
2341 vc->pcpu = smp_processor_id();
2342 if (vc->num_threads < threads_per_vcore(vc->kvm)) {
2343 spin_lock(&lp->lock);
2344 list_add_tail(&vc->preempt_list, &lp->list);
2345 spin_unlock(&lp->lock);
2346 }
2347
2348 /* Start accumulating stolen time */
2349 kvmppc_core_start_stolen(vc);
2350}
2351
2352static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
2353{
2354 struct preempted_vcore_list *lp;
2355
2356 kvmppc_core_end_stolen(vc);
2357 if (!list_empty(&vc->preempt_list)) {
2358 lp = &per_cpu(preempted_vcores, vc->pcpu);
2359 spin_lock(&lp->lock);
2360 list_del_init(&vc->preempt_list);
2361 spin_unlock(&lp->lock);
2362 }
2363 vc->vcore_state = VCORE_INACTIVE;
2364}
2365
2366 /*
2367  * This stores information about the virtual cores currently
2368  * assigned to a physical core.
2369  */
2370struct core_info {
2371 int n_subcores;
2372 int max_subcore_threads;
2373 int total_threads;
2374 int subcore_threads[MAX_SUBCORES];
2375 struct kvmppc_vcore *vc[MAX_SUBCORES];
2376};
2377
2378 /*
2379  * First hardware thread used by each subcore: subcores 0 and 1 get
2380  * threads 0-3 and 4-7 in 2-way split mode; 4-way mode uses 0, 4, 2, 6.
2381  */
2382static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
2383
2384static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
2385{
2386 memset(cip, 0, sizeof(*cip));
2387 cip->n_subcores = 1;
2388 cip->max_subcore_threads = vc->num_threads;
2389 cip->total_threads = vc->num_threads;
2390 cip->subcore_threads[0] = vc->num_threads;
2391 cip->vc[0] = vc;
2392}
2393
2394static bool subcore_config_ok(int n_subcores, int n_threads)
2395{
2396 /*
2397  * POWER9 "SMT4" cores are permanently in what is effectively a
2398  * 4-way split-core mode, with one thread per subcore.
2399  */
2400 if (cpu_has_feature(CPU_FTR_ARCH_300))
2401 return n_subcores <= 4 && n_threads == 1;
2402
2403 /* On POWER8, we can only dynamically split if unsplit to begin with */
2404 if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
2405 return false;
2406 if (n_subcores > MAX_SUBCORES)
2407 return false;
2408 if (n_subcores > 1) {
2409 if (!(dynamic_mt_modes & 2))
2410 n_subcores = 4; /* to disallow 2-way split */
2411 if (n_subcores > 2 && !(dynamic_mt_modes & 4))
2412 return false;
2413 }
2414
2415 return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
2416}
2417
2418static void init_vcore_to_run(struct kvmppc_vcore *vc)
2419{
2420 vc->entry_exit_map = 0;
2421 vc->in_guest = 0;
2422 vc->napping_threads = 0;
2423 vc->conferring_threads = 0;
2424}
2425
2426static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
2427{
2428 int n_threads = vc->num_threads;
2429 int sub;
2430
2431 if (!cpu_has_feature(CPU_FTR_ARCH_207S))
2432 return false;
2433
2434
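/* Some POWER9 machines require all threads in a core to be in the same MMU mode */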
2435 if (no_mixing_hpt_and_radix &&
2436 kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
2437 return false;
2438
2439 if (n_threads < cip->max_subcore_threads)
2440 n_threads = cip->max_subcore_threads;
2441 if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
2442 return false;
2443 cip->max_subcore_threads = n_threads;
2444
2445 sub = cip->n_subcores;
2446 ++cip->n_subcores;
2447 cip->total_threads += vc->num_threads;
2448 cip->subcore_threads[sub] = vc->num_threads;
2449 cip->vc[sub] = vc;
2450 init_vcore_to_run(vc);
2451 list_del_init(&vc->preempt_list);
2452
2453 return true;
2454}
2455
2456 /*
2457  * Work out whether it is possible to piggyback the execution of
2458  * vcore *pvc onto the execution of the other vcores described in *cip.
2459  */
2460static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
2461 int target_threads)
2462{
2463 if (cip->total_threads + pvc->num_threads > target_threads)
2464 return false;
2465
2466 return can_dynamic_split(pvc, cip);
2467}
2468
2469static void prepare_threads(struct kvmppc_vcore *vc)
2470{
2471 int i;
2472 struct kvm_vcpu *vcpu;
2473
2474 for_each_runnable_thread(i, vcpu, vc) {
2475 if (signal_pending(vcpu->arch.run_task))
2476 vcpu->arch.ret = -EINTR;
2477 else if (vcpu->arch.vpa.update_pending ||
2478 vcpu->arch.slb_shadow.update_pending ||
2479 vcpu->arch.dtl.update_pending)
2480 vcpu->arch.ret = RESUME_GUEST;
2481 else
2482 continue;
2483 kvmppc_remove_runnable(vc, vcpu);
2484 wake_up(&vcpu->arch.cpu_run);
2485 }
2486}
2487
2488static void collect_piggybacks(struct core_info *cip, int target_threads)
2489{
2490 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
2491 struct kvmppc_vcore *pvc, *vcnext;
2492
2493 spin_lock(&lp->lock);
2494 list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
2495 if (!spin_trylock(&pvc->lock))
2496 continue;
2497 prepare_threads(pvc);
2498 if (!pvc->n_runnable) {
2499 list_del_init(&pvc->preempt_list);
2500 if (pvc->runner == NULL) {
2501 pvc->vcore_state = VCORE_INACTIVE;
2502 kvmppc_core_end_stolen(pvc);
2503 }
2504 spin_unlock(&pvc->lock);
2505 continue;
2506 }
2507 if (!can_piggyback(pvc, cip, target_threads)) {
2508 spin_unlock(&pvc->lock);
2509 continue;
2510 }
2511 kvmppc_core_end_stolen(pvc);
2512 pvc->vcore_state = VCORE_PIGGYBACK;
2513 if (cip->total_threads >= target_threads)
2514 break;
2515 }
2516 spin_unlock(&lp->lock);
2517}
2518
2519static bool recheck_signals(struct core_info *cip)
2520{
2521 int sub, i;
2522 struct kvm_vcpu *vcpu;
2523
2524 for (sub = 0; sub < cip->n_subcores; ++sub)
2525 for_each_runnable_thread(i, vcpu, cip->vc[sub])
2526 if (signal_pending(vcpu->arch.run_task))
2527 return true;
2528 return false;
2529}
2530
2531static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
2532{
2533 int still_running = 0, i;
2534 u64 now;
2535 long ret;
2536 struct kvm_vcpu *vcpu;
2537
2538 spin_lock(&vc->lock);
2539 now = get_tb();
2540 for_each_runnable_thread(i, vcpu, vc) {
2541
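/* cancel pending decrementer exception if DEC is now positive */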
2542 if (now < vcpu->arch.dec_expires &&
2543 kvmppc_core_pending_dec(vcpu))
2544 kvmppc_core_dequeue_dec(vcpu);
2545
2546 trace_kvm_guest_exit(vcpu);
2547
2548 ret = RESUME_GUEST;
2549 if (vcpu->arch.trap)
2550 ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
2551 vcpu->arch.run_task);
2552
2553 vcpu->arch.ret = ret;
2554 vcpu->arch.trap = 0;
2555
2556 if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
2557 if (vcpu->arch.pending_exceptions)
2558 kvmppc_core_prepare_to_enter(vcpu);
2559 if (vcpu->arch.ceded)
2560 kvmppc_set_timer(vcpu);
2561 else
2562 ++still_running;
2563 } else {
2564 kvmppc_remove_runnable(vc, vcpu);
2565 wake_up(&vcpu->arch.cpu_run);
2566 }
2567 }
2568 if (!is_master) {
2569 if (still_running > 0) {
2570 kvmppc_vcore_preempt(vc);
2571 } else if (vc->runner) {
2572 vc->vcore_state = VCORE_PREEMPT;
2573 kvmppc_core_start_stolen(vc);
2574 } else {
2575 vc->vcore_state = VCORE_INACTIVE;
2576 }
2577 if (vc->n_runnable > 0 && vc->runner == NULL) {
2578
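/* make sure there's a candidate runner awake */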
2579 i = -1;
2580 vcpu = next_runnable_thread(vc, &i);
2581 wake_up(&vcpu->arch.cpu_run);
2582 }
2583 }
2584 spin_unlock(&vc->lock);
2585}
2586
2587 /*
2588  * Clear this physical core from the list of cores running in the
2589  * host, as we are about to enter the guest.  Only thread 0 of
2590  * each core does this.
2591  */
2592static inline int kvmppc_clear_host_core(unsigned int cpu)
2593{
2594 int core;
2595
2596 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
2597 return 0;
2598
2599 /*
2600  * The memory barrier can be omitted here as we will do a
2601  * smp_wmb() later in kvmppc_start_thread().
2602  */
2603 core = cpu >> threads_shift;
2604 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
2605 return 0;
2606}
2607
2608 /*
2609  * Advertise this core as an active host core again, since we
2610  * have exited guest mode.  Only thread 0 of each core does this.
2611  */
2612
2613static inline int kvmppc_set_host_core(unsigned int cpu)
2614{
2615 int core;
2616
2617 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
2618 return 0;
2619
2620 /*
2621  * The memory barrier can be omitted here because the caller
2622  * does a spin_unlock immediately after this.
2623  */
2624 core = cpu >> threads_shift;
2625 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
2626 return 0;
2627}
2628
2629static void set_irq_happened(int trap)
2630{
2631 switch (trap) {
2632 case BOOK3S_INTERRUPT_EXTERNAL:
2633 local_paca->irq_happened |= PACA_IRQ_EE;
2634 break;
2635 case BOOK3S_INTERRUPT_H_DOORBELL:
2636 local_paca->irq_happened |= PACA_IRQ_DBELL;
2637 break;
2638 case BOOK3S_INTERRUPT_HMI:
2639 local_paca->irq_happened |= PACA_IRQ_HMI;
2640 break;
2641 case BOOK3S_INTERRUPT_SYSTEM_RESET:
2642 replay_system_reset();
2643 break;
2644 }
2645}
2646
2647 /*
2648  * Run a set of guest threads on a physical core.
2649  * Called with vc->lock held.
2650  */
2651static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2652{
2653 struct kvm_vcpu *vcpu;
2654 int i;
2655 int srcu_idx;
2656 struct core_info core_info;
2657 struct kvmppc_vcore *pvc;
2658 struct kvm_split_mode split_info, *sip;
2659 int split, subcore_size, active;
2660 int sub;
2661 bool thr0_done;
2662 unsigned long cmd_bit, stat_bit;
2663 int pcpu, thr;
2664 int target_threads;
2665 int controlled_threads;
2666 int trap;
2667 bool is_power8;
2668 bool hpt_on_radix;
2669
2670 /*
2671  * Remove from the list any threads that have a signal pending
2672  * or need a VPA update done.
2673  */
2674 prepare_threads(vc);
2675
2676
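/* if the runner is no longer runnable, let the caller pick a new one */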
2677 if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
2678 return;
2679
2680
2681
2682 /* Initialize *vc for this run */
2683 init_vcore_to_run(vc);
2684 vc->preempt_tb = TB_NIL;
2685
2686 /*
2687  * Number of threads that we will be controlling: the same as
2688  * the number of threads per subcore, except on POWER9,
2689  * where it's 1 because the threads are (mostly) independent.
2690  */
2691 controlled_threads = threads_per_vcore(vc->kvm);
2692
2693 /*
2694  * Make sure we are running on primary threads, and that secondary
2695  * threads are offline.  Also check if the number of threads in
2696  * this guest exceeds the number we can control.
2697  * On POWER9, an HPT guest on a radix host cannot be run in
2698  * independent-threads mode, since all threads of a core must be
2699  * in the same MMU mode.
2700  */
2701 hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
2702 !kvm_is_radix(vc->kvm);
2703 if (((controlled_threads > 1) &&
2704 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
2705 (hpt_on_radix && vc->kvm->arch.threads_indep)) {
2706 for_each_runnable_thread(i, vcpu, vc) {
2707 vcpu->arch.ret = -EBUSY;
2708 kvmppc_remove_runnable(vc, vcpu);
2709 wake_up(&vcpu->arch.cpu_run);
2710 }
2711 goto out;
2712 }
2713
2714 /*
2715  * See if we could run any other vcores on the physical core
2716  * along with this one.
2717  */
2718 init_core_info(&core_info, vc);
2719 pcpu = smp_processor_id();
2720 target_threads = controlled_threads;
2721 if (target_smt_mode && target_smt_mode < target_threads)
2722 target_threads = target_smt_mode;
2723 if (vc->num_threads < target_threads)
2724 collect_piggybacks(&core_info, target_threads);
2725
2726 /*
2727  * On radix, arrange for TLB flushing if necessary.
2728  * This has to be done before disabling interrupts since
2729  * it uses smp_call_function().
2730  */
2731 pcpu = smp_processor_id();
2732 if (kvm_is_radix(vc->kvm)) {
2733 for (sub = 0; sub < core_info.n_subcores; ++sub)
2734 for_each_runnable_thread(i, vcpu, core_info.vc[sub])
2735 kvmppc_prepare_radix_vcpu(vcpu, pcpu);
2736 }
2737
2738 /*
2739  * Hard-disable interrupts, and check the resched flag and
2740  * signals.  If we need to reschedule or deliver a signal, clean
2741  * up and return without going into the guest(s).
2742  * If the mmu_ready flag has been cleared, don't go into the
2743  * guest because that means a HPT resize operation is in progress.
2744  */
2745 local_irq_disable();
2746 hard_irq_disable();
2747 if (lazy_irq_pending() || need_resched() ||
2748 recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
2749 local_irq_enable();
2750 vc->vcore_state = VCORE_INACTIVE;
2751
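/* Unlock all except the primary vcore */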
2752 for (sub = 1; sub < core_info.n_subcores; ++sub) {
2753 pvc = core_info.vc[sub];
2754
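/* Put back on to the preempted-vcores list */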
2755 kvmppc_vcore_preempt(pvc);
2756 spin_unlock(&pvc->lock);
2757 }
2758 for (i = 0; i < controlled_threads; ++i)
2759 kvmppc_release_hwthread(pcpu + i);
2760 return;
2761 }
2762
2763 kvmppc_clear_host_core(pcpu);
2764
2765 /* Decide on micro-threading (split-core) mode */
2766 subcore_size = threads_per_subcore;
2767 cmd_bit = stat_bit = 0;
2768 split = core_info.n_subcores;
2769 sip = NULL;
2770 is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
2771 && !cpu_has_feature(CPU_FTR_ARCH_300);
2772
2773 if (split > 1 || hpt_on_radix) {
2774 sip = &split_info;
2775 memset(&split_info, 0, sizeof(split_info));
2776 for (sub = 0; sub < core_info.n_subcores; ++sub)
2777 split_info.vc[sub] = core_info.vc[sub];
2778
2779 if (is_power8) {
2780 if (split == 2 && (dynamic_mt_modes & 2)) {
2781 cmd_bit = HID0_POWER8_1TO2LPAR;
2782 stat_bit = HID0_POWER8_2LPARMODE;
2783 } else {
2784 split = 4;
2785 cmd_bit = HID0_POWER8_1TO4LPAR;
2786 stat_bit = HID0_POWER8_4LPARMODE;
2787 }
2788 subcore_size = MAX_SMT_THREADS / split;
2789 split_info.rpr = mfspr(SPRN_RPR);
2790 split_info.pmmar = mfspr(SPRN_PMMAR);
2791 split_info.ldbar = mfspr(SPRN_LDBAR);
2792 split_info.subcore_size = subcore_size;
2793 } else {
2794 split_info.subcore_size = 1;
2795 if (hpt_on_radix) {
2796
2797 split_info.lpcr_req = vc->lpcr;
2798 split_info.lpidr_req = vc->kvm->arch.lpid;
2799 split_info.host_lpcr = vc->kvm->arch.host_lpcr;
2800 split_info.do_set = 1;
2801 }
2802 }
2803
2804 /* order writes to split_info before the kvm_split_mode pointer */
2805 smp_wmb();
2806 }
2807
2808 for (thr = 0; thr < controlled_threads; ++thr) {
2809 paca[pcpu + thr].kvm_hstate.tid = thr;
2810 paca[pcpu + thr].kvm_hstate.napping = 0;
2811 paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
2812 }
2813
2814 /* Initiate micro-threading (split-core) on POWER8 if required */
2815 if (cmd_bit) {
2816 unsigned long hid0 = mfspr(SPRN_HID0);
2817
2818 hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
2819 mb();
2820 mtspr(SPRN_HID0, hid0);
2821 isync();
2822 for (;;) {
2823 hid0 = mfspr(SPRN_HID0);
2824 if (hid0 & stat_bit)
2825 break;
2826 cpu_relax();
2827 }
2828 }
2829
2830 /* Start all the threads */
2831 active = 0;
2832 for (sub = 0; sub < core_info.n_subcores; ++sub) {
2833 thr = is_power8 ? subcore_thread_map[sub] : sub;
2834 thr0_done = false;
2835 active |= 1 << thr;
2836 pvc = core_info.vc[sub];
2837 pvc->pcpu = pcpu + thr;
2838 for_each_runnable_thread(i, vcpu, pvc) {
2839 kvmppc_start_thread(vcpu, pvc);
2840 kvmppc_create_dtl_entry(vcpu, pvc);
2841 trace_kvm_guest_enter(vcpu);
2842 if (!vcpu->arch.ptid)
2843 thr0_done = true;
2844 active |= 1 << (thr + vcpu->arch.ptid);
2845 }
2846 /*
2847  * We need to start the first thread of each subcore
2848  * even if it doesn't have a vcpu.
2849  */
2850 if (!thr0_done)
2851 kvmppc_start_thread(NULL, pvc);
2852 }
2853
2854 /*
2855  * Ensure that split_info.do_nap is set after setting
2856  * the vcore pointer in the PACA of the secondaries.
2857  */
2858 smp_mb();
2859
2860 /*
2861  * When doing micro-threading, poke the inactive threads as well.
2862  * This gets them to the nap instruction after kvm_do_nap,
2863  * which reduces the time taken to unsplit later.
2864  * For a POWER9 HPT guest on a radix host, we need all the
2865  * secondary threads woken up so they can do the LPCR/LPIDR change.
2866  */
2867 if (cmd_bit || hpt_on_radix) {
2868 split_info.do_nap = 1;
2869 for (thr = 1; thr < threads_per_subcore; ++thr)
2870 if (!(active & (1 << thr)))
2871 kvmppc_ipi_thread(pcpu + thr);
2872 }
2873
2874 vc->vcore_state = VCORE_RUNNING;
2875 preempt_disable();
2876
2877 trace_kvmppc_run_core(vc, 0);
2878
2879 for (sub = 0; sub < core_info.n_subcores; ++sub)
2880 spin_unlock(&core_info.vc[sub]->lock);
2881
2882 /*
2883  * Interrupts will be enabled once we get into the guest,
2884  * so tell lockdep that we're about to enable interrupts.
2885  */
2886 trace_hardirqs_on();
2887
2888 guest_enter_irqoff();
2889
2890 srcu_idx = srcu_read_lock(&vc->kvm->srcu);
2891
2892 trap = __kvmppc_vcore_entry();
2893
2894 srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
2895
2896 trace_hardirqs_off();
2897 set_irq_happened(trap);
2898
2899 spin_lock(&vc->lock);
2900
2901 vc->vcore_state = VCORE_EXITING;
2902
2903 /* wait for secondary threads to finish writing their state to memory */
2904 kvmppc_wait_for_nap(controlled_threads);
2905
2906
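/* Return to whole-core mode if we split the core earlier */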
2907 if (cmd_bit) {
2908 unsigned long hid0 = mfspr(SPRN_HID0);
2909 unsigned long loops = 0;
2910
2911 hid0 &= ~HID0_POWER8_DYNLPARDIS;
2912 stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
2913 mb();
2914 mtspr(SPRN_HID0, hid0);
2915 isync();
2916 for (;;) {
2917 hid0 = mfspr(SPRN_HID0);
2918 if (!(hid0 & stat_bit))
2919 break;
2920 cpu_relax();
2921 ++loops;
2922 }
2923 } else if (hpt_on_radix) {
2924
2925 for (thr = 1; thr < controlled_threads; ++thr) {
2926 while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) {
2927 HMT_low();
2928 barrier();
2929 }
2930 HMT_medium();
2931 }
2932 }
2933 split_info.do_nap = 0;
2934
2935 kvmppc_set_host_core(pcpu);
2936
2937 local_irq_enable();
2938 guest_exit();
2939
2940 /* Let secondaries go back to the offline loop */
2941 for (i = 0; i < controlled_threads; ++i) {
2942 kvmppc_release_hwthread(pcpu + i);
2943 if (sip && sip->napped[i])
2944 kvmppc_ipi_thread(pcpu + i);
2945 cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
2946 }
2947
2948 spin_unlock(&vc->lock);
2949
2950 /* make sure updates to secondary vcpu structs are visible now */
2951 smp_mb();
2952
2953 preempt_enable();
2954
2955 for (sub = 0; sub < core_info.n_subcores; ++sub) {
2956 pvc = core_info.vc[sub];
2957 post_guest_process(pvc, pvc == vc);
2958 }
2959
2960 spin_lock(&vc->lock);
2961
2962 out:
2963 vc->vcore_state = VCORE_INACTIVE;
2964 trace_kvmppc_run_core(vc, 1);
2965}
2966
2967 /*
2968  * Wait for some other vcpu thread to execute us, and
2969  * wake us up when we need to handle something in the host.
2970  */
2971static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
2972 struct kvm_vcpu *vcpu, int wait_state)
2973{
2974 DEFINE_WAIT(wait);
2975
2976 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
2977 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
2978 spin_unlock(&vc->lock);
2979 schedule();
2980 spin_lock(&vc->lock);
2981 }
2982 finish_wait(&vcpu->arch.cpu_run, &wait);
2983}
2984
2985static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
2986{
2987
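/* 10us base poll interval, multiplied by halt_poll_ns_grow thereafter */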
2988 if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
2989 vc->halt_poll_ns = 10000;
2990 else
2991 vc->halt_poll_ns *= halt_poll_ns_grow;
2992}
2993
2994static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
2995{
2996 if (halt_poll_ns_shrink == 0)
2997 vc->halt_poll_ns = 0;
2998 else
2999 vc->halt_poll_ns /= halt_poll_ns_shrink;
3000}
3001
3002#ifdef CONFIG_KVM_XICS
3003static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
3004{
3005 if (!xive_enabled())
3006 return false;
3007 return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
3008 vcpu->arch.xive_saved_state.cppr;
3009}
3010#else
3011static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
3012{
3013 return false;
3014}
3015#endif
3016
3017static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
3018{
3019 if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
3020 kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
3021 return true;
3022
3023 return false;
3024}
3025
3026 /*
3027  * Check to see if any of the runnable vcpus on the vcore have
3028  * pending exceptions or are no longer ceded.
3029  */
3030static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
3031{
3032 struct kvm_vcpu *vcpu;
3033 int i;
3034
3035 for_each_runnable_thread(i, vcpu, vc) {
3036 if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
3037 return 1;
3038 }
3039
3040 return 0;
3041}
3042
3043 /*
3044  * All the vcpus in this vcore are idle, so wait for a decrementer
3045  * or external interrupt to one of the vcpus.  vc->lock is held.
3046  */
3047static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
3048{
3049 ktime_t cur, start_poll, start_wait;
3050 int do_sleep = 1;
3051 u64 block_ns;
3052 DECLARE_SWAITQUEUE(wait);
3053
3054 /* Poll for pending exceptions and ceded state */
3055 cur = start_poll = ktime_get();
3056 if (vc->halt_poll_ns) {
3057 ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
3058 ++vc->runner->stat.halt_attempted_poll;
3059
3060 vc->vcore_state = VCORE_POLLING;
3061 spin_unlock(&vc->lock);
3062
3063 do {
3064 if (kvmppc_vcore_check_block(vc)) {
3065 do_sleep = 0;
3066 break;
3067 }
3068 cur = ktime_get();
3069 } while (single_task_running() && ktime_before(cur, stop));
3070
3071 spin_lock(&vc->lock);
3072 vc->vcore_state = VCORE_INACTIVE;
3073
3074 if (!do_sleep) {
3075 ++vc->runner->stat.halt_successful_poll;
3076 goto out;
3077 }
3078 }
3079
3080 prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
3081
3082 if (kvmppc_vcore_check_block(vc)) {
3083 finish_swait(&vc->wq, &wait);
3084 do_sleep = 0;
3085
3086 if (vc->halt_poll_ns)
3087 ++vc->runner->stat.halt_successful_poll;
3088 goto out;
3089 }
3090
3091 start_wait = ktime_get();
3092
3093 vc->vcore_state = VCORE_SLEEPING;
3094 trace_kvmppc_vcore_blocked(vc, 0);
3095 spin_unlock(&vc->lock);
3096 schedule();
3097 finish_swait(&vc->wq, &wait);
3098 spin_lock(&vc->lock);
3099 vc->vcore_state = VCORE_INACTIVE;
3100 trace_kvmppc_vcore_blocked(vc, 1);
3101 ++vc->runner->stat.halt_successful_wait;
3102
3103 cur = ktime_get();
3104
3105out:
3106 block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
3107
3108 /* Attribute the blocked time to wait or poll statistics */
3109 if (do_sleep) {
3110 vc->runner->stat.halt_wait_ns +=
3111 ktime_to_ns(cur) - ktime_to_ns(start_wait);
3112
3113 if (vc->halt_poll_ns)
3114 vc->runner->stat.halt_poll_fail_ns +=
3115 ktime_to_ns(start_wait) -
3116 ktime_to_ns(start_poll);
3117 } else {
3118
3119 if (vc->halt_poll_ns)
3120 vc->runner->stat.halt_poll_success_ns +=
3121 ktime_to_ns(cur) -
3122 ktime_to_ns(start_poll);
3123 }
3124
3125 /* Adjust the poll window */
3126 if (halt_poll_ns) {
3127 if (block_ns <= vc->halt_poll_ns)
3128 ;
3129
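/* We slept and blocked for longer than the max halt time */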
3130 else if (vc->halt_poll_ns && block_ns > halt_poll_ns)
3131 shrink_halt_poll_ns(vc);
3132
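/* We slept and our poll time is too small */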
3133 else if (vc->halt_poll_ns < halt_poll_ns &&
3134 block_ns < halt_poll_ns)
3135 grow_halt_poll_ns(vc);
3136 if (vc->halt_poll_ns > halt_poll_ns)
3137 vc->halt_poll_ns = halt_poll_ns;
3138 } else
3139 vc->halt_poll_ns = 0;
3140
3141 trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
3142}
3143
3144static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
3145{
3146 int r = 0;
3147 struct kvm *kvm = vcpu->kvm;
3148
3149 mutex_lock(&kvm->lock);
3150 if (!kvm->arch.mmu_ready) {
3151 if (!kvm_is_radix(kvm))
3152 r = kvmppc_hv_setup_htab_rma(vcpu);
3153 if (!r) {
3154 if (cpu_has_feature(CPU_FTR_ARCH_300))
3155 kvmppc_setup_partition_table(kvm);
3156 kvm->arch.mmu_ready = 1;
3157 }
3158 }
3159 mutex_unlock(&kvm->lock);
3160 return r;
3161}
3162
3163static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3164{
3165 int n_ceded, i, r;
3166 struct kvmppc_vcore *vc;
3167 struct kvm_vcpu *v;
3168
3169 trace_kvmppc_run_vcpu_enter(vcpu);
3170
3171 kvm_run->exit_reason = 0;
3172 vcpu->arch.ret = RESUME_GUEST;
3173 vcpu->arch.trap = 0;
3174 kvmppc_update_vpas(vcpu);
3175
3176 /*
3177  * Synchronize with other threads in this virtual core.
3178  */
3179 vc = vcpu->arch.vcore;
3180 spin_lock(&vc->lock);
3181 vcpu->arch.ceded = 0;
3182 vcpu->arch.run_task = current;
3183 vcpu->arch.kvm_run = kvm_run;
3184 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
3185 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
3186 vcpu->arch.busy_preempt = TB_NIL;
3187 WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
3188 ++vc->n_runnable;
3189
3190 /*
3191  * This happens the first time this is called for a vcpu.
3192  * If the vcore is already running, we may be able to start
3193  * this thread straight away and have it join in.
3194  */
3195 if (!signal_pending(current)) {
3196 if ((vc->vcore_state == VCORE_PIGGYBACK ||
3197 vc->vcore_state == VCORE_RUNNING) &&
3198 !VCORE_IS_EXITING(vc)) {
3199 kvmppc_create_dtl_entry(vcpu, vc);
3200 kvmppc_start_thread(vcpu, vc);
3201 trace_kvm_guest_enter(vcpu);
3202 } else if (vc->vcore_state == VCORE_SLEEPING) {
3203 swake_up(&vc->wq);
3204 }
3205
3206 }
3207
3208 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
3209 !signal_pending(current)) {
3210
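/* See if the MMU is ready to go */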
3211 if (!vcpu->kvm->arch.mmu_ready) {
3212 spin_unlock(&vc->lock);
3213 r = kvmhv_setup_mmu(vcpu);
3214 spin_lock(&vc->lock);
3215 if (r) {
3216 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3217 kvm_run->fail_entry.
3218 hardware_entry_failure_reason = 0;
3219 vcpu->arch.ret = r;
3220 break;
3221 }
3222 }
3223
3224 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
3225 kvmppc_vcore_end_preempt(vc);
3226
3227 if (vc->vcore_state != VCORE_INACTIVE) {
3228 kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
3229 continue;
3230 }
3231 for_each_runnable_thread(i, v, vc) {
3232 kvmppc_core_prepare_to_enter(v);
3233 if (signal_pending(v->arch.run_task)) {
3234 kvmppc_remove_runnable(vc, v);
3235 v->stat.signal_exits++;
3236 v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
3237 v->arch.ret = -EINTR;
3238 wake_up(&v->arch.cpu_run);
3239 }
3240 }
3241 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
3242 break;
3243 n_ceded = 0;
3244 for_each_runnable_thread(i, v, vc) {
3245 if (!kvmppc_vcpu_woken(v))
3246 n_ceded += v->arch.ceded;
3247 else
3248 v->arch.ceded = 0;
3249 }
3250 vc->runner = vcpu;
3251 if (n_ceded == vc->n_runnable) {
3252 kvmppc_vcore_blocked(vc);
3253 } else if (need_resched()) {
3254 kvmppc_vcore_preempt(vc);
3255
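/* Let something else run */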
3256 cond_resched_lock(&vc->lock);
3257 if (vc->vcore_state == VCORE_PREEMPT)
3258 kvmppc_vcore_end_preempt(vc);
3259 } else {
3260 kvmppc_run_core(vc);
3261 }
3262 vc->runner = NULL;
3263 }
3264
3265 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
3266 (vc->vcore_state == VCORE_RUNNING ||
3267 vc->vcore_state == VCORE_EXITING ||
3268 vc->vcore_state == VCORE_PIGGYBACK))
3269 kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
3270
3271 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
3272 kvmppc_vcore_end_preempt(vc);
3273
3274 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
3275 kvmppc_remove_runnable(vc, vcpu);
3276 vcpu->stat.signal_exits++;
3277 kvm_run->exit_reason = KVM_EXIT_INTR;
3278 vcpu->arch.ret = -EINTR;
3279 }
3280
3281 if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
3282
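/* Wake up some vcpu to run the core */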
3283 i = -1;
3284 v = next_runnable_thread(vc, &i);
3285 wake_up(&v->arch.cpu_run);
3286 }
3287
3288 trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
3289 spin_unlock(&vc->lock);
3290 return vcpu->arch.ret;
3291}
3292
3293static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
3294{
3295 int r;
3296 int srcu_idx;
3297 unsigned long ebb_regs[3] = {};
3298 unsigned long user_tar = 0;
3299 unsigned int user_vrsave;
3300 struct kvm *kvm;
3301
3302 if (!vcpu->arch.sane) {
3303 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3304 return -EINVAL;
3305 }
3306
3307 /*
3308  * Don't allow entry with a suspended transaction, because
3309  * the guest entry/exit code will lose it.
3310  * If the guest has TM enabled, save away their TM-related SPRs
3311  * (they will get restored by the TM unavailable interrupt).
3312  */
3313#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
3314 if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
3315 (current->thread.regs->msr & MSR_TM)) {
3316 if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
3317 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3318 run->fail_entry.hardware_entry_failure_reason = 0;
3319 return -EINVAL;
3320 }
3321
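/* Enable TM so we can read the TM SPRs */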
3322 mtmsr(mfmsr() | MSR_TM);
3323 current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
3324 current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
3325 current->thread.tm_texasr = mfspr(SPRN_TEXASR);
3326 current->thread.regs->msr &= ~MSR_TM;
3327 }
3328#endif
3329
3330 kvmppc_core_prepare_to_enter(vcpu);
3331
3332 /* No need to go into the guest when all we'll do is come back out */
3333 if (signal_pending(current)) {
3334 run->exit_reason = KVM_EXIT_INTR;
3335 return -EINTR;
3336 }
3337
3338 kvm = vcpu->kvm;
3339 atomic_inc(&kvm->arch.vcpus_running);
3340
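/* Order vcpus_running vs. mmu_ready: pairs with the smp_mb() in kvmhv_configure_mmu() */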
3341 smp_mb();
3342
3343 flush_all_to_thread(current);
3344
3345 /* Save userspace EBB and other register values */
3346 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
3347 ebb_regs[0] = mfspr(SPRN_EBBHR);
3348 ebb_regs[1] = mfspr(SPRN_EBBRR);
3349 ebb_regs[2] = mfspr(SPRN_BESCR);
3350 user_tar = mfspr(SPRN_TAR);
3351 }
3352 user_vrsave = mfspr(SPRN_VRSAVE);
3353
3354 vcpu->arch.wqp = &vcpu->arch.vcore->wq;
3355 vcpu->arch.pgdir = current->mm->pgd;
3356 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
3357
3358 do {
3359 r = kvmppc_run_vcpu(run, vcpu);
3360
3361 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
3362 !(vcpu->arch.shregs.msr & MSR_PR)) {
3363 trace_kvm_hcall_enter(vcpu);
3364 r = kvmppc_pseries_do_hcall(vcpu);
3365 trace_kvm_hcall_exit(vcpu, r);
3366 kvmppc_core_prepare_to_enter(vcpu);
3367 } else if (r == RESUME_PAGE_FAULT) {
3368 srcu_idx = srcu_read_lock(&kvm->srcu);
3369 r = kvmppc_book3s_hv_page_fault(run, vcpu,
3370 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
3371 srcu_read_unlock(&kvm->srcu, srcu_idx);
3372 } else if (r == RESUME_PASSTHROUGH) {
3373 if (WARN_ON(xive_enabled()))
3374 r = H_SUCCESS;
3375 else
3376 r = kvmppc_xics_rm_complete(vcpu, 0);
3377 }
3378 } while (is_kvmppc_resume_guest(r));
3379
3380 /* Restore userspace EBB and other register values */
3381 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
3382 mtspr(SPRN_EBBHR, ebb_regs[0]);
3383 mtspr(SPRN_EBBRR, ebb_regs[1]);
3384 mtspr(SPRN_BESCR, ebb_regs[2]);
3385 mtspr(SPRN_TAR, user_tar);
3386 mtspr(SPRN_FSCR, current->thread.fscr);
3387 }
3388 mtspr(SPRN_VRSAVE, user_vrsave);
3389
3390 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
3391 atomic_dec(&kvm->arch.vcpus_running);
3392 return r;
3393}
3394
3395static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
3396 int shift, int sllp)
3397{
3398 (*sps)->page_shift = shift;
3399 (*sps)->slb_enc = sllp;
3400 (*sps)->enc[0].page_shift = shift;
3401 (*sps)->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
3402
3403
3404 /* Add 16MB MPSS support if the host supports it */
3405 if (shift != 24) {
3406 int penc = kvmppc_pgsize_lp_encoding(shift, 24);
3407 if (penc != -1) {
3408 (*sps)->enc[1].page_shift = 24;
3409 (*sps)->enc[1].pte_enc = penc;
3410 }
3411 }
3412 (*sps)++;
3413}
3414
3415static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
3416 struct kvm_ppc_smmu_info *info)
3417{
3418 struct kvm_ppc_one_seg_page_size *sps;
3419
3420 /*
3421  * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
3422  * POWER7 doesn't support keys for instruction accesses,
3423  * POWER8 and POWER9 do.
3424  */
3425 info->data_keys = 32;
3426 info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
3427
3428
3429 info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
3430 info->slb_size = 32;
3431
3432
3433 sps = &info->sps[0];
3434 kvmppc_add_seg_page_size(&sps, 12, 0);
3435 kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
3436 kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
3437
3438 return 0;
3439}
3440
3441 /*
3442  * Get (and clear) the dirty memory log for a memory slot.
3443  */
3444static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
3445 struct kvm_dirty_log *log)
3446{
3447 struct kvm_memslots *slots;
3448 struct kvm_memory_slot *memslot;
3449 int i, r;
3450 unsigned long n;
3451 unsigned long *buf, *p;
3452 struct kvm_vcpu *vcpu;
3453
3454 mutex_lock(&kvm->slots_lock);
3455
3456 r = -EINVAL;
3457 if (log->slot >= KVM_USER_MEM_SLOTS)
3458 goto out;
3459
3460 slots = kvm_memslots(kvm);
3461 memslot = id_to_memslot(slots, log->slot);
3462 r = -ENOENT;
3463 if (!memslot->dirty_bitmap)
3464 goto out;
3465
3466 /*
3467  * Use the second half of the bitmap area as a scratch buffer
3468  * in which to accumulate the dirty bits.
3469  */
3470 n = kvm_dirty_bitmap_bytes(memslot);
3471 buf = memslot->dirty_bitmap + n / sizeof(long);
3472 memset(buf, 0, n);
3473
3474 if (kvm_is_radix(kvm))
3475 r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
3476 else
3477 r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
3478 if (r)
3479 goto out;
3480
3481 /*
3482  * We accumulate dirty bits in the first half of the
3483  * memslot's dirty_bitmap area, for when pages are paged
3484  * out or modified by the host directly.  Pick up these
3485  * bits and add them to the buffer.
3486  */
3487 p = memslot->dirty_bitmap;
3488 for (i = 0; i < n / sizeof(long); ++i)
3489 buf[i] |= xchg(&p[i], 0);
3490
3491 /* Harvest dirty bits from VPA and DTL updates */
3492 /* Note: we never modify the SLB shadow buffer areas */
3493 kvm_for_each_vcpu(i, vcpu, kvm) {
3494 spin_lock(&vcpu->arch.vpa_update_lock);
3495 kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
3496 kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
3497 spin_unlock(&vcpu->arch.vpa_update_lock);
3498 }
3499
3500 r = -EFAULT;
3501 if (copy_to_user(log->dirty_bitmap, buf, n))
3502 goto out;
3503
3504 r = 0;
3505out:
3506 mutex_unlock(&kvm->slots_lock);
3507 return r;
3508}
3509
3510static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
3511 struct kvm_memory_slot *dont)
3512{
3513 if (!dont || free->arch.rmap != dont->arch.rmap) {
3514 vfree(free->arch.rmap);
3515 free->arch.rmap = NULL;
3516 }
3517}
3518
3519static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
3520 unsigned long npages)
3521{
3522 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
3523 if (!slot->arch.rmap)
3524 return -ENOMEM;
3525
3526 return 0;
3527}
3528
3529static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
3530 struct kvm_memory_slot *memslot,
3531 const struct kvm_userspace_memory_region *mem)
3532{
3533 return 0;
3534}
3535
3536static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
3537 const struct kvm_userspace_memory_region *mem,
3538 const struct kvm_memory_slot *old,
3539 const struct kvm_memory_slot *new)
3540{
3541 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
3542
3543 /*
3544  * If we are creating or modifying a memslot, it might make
3545  * some address that was previously cached as emulated
3546  * MMIO be no longer emulated MMIO, so invalidate
3547  * all the caches of emulated MMIO translations.
3548  */
3549 if (npages)
3550 atomic64_inc(&kvm->arch.mmio_update);
3551}
3552
3553 /*
3554  * Update LPCR values in kvm->arch and in vcores.
3555  * Caller must hold kvm->lock.
3556  */
3557void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
3558{
3559 long int i;
3560 u32 cores_done = 0;
3561
3562 if ((kvm->arch.lpcr & mask) == lpcr)
3563 return;
3564
3565 kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
3566
3567 for (i = 0; i < KVM_MAX_VCORES; ++i) {
3568 struct kvmppc_vcore *vc = kvm->arch.vcores[i];
3569 if (!vc)
3570 continue;
3571 spin_lock(&vc->lock);
3572 vc->lpcr = (vc->lpcr & ~mask) | lpcr;
3573 spin_unlock(&vc->lock);
3574 if (++cores_done >= kvm->arch.online_vcores)
3575 break;
3576 }
3577}
3578
3579static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
3580{
3581 return;
3582}
3583
3584void kvmppc_setup_partition_table(struct kvm *kvm)
3585{
3586 unsigned long dw0, dw1;
3587
3588 if (!kvm_is_radix(kvm)) {
3589
3590 dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
3591 ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
3592
3593 dw0 |= kvm->arch.sdr1;
3594
3595
3596 dw1 = kvm->arch.process_table;
3597 } else {
3598 dw0 = PATB_HR | radix__get_tree_size() |
3599 __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
3600 dw1 = PATB_GR | kvm->arch.process_table;
3601 }
3602
3603 mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
3604}
3605
3606 /*
3607  * Set up the hashed page table and the VRMA (virtual real-mode
3608  * area) for the guest.  Called with kvm->lock held.
3609  */
3610static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
3611{
3612 int err = 0;
3613 struct kvm *kvm = vcpu->kvm;
3614 unsigned long hva;
3615 struct kvm_memory_slot *memslot;
3616 struct vm_area_struct *vma;
3617 unsigned long lpcr = 0, senc;
3618 unsigned long psize, porder;
3619 int srcu_idx;
3620
3621
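/* Allocate hashed page table (if not done already) and reset it */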
3622 if (!kvm->arch.hpt.virt) {
3623 int order = KVM_DEFAULT_HPT_ORDER;
3624 struct kvm_hpt_info info;
3625
3626 err = kvmppc_allocate_hpt(&info, order);
3627 /*
3628  * If allocating the default order fails, try smaller sizes.
3629  */
3630 while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER)
3631 err = kvmppc_allocate_hpt(&info, order);
3632
3633 if (err < 0) {
3634 pr_err("KVM: Couldn't alloc HPT\n");
3635 goto out;
3636 }
3637
3638 kvmppc_set_hpt(kvm, &info);
3639 }
3640
3641
3642 srcu_idx = srcu_read_lock(&kvm->srcu);
3643 memslot = gfn_to_memslot(kvm, 0);
3644
3645
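/* We must have some memory at guest physical address 0 by now */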
3646 err = -EINVAL;
3647 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
3648 goto out_srcu;
3649
3650
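/* Look up the VMA for the start of this memory slot */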
3651 hva = memslot->userspace_addr;
3652 down_read(&current->mm->mmap_sem);
3653 vma = find_vma(current->mm, hva);
3654 if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
3655 goto up_out;
3656
3657 psize = vma_kernel_pagesize(vma);
3658
3659 up_read(&current->mm->mmap_sem);
3660
3661 /* We can handle 4k, 64k or 16M pages in the VRMA */
3662 if (psize >= 0x1000000)
3663 psize = 0x1000000;
3664 else if (psize >= 0x10000)
3665 psize = 0x10000;
3666 else
3667 psize = 0x1000;
3668 porder = __ilog2(psize);
3669
3670 senc = slb_pgsize_encoding(psize);
3671 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
3672 (VRMA_VSID << SLB_VSID_SHIFT_1T);
3673
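/* Create HPTEs in the hash page table for the VRMA */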
3674 kvmppc_map_vrma(vcpu, memslot, porder);
3675
3676
3677 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
3678
3679 lpcr = senc << (LPCR_VRMASD_SH - 4);
3680 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
3681 }
3682
3683 /* Order updates to kvm->arch.lpcr etc. vs. mmu_ready */
3684 smp_wmb();
3685 err = 0;
3686 out_srcu:
3687 srcu_read_unlock(&kvm->srcu, srcu_idx);
3688 out:
3689 return err;
3690
3691 up_out:
3692 up_read(&current->mm->mmap_sem);
3693 goto out_srcu;
3694}
3695
3696
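/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */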
3697int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
3698{
3699 kvmppc_free_radix(kvm);
3700 kvmppc_update_lpcr(kvm, LPCR_VPM1,
3701 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
3702 kvmppc_rmap_reset(kvm);
3703 kvm->arch.radix = 0;
3704 kvm->arch.process_table = 0;
3705 return 0;
3706}
3707
3708
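/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */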
3709int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
3710{
3711 int err;
3712
3713 err = kvmppc_init_vm_radix(kvm);
3714 if (err)
3715 return err;
3716
3717 kvmppc_free_hpt(&kvm->arch.hpt);
3718 kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
3719 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
3720 kvm->arch.radix = 1;
3721 return 0;
3722}
3723
3724#ifdef CONFIG_KVM_XICS
3725
3726 /*
3727  * Allocate a per-core structure for managing state about which cores are
3728  * running in the host versus the guest and for exchanging data between
3729  * real mode KVM and CPU running in the host.
3730  * This is only done for the first VM.
3731  * The allocated structure stays even if all VMs have stopped.
3732  * It is only freed when the kvm-hv module is unloaded.
3733  * It's OK for this structure to be freed after the last vm is closed.
3734  */
3735void kvmppc_alloc_host_rm_ops(void)
3736{
3737 struct kvmppc_host_rm_ops *ops;
3738 unsigned long l_ops;
3739 int cpu, core;
3740 int size;
3741
3742
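/* Nothing to do if the structure has already been allocated */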
3743 if (kvmppc_host_rm_ops_hv != NULL)
3744 return;
3745
3746 ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
3747 if (!ops)
3748 return;
3749
3750 size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
3751 ops->rm_core = kzalloc(size, GFP_KERNEL);
3752
3753 if (!ops->rm_core) {
3754 kfree(ops);
3755 return;
3756 }
3757
3758 cpus_read_lock();
3759
3760 for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
3761 if (!cpu_online(cpu))
3762 continue;
3763
3764 core = cpu >> threads_shift;
3765 ops->rm_core[core].rm_state.in_host = 1;
3766 }
3767
3768 ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
3769
3770 /*
3771  * Make the contents of the kvmppc_host_rm_ops structure visible
3772  * to other CPUs before we assign it to the global variable.
3773  * Do an atomic assignment (no locks used here), but if someone
3774  * beats us to it, just free our copy and return.
3775  */
3776 smp_wmb();
3777 l_ops = (unsigned long) ops;
3778
3779 if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
3780 cpus_read_unlock();
3781 kfree(ops->rm_core);
3782 kfree(ops);
3783 return;
3784 }
3785
3786 cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
3787 "ppc/kvm_book3s:prepare",
3788 kvmppc_set_host_core,
3789 kvmppc_clear_host_core);
3790 cpus_read_unlock();
3791}
3792
3793void kvmppc_free_host_rm_ops(void)
3794{
3795 if (kvmppc_host_rm_ops_hv) {
3796 cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE);
3797 kfree(kvmppc_host_rm_ops_hv->rm_core);
3798 kfree(kvmppc_host_rm_ops_hv);
3799 kvmppc_host_rm_ops_hv = NULL;
3800 }
3801}
3802#endif
3803
3804static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3805{
3806 unsigned long lpcr, lpid;
3807 char buf[32];
3808 int ret;
3809
3810
3811 /* Allocate the guest's logical partition ID */
3812 lpid = kvmppc_alloc_lpid();
3813 if ((long)lpid < 0)
3814 return -ENOMEM;
3815 kvm->arch.lpid = lpid;
3816
3817 kvmppc_alloc_host_rm_ops();
3818
3819 /*
3820  * Since we don't flush the TLB when tearing down a VM,
3821  * and this lpid might have previously been used,
3822  * make sure we flush on each core before running the new VM.
3823  * On POWER9, the tlbie in mmu_partition_table_set_entry()
3824  * does this flush for us.
3825  */
3826 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3827 cpumask_setall(&kvm->arch.need_tlb_flush);
3828
3829
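/* Start out with the default set of hcalls enabled */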
3830 memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
3831 sizeof(kvm->arch.enabled_hcalls));
3832
3833 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3834 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
3835
3836
3837 kvm->arch.host_lpid = mfspr(SPRN_LPID);
3838 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
3839 lpcr &= LPCR_PECE | LPCR_LPES;
3840 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
3841 LPCR_VPM0 | LPCR_VPM1;
3842 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
3843 (VRMA_VSID << SLB_VSID_SHIFT_1T);
3844
3845 if (cpu_has_feature(CPU_FTR_ARCH_207S))
3846 lpcr |= LPCR_ONL;
3847
3848 /*
3849  * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed).
3850  * Set HVICE bit to enable hypervisor virtualization interrupts.
3851  * Set HEIC to prevent OS interrupts from going to the hypervisor,
3852  * in case we re-enable EE in HV mode with this LPCR still set.
3853  */
3854 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
3855 lpcr &= ~LPCR_VPM0;
3856 lpcr |= LPCR_HVICE | LPCR_HEIC;
3857
3858 /*
3859  * If xive is enabled, we route 0x500 interrupts directly
3860  * to the guest.
3861  */
3862 if (xive_enabled())
3863 lpcr |= LPCR_LPES;
3864 }
3865
3866 /*
3867  * If the host uses radix, the guest starts out as radix.
3868  */
3869 if (radix_enabled()) {
3870 kvm->arch.radix = 1;
3871 kvm->arch.mmu_ready = 1;
3872 lpcr &= ~LPCR_VPM1;
3873 lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
3874 ret = kvmppc_init_vm_radix(kvm);
3875 if (ret) {
3876 kvmppc_free_lpid(kvm->arch.lpid);
3877 return ret;
3878 }
3879 kvmppc_setup_partition_table(kvm);
3880 }
3881
3882 kvm->arch.lpcr = lpcr;
3883
3884
3885 kvm->arch.resize_hpt = NULL;
3886
3887 /*
3888  * Work out how many sets the TLB has, for the use of
3889  * the TLB invalidation loop in book3s_hv_rmhandlers.S.
3890  */
3891 if (radix_enabled())
3892 kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX;
3893 else if (cpu_has_feature(CPU_FTR_ARCH_300))
3894 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH;
3895 else if (cpu_has_feature(CPU_FTR_ARCH_207S))
3896 kvm->arch.tlb_sets = POWER8_TLB_SETS;
3897 else
3898 kvm->arch.tlb_sets = POWER7_TLB_SETS;
3899
3900 /*
3901  * Track that we now have a HV mode VM active. This blocks
3902  * secondary CPU threads from coming online.
3903  * On POWER9, we only need to do this if the "indep_threads_mode"
3904  * module parameter has been set to N.
3905  */
3906 if (cpu_has_feature(CPU_FTR_ARCH_300))
3907 kvm->arch.threads_indep = indep_threads_mode;
3908 if (!kvm->arch.threads_indep)
3909 kvm_hv_vm_activated();
3910
3911 /*
3912  * Initialize smt_mode depending on processor.
3913  * POWER8 and earlier have to use "strict" threading, where
3914  * all vCPUs in a vcore have to run on the same (sub)core,
3915  * whereas on POWER9 the threads can each run a different
3916  * guest.
3917  */
3918 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3919 kvm->arch.smt_mode = threads_per_subcore;
3920 else
3921 kvm->arch.smt_mode = 1;
3922 kvm->arch.emul_smt_mode = 1;
3923
3924 /*
3925  * Create a debugfs directory for the VM.
3926  */
3927 snprintf(buf, sizeof(buf), "vm%d", current->pid);
3928 kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
3929 if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
3930 kvmppc_mmu_debugfs_init(kvm);
3931
3932 return 0;
3933}
3934
3935static void kvmppc_free_vcores(struct kvm *kvm)
3936{
3937 long int i;
3938
3939 for (i = 0; i < KVM_MAX_VCORES; ++i)
3940 kfree(kvm->arch.vcores[i]);
3941 kvm->arch.online_vcores = 0;
3942}
3943
3944static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
3945{
3946 debugfs_remove_recursive(kvm->arch.debugfs_dir);
3947
3948 if (!kvm->arch.threads_indep)
3949 kvm_hv_vm_deactivated();
3950
3951 kvmppc_free_vcores(kvm);
3952
3953 kvmppc_free_lpid(kvm->arch.lpid);
3954
3955 if (kvm_is_radix(kvm))
3956 kvmppc_free_radix(kvm);
3957 else
3958 kvmppc_free_hpt(&kvm->arch.hpt);
3959
3960 kvmppc_free_pimap(kvm);
3961}
3962
3963
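/* We don't need to emulate any privileged instructions or dcbz */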
3964static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
3965 unsigned int inst, int *advance)
3966{
3967 return EMULATE_FAIL;
3968}
3969
3970static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
3971 ulong spr_val)
3972{
3973 return EMULATE_FAIL;
3974}
3975
3976static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
3977 ulong *spr_val)
3978{
3979 return EMULATE_FAIL;
3980}
3981
3982static int kvmppc_core_check_processor_compat_hv(void)
3983{
3984 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
3985 !cpu_has_feature(CPU_FTR_ARCH_206))
3986 return -EIO;
3987
3988 return 0;
3989}
3990
3991#ifdef CONFIG_KVM_XICS
3992
3993void kvmppc_free_pimap(struct kvm *kvm)
3994{
3995 kfree(kvm->arch.pimap);
3996}
3997
3998static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
3999{
4000 return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
4001}
4002
4003static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
4004{
4005 struct irq_desc *desc;
4006 struct kvmppc_irq_map *irq_map;
4007 struct kvmppc_passthru_irqmap *pimap;
4008 struct irq_chip *chip;
4009 int i, rc = 0;
4010
4011 if (!kvm_irq_bypass)
4012 return 1;
4013
4014 desc = irq_to_desc(host_irq);
4015 if (!desc)
4016 return -EIO;
4017
4018 mutex_lock(&kvm->lock);
4019
4020 pimap = kvm->arch.pimap;
4021 if (pimap == NULL) {
4022
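/* First call: allocate the irq-map structure */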
4023 pimap = kvmppc_alloc_pimap();
4024 if (pimap == NULL) {
4025 mutex_unlock(&kvm->lock);
4026 return -ENOMEM;
4027 }
4028 kvm->arch.pimap = pimap;
4029 }
4030
4031 /*
4032  * For now, we only support interrupts for which the EOI operation
4033  * is an OPAL call followed by a write to XIRR, since that's
4034  * what our real-mode EOI code does, or a XIVE interrupt.
4035  */
4036 chip = irq_data_get_irq_chip(&desc->irq_data);
4037 if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
4038 pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
4039 host_irq, guest_gsi);
4040 mutex_unlock(&kvm->lock);
4041 return -ENOENT;
4042 }
4043
4044 /*
4045  * See if we already have an entry for this guest IRQ number.
4046  * If it's mapped to a hardware IRQ number, that's an error;
4047  * otherwise re-use this entry.
4048  */
4049 for (i = 0; i < pimap->n_mapped; i++) {
4050 if (guest_gsi == pimap->mapped[i].v_hwirq) {
4051 if (pimap->mapped[i].r_hwirq) {
4052 mutex_unlock(&kvm->lock);
4053 return -EINVAL;
4054 }
4055 break;
4056 }
4057 }
4058
4059 if (i == KVMPPC_PIRQ_MAPPED) {
4060 mutex_unlock(&kvm->lock);
4061 return -EAGAIN;
4062 }
4063
4064 irq_map = &pimap->mapped[i];
4065
4066 irq_map->v_hwirq = guest_gsi;
4067 irq_map->desc = desc;
4068
4069 /*
4070  * Order the above two stores before the next to serialize with
4071  * the KVM real mode handler.
4072  */
4073 smp_wmb();
4074 irq_map->r_hwirq = desc->irq_data.hwirq;
4075
4076 if (i == pimap->n_mapped)
4077 pimap->n_mapped++;
4078
4079 if (xive_enabled())
4080 rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
4081 else
4082 kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
4083 if (rc)
4084 irq_map->r_hwirq = 0;
4085
4086 mutex_unlock(&kvm->lock);
4087
4088 return 0;
4089}
4090
4091static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
4092{
4093 struct irq_desc *desc;
4094 struct kvmppc_passthru_irqmap *pimap;
4095 int i, rc = 0;
4096
4097 if (!kvm_irq_bypass)
4098 return 0;
4099
4100 desc = irq_to_desc(host_irq);
4101 if (!desc)
4102 return -EIO;
4103
4104 mutex_lock(&kvm->lock);
4105 if (!kvm->arch.pimap)
4106 goto unlock;
4107
4108 pimap = kvm->arch.pimap;
4109
4110 for (i = 0; i < pimap->n_mapped; i++) {
4111 if (guest_gsi == pimap->mapped[i].v_hwirq)
4112 break;
4113 }
4114
4115 if (i == pimap->n_mapped) {
4116 mutex_unlock(&kvm->lock);
4117 return -ENODEV;
4118 }
4119
4120 if (xive_enabled())
4121 rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
4122 else
4123 kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
4124
4125
4126 pimap->mapped[i].r_hwirq = 0;
4127
4128 /*
4129  * We don't free this structure even when the count goes to
4130  * zero. The structure is freed when we destroy the VM.
4131  */
4132 unlock:
4133 mutex_unlock(&kvm->lock);
4134 return rc;
4135}
4136
4137static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
4138 struct irq_bypass_producer *prod)
4139{
4140 int ret = 0;
4141 struct kvm_kernel_irqfd *irqfd =
4142 container_of(cons, struct kvm_kernel_irqfd, consumer);
4143
4144 irqfd->producer = prod;
4145
4146 ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
4147 if (ret)
4148 pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
4149 prod->irq, irqfd->gsi, ret);
4150
4151 return ret;
4152}
4153
4154static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
4155 struct irq_bypass_producer *prod)
4156{
4157 int ret;
4158 struct kvm_kernel_irqfd *irqfd =
4159 container_of(cons, struct kvm_kernel_irqfd, consumer);
4160
4161 irqfd->producer = NULL;
4162
4163 /*
4164  * When the producer of a consumer is unregistered, we change
4165  * back to the default external interrupt handling mode: KVM
4166  * real mode will switch back to the host.
4167  */
4168 ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
4169 if (ret)
4170 pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
4171 prod->irq, irqfd->gsi, ret);
4172}
4173#endif
4174
4175static long kvm_arch_vm_ioctl_hv(struct file *filp,
4176 unsigned int ioctl, unsigned long arg)
4177{
4178 struct kvm *kvm __maybe_unused = filp->private_data;
4179 void __user *argp = (void __user *)arg;
4180 long r;
4181
4182 switch (ioctl) {
4183
4184 case KVM_PPC_ALLOCATE_HTAB: {
4185 u32 htab_order;
4186
4187 r = -EFAULT;
4188 if (get_user(htab_order, (u32 __user *)argp))
4189 break;
4190 r = kvmppc_alloc_reset_hpt(kvm, htab_order);
4191 if (r)
4192 break;
4193 r = 0;
4194 break;
4195 }
4196
4197 case KVM_PPC_GET_HTAB_FD: {
4198 struct kvm_get_htab_fd ghf;
4199
4200 r = -EFAULT;
4201 if (copy_from_user(&ghf, argp, sizeof(ghf)))
4202 break;
4203 r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
4204 break;
4205 }
4206
4207 case KVM_PPC_RESIZE_HPT_PREPARE: {
4208 struct kvm_ppc_resize_hpt rhpt;
4209
4210 r = -EFAULT;
4211 if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
4212 break;
4213
4214 r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt);
4215 break;
4216 }
4217
4218 case KVM_PPC_RESIZE_HPT_COMMIT: {
4219 struct kvm_ppc_resize_hpt rhpt;
4220
4221 r = -EFAULT;
4222 if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
4223 break;
4224
4225 r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt);
4226 break;
4227 }
4228
4229 default:
4230 r = -ENOTTY;
4231 }
4232
4233 return r;
4234}
4235
4236 /*
4237  * List of hcall numbers to enable by default.
4238  * For compatibility with old userspace, we enable by default
4239  * all hcalls that were implemented before the hcall-enabling
4240  * facility was added.  Note this list should not include H_RTAS.
4241  */
4242static unsigned int default_hcall_list[] = {
4243 H_REMOVE,
4244 H_ENTER,
4245 H_READ,
4246 H_PROTECT,
4247 H_BULK_REMOVE,
4248 H_GET_TCE,
4249 H_PUT_TCE,
4250 H_SET_DABR,
4251 H_SET_XDABR,
4252 H_CEDE,
4253 H_PROD,
4254 H_CONFER,
4255 H_REGISTER_VPA,
4256#ifdef CONFIG_KVM_XICS
4257 H_EOI,
4258 H_CPPR,
4259 H_IPI,
4260 H_IPOLL,
4261 H_XIRR,
4262 H_XIRR_X,
4263#endif
4264 0
4265};
4266
4267static void init_default_hcalls(void)
4268{
4269 int i;
4270 unsigned int hcall;
4271
4272 for (i = 0; default_hcall_list[i]; ++i) {
4273 hcall = default_hcall_list[i];
4274 WARN_ON(!kvmppc_hcall_impl_hv(hcall));
4275 __set_bit(hcall / 4, default_enabled_hcalls);
4276 }
4277}
4278
4279static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
4280{
4281 unsigned long lpcr;
4282 int radix;
4283 int err;
4284
4285
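/* Only available on POWER9 */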
4286 if (!cpu_has_feature(CPU_FTR_ARCH_300))
4287 return -ENODEV;
4288
4289
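/* Reject any unknown flags */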
4290 if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
4291 return -EINVAL;
4292
4293
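/* The GR (guest radix) bit in the process table must match the radix flag */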
4294 radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
4295 if (!!(cfg->process_table & PATB_GR) != radix)
4296 return -EINVAL;
4297
4298
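/* Process table size field must be reasonable, i.e. <= 24 */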
4299 if ((cfg->process_table & PRTS_MASK) > 24)
4300 return -EINVAL;
4301
4302
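/* We can enable radix for the guest only if the host is radix */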
4303 if (radix && !radix_enabled())
4304 return -EINVAL;
4305
4306 mutex_lock(&kvm->lock);
4307 if (radix != kvm_is_radix(kvm)) {
4308 if (kvm->arch.mmu_ready) {
4309 kvm->arch.mmu_ready = 0;
4310
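/* order mmu_ready vs. vcpus_running; pairs with the barrier in kvmppc_vcpu_run_hv() */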
4311 smp_mb();
4312 if (atomic_read(&kvm->arch.vcpus_running)) {
4313 kvm->arch.mmu_ready = 1;
4314 err = -EBUSY;
4315 goto out_unlock;
4316 }
4317 }
4318 if (radix)
4319 err = kvmppc_switch_mmu_to_radix(kvm);
4320 else
4321 err = kvmppc_switch_mmu_to_hpt(kvm);
4322 if (err)
4323 goto out_unlock;
4324 }
4325
4326 kvm->arch.process_table = cfg->process_table;
4327 kvmppc_setup_partition_table(kvm);
4328
4329 lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
4330 kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
4331 err = 0;
4332
4333 out_unlock:
4334 mutex_unlock(&kvm->lock);
4335 return err;
4336}
4337
4338static struct kvmppc_ops kvm_ops_hv = {
4339 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
4340 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
4341 .get_one_reg = kvmppc_get_one_reg_hv,
4342 .set_one_reg = kvmppc_set_one_reg_hv,
4343 .vcpu_load = kvmppc_core_vcpu_load_hv,
4344 .vcpu_put = kvmppc_core_vcpu_put_hv,
4345 .set_msr = kvmppc_set_msr_hv,
4346 .vcpu_run = kvmppc_vcpu_run_hv,
4347 .vcpu_create = kvmppc_core_vcpu_create_hv,
4348 .vcpu_free = kvmppc_core_vcpu_free_hv,
4349 .check_requests = kvmppc_core_check_requests_hv,
4350 .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv,
4351 .flush_memslot = kvmppc_core_flush_memslot_hv,
4352 .prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
4353 .commit_memory_region = kvmppc_core_commit_memory_region_hv,
4354 .unmap_hva = kvm_unmap_hva_hv,
4355 .unmap_hva_range = kvm_unmap_hva_range_hv,
4356 .age_hva = kvm_age_hva_hv,
4357 .test_age_hva = kvm_test_age_hva_hv,
4358 .set_spte_hva = kvm_set_spte_hva_hv,
4359 .mmu_destroy = kvmppc_mmu_destroy_hv,
4360 .free_memslot = kvmppc_core_free_memslot_hv,
4361 .create_memslot = kvmppc_core_create_memslot_hv,
4362 .init_vm = kvmppc_core_init_vm_hv,
4363 .destroy_vm = kvmppc_core_destroy_vm_hv,
4364 .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
4365 .emulate_op = kvmppc_core_emulate_op_hv,
4366 .emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
4367 .emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
4368 .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
4369 .arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
4370 .hcall_implemented = kvmppc_hcall_impl_hv,
4371#ifdef CONFIG_KVM_XICS
4372 .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
4373 .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
4374#endif
4375 .configure_mmu = kvmhv_configure_mmu,
4376 .get_rmmu_info = kvmhv_get_rmmu_info,
4377 .set_smt_mode = kvmhv_set_smt_mode,
4378};
4379
4380static int kvm_init_subcore_bitmap(void)
4381{
4382 int i, j;
4383 int nr_cores = cpu_nr_cores();
4384 struct sibling_subcore_state *sibling_subcore_state;
4385
4386 for (i = 0; i < nr_cores; i++) {
4387 int first_cpu = i * threads_per_core;
4388 int node = cpu_to_node(first_cpu);
4389
4390
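/* Nothing to do if it is already allocated */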
4391 if (paca[first_cpu].sibling_subcore_state)
4392 continue;
4393
4394 sibling_subcore_state =
4395 kmalloc_node(sizeof(struct sibling_subcore_state),
4396 GFP_KERNEL, node);
4397 if (!sibling_subcore_state)
4398 return -ENOMEM;
4399
4400 memset(sibling_subcore_state, 0,
4401 sizeof(struct sibling_subcore_state));
4402
4403 for (j = 0; j < threads_per_core; j++) {
4404 int cpu = first_cpu + j;
4405
4406 paca[cpu].sibling_subcore_state = sibling_subcore_state;
4407 }
4408 }
4409 return 0;
4410}
4411
4412static int kvmppc_radix_possible(void)
4413{
4414 return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled();
4415}
4416
4417static int kvmppc_book3s_init_hv(void)
4418{
4419 int r;
4420
4421
4422 /* Check processor compatibility before doing anything else */
4423 r = kvmppc_core_check_processor_compat_hv();
4424 if (r < 0)
4425 return -ENODEV;
4426
4427 r = kvm_init_subcore_bitmap();
4428 if (r)
4429 return r;
4430
4431 /*
4432  * We need a way of accessing the XICS interrupt controller,
4433  * either directly (via paca[cpu].kvm_hstate.xics_phys) or
4434  * indirectly (via OPAL).
4435  */
4436#ifdef CONFIG_SMP
4437 if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) {
4438 struct device_node *np;
4439
4440 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
4441 if (!np) {
4442 pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
4443 return -ENODEV;
4444 }
4445 }
4446#endif
4447
4448 kvm_ops_hv.owner = THIS_MODULE;
4449 kvmppc_hv_ops = &kvm_ops_hv;
4450
4451 init_default_hcalls();
4452
4453 init_vcore_lists();
4454
4455 r = kvmppc_mmu_hv_init();
4456 if (r)
4457 return r;
4458
4459 if (kvmppc_radix_possible())
4460 r = kvmppc_radix_init();
4461
4462 /*
4463  * POWER9 chips before version 2.02 can't have some threads in
4464  * HPT mode and some in radix mode on the same core.
4465  */
4466 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
4467 unsigned int pvr = mfspr(SPRN_PVR);
4468 if ((pvr >> 16) == PVR_POWER9 &&
4469 (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
4470 ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
4471 no_mixing_hpt_and_radix = true;
4472 }
4473
4474 return r;
4475}
4476
4477static void kvmppc_book3s_exit_hv(void)
4478{
4479 kvmppc_free_host_rm_ops();
4480 if (kvmppc_radix_possible())
4481 kvmppc_radix_exit();
4482 kvmppc_hv_ops = NULL;
4483}
4484
4485module_init(kvmppc_book3s_init_hv);
4486module_exit(kvmppc_book3s_exit_hv);
4487MODULE_LICENSE("GPL");
4488MODULE_ALIAS_MISCDEV(KVM_MINOR);
4489MODULE_ALIAS("devname:kvm");
4490