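/*
 * KVM for 64-bit Book3S processors in hypervisor mode (HV KVM):
 * guests run using the hypervisor facilities of POWER7 and later
 * CPUs, with one virtual core (vcore) per group of vcpus that
 * share a physical core.
 */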
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>
#include <linux/miscdevice.h>
#include <linux/debugfs.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <asm/smp.h>
#include <asm/dbell.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/module.h>

#include "book3s.h"

#define CREATE_TRACE_POINTS
#include "trace_hv.h"

/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)

/* Used as a "null" value for timebase values */
#define TB_NIL	(~(u64)0)

/* Bitmap of hcalls that are enabled by default on a newly created VM */
static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);

static int dynamic_mt_modes = 6;
module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
static int target_smt_mode;
module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");

#ifdef CONFIG_KVM_XICS
static struct kernel_param_ops module_param_ops = {
	.set = param_set_int,
	.get = param_get_int,
};

module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif

static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
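
/*
 * Kick a target hardware thread.  On POWER8 (ARCH_207S) a msgsnd
 * doorbell can be used if the target thread is in the same core as
 * the caller; otherwise fall back to an XICS IPI if the target has
 * a real-mode XICS presentation controller.  Returns false if
 * neither mechanism applies and the caller must use a generic IPI.
 */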
static bool kvmppc_ipi_thread(int cpu)
{
	/* On POWER8 for IPIs to threads in the same core, use msgsnd */
	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		preempt_disable();
		if (cpu_first_thread_sibling(cpu) ==
		    cpu_first_thread_sibling(smp_processor_id())) {
			unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
			msg |= cpu_thread_in_core(cpu);
			smp_mb();
			__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
			preempt_enable();
			return true;
		}
		preempt_enable();
	}

#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
	if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) {
		xics_wake_cpu(cpu);
		return true;
	}
#endif

	return false;
}

static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
	int cpu;
	struct swait_queue_head *wqp;

	wqp = kvm_arch_vcpu_wq(vcpu);
	if (swait_active(wqp)) {
		swake_up(wqp);
		++vcpu->stat.halt_wakeup;
	}

	if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
		return;

	/* CPU points to the first thread of the core */
	cpu = vcpu->cpu;
	if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
		smp_send_reschedule(cpu);
}
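
/*
 * We use the vcpu_load/put functions to measure stolen time.
 * Stolen time is counted as time when either the vcpu is able to
 * run as part of a virtual core, but the task running the vcore
 * is preempted or sleeping, or when the vcpu needs something done
 * in the kernel by the task running the vcpu, but that task is
 * preempted or sleeping on its own behalf.
 *
 * Core-level stolen time accumulates in vc->stolen_tb, protected by
 * vc->stoltb_lock; time a vcpu spends busy in the host accumulates
 * in vcpu->arch.busy_stolen, protected by vcpu->arch.tbacct_lock.
 */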
static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
{
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	vc->preempt_tb = mftb();
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}

static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
{
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	if (vc->preempt_tb != TB_NIL) {
		vc->stolen_tb += mftb() - vc->preempt_tb;
		vc->preempt_tb = TB_NIL;
	}
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}

static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	unsigned long flags;

	/*
	 * We can test vc->runner without taking the vcore lock,
	 * because only this task ever sets vc->runner to this
	 * vcpu, and once it is set to this vcpu, only this task
	 * ever sets it to NULL.
	 */
	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
		kvmppc_core_end_stolen(vc);

	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
	    vcpu->arch.busy_preempt != TB_NIL) {
		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
		vcpu->arch.busy_preempt = TB_NIL;
	}
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}

static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	unsigned long flags;

	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
		kvmppc_core_start_stolen(vc);

	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
		vcpu->arch.busy_preempt = mftb();
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}

static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
{
	/*
	 * Check for illegal transactional state bit combination
	 * and if we find it, force the TS field to a safe state.
	 */
	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
		msr &= ~MSR_TS_MASK;
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}

static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
	unsigned long pcr = 0;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	if (arch_compat) {
		switch (arch_compat) {
		case PVR_ARCH_205:
			/*
			 * If an arch bit is set in PCR, all the defined
			 * higher-order arch bits also have to be set.
			 */
			pcr = PCR_ARCH_206 | PCR_ARCH_205;
			break;
		case PVR_ARCH_206:
		case PVR_ARCH_206p:
			pcr = PCR_ARCH_206;
			break;
		case PVR_ARCH_207:
			break;
		default:
			return -EINVAL;
		}

		if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
			/* POWER7 can't emulate POWER8 */
			if (!(pcr & PCR_ARCH_206))
				return -EINVAL;
			pcr &= ~PCR_ARCH_206;
		}
	}

	spin_lock(&vc->lock);
	vc->arch_compat = arch_compat;
	vc->pcr = pcr;
	spin_unlock(&vc->lock);

	return 0;
}

static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err(" ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	struct kvm_vcpu *ret;

	mutex_lock(&kvm->lock);
	ret = kvm_get_vcpu_by_id(kvm, id);
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->__old_status |= LPPACA_OLD_SHARED_PROC;
	vpa->yield_count = cpu_to_be32(1);
}

static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
		   unsigned long addr, unsigned long len)
{
	/* check address is cacheline aligned */
	if (addr & (L1_CACHE_BYTES - 1))
		return -EINVAL;
	spin_lock(&vcpu->arch.vpa_update_lock);
	if (v->next_gpa != addr || v->len != len) {
		v->next_gpa = addr;
		v->len = addr ? len : 0;
		v->update_pending = 1;
	}
	spin_unlock(&vcpu->arch.vpa_update_lock);
	return 0;
}
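
/* Length for a per-processor buffer is passed in at offset 4 in the buffer */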
struct reg_vpa {
	u32 dummy;
	union {
		__be16 hword;
		__be32 word;
	} length;
};

static int vpa_is_registered(struct kvmppc_vpa *vpap)
{
	if (vpap->update_pending)
		return vpap->next_gpa != 0;
	return vpap->pinned_addr != NULL;
}

static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long len, nb;
	void *va;
	struct kvm_vcpu *tvcpu;
	int err;
	int subfunc;
	struct kvmppc_vpa *vpap;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

	subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
	if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
	    subfunc == H_VPA_REG_SLB) {
		/* Registering new area - address must be cache-line aligned */
		if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
			return H_PARAMETER;

		/* convert logical addr to kernel addr and read length */
		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
		if (va == NULL)
			return H_PARAMETER;
		if (subfunc == H_VPA_REG_VPA)
			len = be16_to_cpu(((struct reg_vpa *)va)->length.hword);
		else
			len = be32_to_cpu(((struct reg_vpa *)va)->length.word);
		kvmppc_unpin_guest_page(kvm, va, vpa, false);

		/* Check length */
		if (len > nb || len < sizeof(struct reg_vpa))
			return H_PARAMETER;
	} else {
		vpa = 0;
		len = 0;
	}

	err = H_PARAMETER;
	vpap = NULL;
	spin_lock(&tvcpu->arch.vpa_update_lock);

	switch (subfunc) {
	case H_VPA_REG_VPA:		/* register VPA */
		if (len < sizeof(struct lppaca))
			break;
		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_REG_DTL:		/* register DTL */
		if (len < sizeof(struct dtl_entry))
			break;
		len -= len % sizeof(struct dtl_entry);

		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_REG_SLB:		/* register SLB shadow buffer */
		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;

	case H_VPA_DEREG_VPA:		/* deregister VPA */
		/* Check they don't still have a DTL or SLB buf registered */
		err = H_RESOURCE;
		if (vpa_is_registered(&tvcpu->arch.dtl) ||
		    vpa_is_registered(&tvcpu->arch.slb_shadow))
			break;

		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_DEREG_DTL:		/* deregister DTL */
		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_DEREG_SLB:		/* deregister SLB shadow buffer */
		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;
	}

	if (vpap) {
		vpap->next_gpa = vpa;
		vpap->len = len;
		vpap->update_pending = 1;
	}

	spin_unlock(&tvcpu->arch.vpa_update_lock);

	return err;
}

static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
{
	struct kvm *kvm = vcpu->kvm;
	void *va;
	unsigned long nb;
	unsigned long gpa;

	/*
	 * We need to pin the page pointed to by vpap->next_gpa,
	 * but we can't call kvmppc_pin_guest_page under the lock
	 * as it does get_user_pages() and down_read().  So we
	 * have to drop the lock, pin the page, then get the lock
	 * again and check that a new area didn't get registered
	 * in the meantime.
	 */
	for (;;) {
		gpa = vpap->next_gpa;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		va = NULL;
		nb = 0;
		if (gpa)
			va = kvmppc_pin_guest_page(kvm, gpa, &nb);
		spin_lock(&vcpu->arch.vpa_update_lock);
		if (gpa == vpap->next_gpa)
			break;
		/* sigh... unpin that page and try again */
		if (va)
			kvmppc_unpin_guest_page(kvm, va, gpa, false);
	}

	vpap->update_pending = 0;
	if (va && nb < vpap->len) {
		/*
		 * If it's now too short, it must be that userspace
		 * has changed the mappings underlying guest memory,
		 * so unregister the region.
		 */
		kvmppc_unpin_guest_page(kvm, va, gpa, false);
		va = NULL;
	}
	if (vpap->pinned_addr)
		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
					vpap->dirty);
	vpap->gpa = gpa;
	vpap->pinned_addr = va;
	vpap->dirty = false;
	if (va)
		vpap->pinned_end = va + vpap->len;
}

static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.vpa.update_pending ||
	      vcpu->arch.slb_shadow.update_pending ||
	      vcpu->arch.dtl.update_pending))
		return;

	spin_lock(&vcpu->arch.vpa_update_lock);
	if (vcpu->arch.vpa.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
		if (vcpu->arch.vpa.pinned_addr)
			init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
	}
	if (vcpu->arch.dtl.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
		vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
		vcpu->arch.dtl_index = 0;
	}
	if (vcpu->arch.slb_shadow.update_pending)
		kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
	spin_unlock(&vcpu->arch.vpa_update_lock);
}
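
/*
 * Return the accumulated stolen time for the vcore up until `now'.
 * The caller should hold the vcore lock.
 */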
static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
{
	u64 p;
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	p = vc->stolen_tb;
	if (vc->vcore_state != VCORE_INACTIVE &&
	    vc->preempt_tb != TB_NIL)
		p += now - vc->preempt_tb;
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
	return p;
}

static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
				    struct kvmppc_vcore *vc)
{
	struct dtl_entry *dt;
	struct lppaca *vpa;
	unsigned long stolen;
	unsigned long core_stolen;
	u64 now;

	dt = vcpu->arch.dtl_ptr;
	vpa = vcpu->arch.vpa.pinned_addr;
	now = mftb();
	core_stolen = vcore_stolen_time(vc, now);
	stolen = core_stolen - vcpu->arch.stolen_logged;
	vcpu->arch.stolen_logged = core_stolen;
	spin_lock_irq(&vcpu->arch.tbacct_lock);
	stolen += vcpu->arch.busy_stolen;
	vcpu->arch.busy_stolen = 0;
	spin_unlock_irq(&vcpu->arch.tbacct_lock);
	if (!dt || !vpa)
		return;
	memset(dt, 0, sizeof(struct dtl_entry));
	dt->dispatch_reason = 7;
	dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
	dt->timebase = cpu_to_be64(now + vc->tb_offset);
	dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
	dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
	dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
	++dt;
	if (dt == vcpu->arch.dtl.pinned_end)
		dt = vcpu->arch.dtl.pinned_addr;
	vcpu->arch.dtl_ptr = dt;
	/* order writing *dt vs. writing vpa->dtl_idx */
	smp_wmb();
	vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
	vcpu->arch.dtl.dirty = true;
}

static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
		return true;
	if ((!vcpu->arch.vcore->arch_compat) &&
	    cpu_has_feature(CPU_FTR_ARCH_207S))
		return true;
	return false;
}

static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
			     unsigned long resource, unsigned long value1,
			     unsigned long value2)
{
	switch (resource) {
	case H_SET_MODE_RESOURCE_SET_CIABR:
		if (!kvmppc_power8_compatible(vcpu))
			return H_P2;
		if (value2)
			return H_P4;
		if (mflags)
			return H_UNSUPPORTED_FLAG_START;
		/* Guests can't breakpoint the hypervisor */
		if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER)
			return H_P3;
		vcpu->arch.ciabr = value1;
		return H_SUCCESS;
	case H_SET_MODE_RESOURCE_SET_DAWR:
		if (!kvmppc_power8_compatible(vcpu))
			return H_P2;
		if (mflags)
			return H_UNSUPPORTED_FLAG_START;
		if (value2 & DABRX_HYP)
			return H_P4;
		vcpu->arch.dawr = value1;
		vcpu->arch.dawrx = value2;
		return H_SUCCESS;
	default:
		return H_TOO_HARD;
	}
}

static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
{
	struct kvmppc_vcore *vcore = target->arch.vcore;
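
	/*
	 * We expect to have been called by the real mode handler
	 * (kvmppc_rm_h_confer()), which would have directly returned
	 * H_CONFER if the target vcpu was already running in the host.
	 * If the target is runnable but not actually running, yield
	 * to the task that is running the target's vcore, if any.
	 */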
	spin_lock(&vcore->lock);
	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
	    vcore->vcore_state != VCORE_INACTIVE &&
	    vcore->runner)
		target = vcore->runner;
	spin_unlock(&vcore->lock);

	return kvm_vcpu_yield_to(target);
}

static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
{
	int yield_count = 0;
	struct lppaca *lppaca;

	spin_lock(&vcpu->arch.vpa_update_lock);
	lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
	if (lppaca)
		yield_count = be32_to_cpu(lppaca->yield_count);
	spin_unlock(&vcpu->arch.vpa_update_lock);
	return yield_count;
}

int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	int yield_count;
	struct kvm_vcpu *tvcpu;
	int idx, rc;

	if (req <= MAX_HCALL_OPCODE &&
	    !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls))
		return RESUME_HOST;

	switch (req) {
	case H_CEDE:
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (vcpu->arch.ceded) {
			if (swait_active(&vcpu->wq)) {
				swake_up(&vcpu->wq);
				vcpu->stat.halt_wakeup++;
			}
		}
		break;
	case H_CONFER:
		target = kvmppc_get_gpr(vcpu, 4);
		if (target == -1)
			break;
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		yield_count = kvmppc_get_gpr(vcpu, 5);
		if (kvmppc_get_yield_count(tvcpu) != yield_count)
			break;
		kvm_arch_vcpu_yield_to(tvcpu);
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	case H_RTAS:
		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
			return RESUME_HOST;

		idx = srcu_read_lock(&vcpu->kvm->srcu);
		rc = kvmppc_rtas_hcall(vcpu);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);

		if (rc == -ENOENT)
			return RESUME_HOST;
		else if (rc == 0)
			break;

		/* Send the error out to userspace via KVM_RUN */
		return rc;
	case H_LOGICAL_CI_LOAD:
		ret = kvmppc_h_logical_ci_load(vcpu);
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_LOGICAL_CI_STORE:
		ret = kvmppc_h_logical_ci_store(vcpu);
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_SET_MODE:
		ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6),
					kvmppc_get_gpr(vcpu, 7));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_XIRR:
	case H_CPPR:
	case H_EOI:
	case H_IPI:
	case H_IPOLL:
	case H_XIRR_X:
		if (kvmppc_xics_enabled(vcpu)) {
			ret = kvmppc_xics_hcall(vcpu, req);
			break;
		}
		return RESUME_HOST;
	case H_PUT_TCE:
		ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
				       kvmppc_get_gpr(vcpu, 5),
				       kvmppc_get_gpr(vcpu, 6));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_PUT_TCE_INDIRECT:
		ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
						kvmppc_get_gpr(vcpu, 5),
						kvmppc_get_gpr(vcpu, 6),
						kvmppc_get_gpr(vcpu, 7));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_STUFF_TCE:
		ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
					 kvmppc_get_gpr(vcpu, 5),
					 kvmppc_get_gpr(vcpu, 6),
					 kvmppc_get_gpr(vcpu, 7));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

static int kvmppc_hcall_impl_hv(unsigned long cmd)
{
	switch (cmd) {
	case H_CEDE:
	case H_PROD:
	case H_CONFER:
	case H_REGISTER_VPA:
	case H_SET_MODE:
	case H_LOGICAL_CI_LOAD:
	case H_LOGICAL_CI_STORE:
#ifdef CONFIG_KVM_XICS
	case H_XIRR:
	case H_CPPR:
	case H_EOI:
	case H_IPI:
	case H_IPOLL:
	case H_XIRR_X:
#endif
		return 1;
	}

	/* See if it's in the real-mode table */
	return kvmppc_hcall_impl_hv_realmode(cmd);
}

static int kvmppc_emulate_debug_inst(struct kvm_run *run,
				     struct kvm_vcpu *vcpu)
{
	u32 last_inst;

	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
	    EMULATE_DONE) {
		/*
		 * Fetch failed when we tried to fetch the instruction.
		 * If this happens, instead of failing hard we just retry.
		 */
		return RESUME_GUEST;
	}

	if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
		run->exit_reason = KVM_EXIT_DEBUG;
		run->debug.arch.address = kvmppc_get_pc(vcpu);
		return RESUME_HOST;
	} else {
		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
		return RESUME_GUEST;
	}
}

static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
				 struct task_struct *tsk)
{
	int r = RESUME_HOST;

	vcpu->stat.sum_exits++;
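
	/*
	 * This can happen if an interrupt occurs in the last stages
	 * of guest entry or the first stages of guest exit (i.e. after
	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
	 * That can happen due to a bug, or due to a machine check
	 * occurring at just the wrong time.
	 */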
	if (vcpu->arch.shregs.msr & MSR_HV) {
		printk(KERN_EMERG "KVM trap in HV mode!\n");
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		kvmppc_dump_regs(vcpu);
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		return RESUME_HOST;
	}
	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
	case BOOK3S_INTERRUPT_H_DOORBELL:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	/* HMI is hypervisor interrupt and host has handled it. Resume guest. */
	case BOOK3S_INTERRUPT_HMI:
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_MACHINE_CHECK:
		/*
		 * Deliver a machine check interrupt to the guest.
		 * We have to do this, even if the host has handled the
		 * machine check, because machine checks use SRR0/1 and
		 * the interrupt might have trashed guest state in them.
		 */
		kvmppc_book3s_queue_irqprio(vcpu,
					    BOOK3S_INTERRUPT_MACHINE_CHECK);
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		/*
		 * A hypercall with MSR_PR set has already been handled
		 * in real mode and never reaches here.
		 */

		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
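	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */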
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		r = RESUME_PAGE_FAULT;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
		vcpu->arch.fault_dsisr = 0;
		r = RESUME_PAGE_FAULT;
		break;
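	/*
	 * This occurs if the guest executes an illegal instruction.
	 * If guest debug is disabled, generate a program interrupt
	 * to the guest.  If guest debug is enabled, we need to check
	 * whether the instruction is a software breakpoint instruction.
	 * Accordingly return to Guest or Host.
	 */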
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
			vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
				swab32(vcpu->arch.emul_inst) :
				vcpu->arch.emul_inst;
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
			r = kvmppc_emulate_debug_inst(run, vcpu);
		} else {
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			r = RESUME_GUEST;
		}
		break;
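	/*
	 * This occurs if the guest (kernel or userspace) does something
	 * that is prohibited by HFSCR.  We just generate a program
	 * interrupt to the guest.
	 */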
	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
		r = RESUME_GUEST;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		r = RESUME_HOST;
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	int i;

	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;
	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	int i, j;

	/* Only accept the same PVR as the host's, since we can't spoof it */
	if (sregs->pvr != vcpu->arch.pvr)
		return -EINVAL;

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
			    bool preserve_top32)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 mask;

	mutex_lock(&kvm->lock);
	spin_lock(&vc->lock);
	/*
	 * If ILE (interrupt little-endian) has changed, update the
	 * MSR_LE bit in the intr_msr for each vcpu in this vcore.
	 */
	if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
		struct kvm_vcpu *vcpu;
		int i;

		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (vcpu->arch.vcore != vc)
				continue;
			if (new_lpcr & LPCR_ILE)
				vcpu->arch.intr_msr |= MSR_LE;
			else
				vcpu->arch.intr_msr &= ~MSR_LE;
		}
	}

	/*
	 * Userspace can only modify DPFD (default prefetch depth),
	 * ILE (interrupt little-endian) and TC (translation control).
	 * On POWER8 userspace can also modify AIL (alt. interrupt loc.).
	 */
	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
	if (cpu_has_feature(CPU_FTR_ARCH_207S))
		mask |= LPCR_AIL;

	/* Broken 32-bit version of LPCR must not clear top bits */
	if (preserve_top32)
		mask &= 0xFFFFFFFF;
	vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
	spin_unlock(&vc->lock);
	mutex_unlock(&kvm->lock);
}

static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
				 union kvmppc_one_reg *val)
{
	int r = 0;
	long int i;

	switch (id) {
	case KVM_REG_PPC_DEBUG_INST:
		*val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
		break;
	case KVM_REG_PPC_HIOR:
		*val = get_reg_val(id, 0);
		break;
	case KVM_REG_PPC_DABR:
		*val = get_reg_val(id, vcpu->arch.dabr);
		break;
	case KVM_REG_PPC_DABRX:
		*val = get_reg_val(id, vcpu->arch.dabrx);
		break;
	case KVM_REG_PPC_DSCR:
		*val = get_reg_val(id, vcpu->arch.dscr);
		break;
	case KVM_REG_PPC_PURR:
		*val = get_reg_val(id, vcpu->arch.purr);
		break;
	case KVM_REG_PPC_SPURR:
		*val = get_reg_val(id, vcpu->arch.spurr);
		break;
	case KVM_REG_PPC_AMR:
		*val = get_reg_val(id, vcpu->arch.amr);
		break;
	case KVM_REG_PPC_UAMOR:
		*val = get_reg_val(id, vcpu->arch.uamor);
		break;
	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
		i = id - KVM_REG_PPC_MMCR0;
		*val = get_reg_val(id, vcpu->arch.mmcr[i]);
		break;
	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
		i = id - KVM_REG_PPC_PMC1;
		*val = get_reg_val(id, vcpu->arch.pmc[i]);
		break;
	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
		i = id - KVM_REG_PPC_SPMC1;
		*val = get_reg_val(id, vcpu->arch.spmc[i]);
		break;
	case KVM_REG_PPC_SIAR:
		*val = get_reg_val(id, vcpu->arch.siar);
		break;
	case KVM_REG_PPC_SDAR:
		*val = get_reg_val(id, vcpu->arch.sdar);
		break;
	case KVM_REG_PPC_SIER:
		*val = get_reg_val(id, vcpu->arch.sier);
		break;
	case KVM_REG_PPC_IAMR:
		*val = get_reg_val(id, vcpu->arch.iamr);
		break;
	case KVM_REG_PPC_PSPB:
		*val = get_reg_val(id, vcpu->arch.pspb);
		break;
	case KVM_REG_PPC_DPDES:
		*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
		break;
	case KVM_REG_PPC_DAWR:
		*val = get_reg_val(id, vcpu->arch.dawr);
		break;
	case KVM_REG_PPC_DAWRX:
		*val = get_reg_val(id, vcpu->arch.dawrx);
		break;
	case KVM_REG_PPC_CIABR:
		*val = get_reg_val(id, vcpu->arch.ciabr);
		break;
	case KVM_REG_PPC_CSIGR:
		*val = get_reg_val(id, vcpu->arch.csigr);
		break;
	case KVM_REG_PPC_TACR:
		*val = get_reg_val(id, vcpu->arch.tacr);
		break;
	case KVM_REG_PPC_TCSCR:
		*val = get_reg_val(id, vcpu->arch.tcscr);
		break;
	case KVM_REG_PPC_PID:
		*val = get_reg_val(id, vcpu->arch.pid);
		break;
	case KVM_REG_PPC_ACOP:
		*val = get_reg_val(id, vcpu->arch.acop);
		break;
	case KVM_REG_PPC_WORT:
		*val = get_reg_val(id, vcpu->arch.wort);
		break;
	case KVM_REG_PPC_VPA_ADDR:
		spin_lock(&vcpu->arch.vpa_update_lock);
		*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_SLB:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
		val->vpaval.length = vcpu->arch.slb_shadow.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_DTL:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.dtl.next_gpa;
		val->vpaval.length = vcpu->arch.dtl.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_TB_OFFSET:
		*val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
		break;
	case KVM_REG_PPC_LPCR:
	case KVM_REG_PPC_LPCR_64:
		*val = get_reg_val(id, vcpu->arch.vcore->lpcr);
		break;
	case KVM_REG_PPC_PPR:
		*val = get_reg_val(id, vcpu->arch.ppr);
		break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	case KVM_REG_PPC_TFHAR:
		*val = get_reg_val(id, vcpu->arch.tfhar);
		break;
	case KVM_REG_PPC_TFIAR:
		*val = get_reg_val(id, vcpu->arch.tfiar);
		break;
	case KVM_REG_PPC_TEXASR:
		*val = get_reg_val(id, vcpu->arch.texasr);
		break;
	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
		i = id - KVM_REG_PPC_TM_GPR0;
		*val = get_reg_val(id, vcpu->arch.gpr_tm[i]);
		break;
	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
	{
		int j;
		i = id - KVM_REG_PPC_TM_VSR0;
		if (i < 32)
			for (j = 0; j < TS_FPRWIDTH; j++)
				val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
		else {
			if (cpu_has_feature(CPU_FTR_ALTIVEC))
				val->vval = vcpu->arch.vr_tm.vr[i-32];
			else
				r = -ENXIO;
		}
		break;
	}
	case KVM_REG_PPC_TM_CR:
		*val = get_reg_val(id, vcpu->arch.cr_tm);
		break;
	case KVM_REG_PPC_TM_LR:
		*val = get_reg_val(id, vcpu->arch.lr_tm);
		break;
	case KVM_REG_PPC_TM_CTR:
		*val = get_reg_val(id, vcpu->arch.ctr_tm);
		break;
	case KVM_REG_PPC_TM_FPSCR:
		*val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
		break;
	case KVM_REG_PPC_TM_AMR:
		*val = get_reg_val(id, vcpu->arch.amr_tm);
		break;
	case KVM_REG_PPC_TM_PPR:
		*val = get_reg_val(id, vcpu->arch.ppr_tm);
		break;
	case KVM_REG_PPC_TM_VRSAVE:
		*val = get_reg_val(id, vcpu->arch.vrsave_tm);
		break;
	case KVM_REG_PPC_TM_VSCR:
		if (cpu_has_feature(CPU_FTR_ALTIVEC))
			*val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
		else
			r = -ENXIO;
		break;
	case KVM_REG_PPC_TM_DSCR:
		*val = get_reg_val(id, vcpu->arch.dscr_tm);
		break;
	case KVM_REG_PPC_TM_TAR:
		*val = get_reg_val(id, vcpu->arch.tar_tm);
		break;
#endif
	case KVM_REG_PPC_ARCH_COMPAT:
		*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
				 union kvmppc_one_reg *val)
{
	int r = 0;
	long int i;
	unsigned long addr, len;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		/* Only allow this to be set to zero */
		if (set_reg_val(id, *val))
			r = -EINVAL;
		break;
	case KVM_REG_PPC_DABR:
		vcpu->arch.dabr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DABRX:
		vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
		break;
	case KVM_REG_PPC_DSCR:
		vcpu->arch.dscr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PURR:
		vcpu->arch.purr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SPURR:
		vcpu->arch.spurr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_AMR:
		vcpu->arch.amr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_UAMOR:
		vcpu->arch.uamor = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
		i = id - KVM_REG_PPC_MMCR0;
		vcpu->arch.mmcr[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
		i = id - KVM_REG_PPC_PMC1;
		vcpu->arch.pmc[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
		i = id - KVM_REG_PPC_SPMC1;
		vcpu->arch.spmc[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SIAR:
		vcpu->arch.siar = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SDAR:
		vcpu->arch.sdar = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SIER:
		vcpu->arch.sier = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_IAMR:
		vcpu->arch.iamr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PSPB:
		vcpu->arch.pspb = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DPDES:
		vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DAWR:
		vcpu->arch.dawr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DAWRX:
		vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
		break;
	case KVM_REG_PPC_CIABR:
		vcpu->arch.ciabr = set_reg_val(id, *val);
		/* Don't allow setting breakpoints in hypervisor code */
		if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
			vcpu->arch.ciabr &= ~CIABR_PRIV;	/* disable */
		break;
	case KVM_REG_PPC_CSIGR:
		vcpu->arch.csigr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TACR:
		vcpu->arch.tacr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TCSCR:
		vcpu->arch.tcscr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PID:
		vcpu->arch.pid = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_ACOP:
		vcpu->arch.acop = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_WORT:
		vcpu->arch.wort = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_VPA_ADDR:
		addr = set_reg_val(id, *val);
		r = -EINVAL;
		if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
			      vcpu->arch.dtl.next_gpa))
			break;
		r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
		break;
	case KVM_REG_PPC_VPA_SLB:
		addr = val->vpaval.addr;
		len = val->vpaval.length;
		r = -EINVAL;
		if (addr && !vcpu->arch.vpa.next_gpa)
			break;
		r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
		break;
	case KVM_REG_PPC_VPA_DTL:
		addr = val->vpaval.addr;
		len = val->vpaval.length;
		r = -EINVAL;
		if (addr && (len < sizeof(struct dtl_entry) ||
			     !vcpu->arch.vpa.next_gpa))
			break;
		len -= len % sizeof(struct dtl_entry);
		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
		break;
	case KVM_REG_PPC_TB_OFFSET:
		/* round up to multiple of 2^24 */
		vcpu->arch.vcore->tb_offset =
			ALIGN(set_reg_val(id, *val), 1UL << 24);
		break;
	case KVM_REG_PPC_LPCR:
		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
		break;
	case KVM_REG_PPC_LPCR_64:
		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false);
		break;
	case KVM_REG_PPC_PPR:
		vcpu->arch.ppr = set_reg_val(id, *val);
		break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	case KVM_REG_PPC_TFHAR:
		vcpu->arch.tfhar = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TFIAR:
		vcpu->arch.tfiar = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TEXASR:
		vcpu->arch.texasr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
		i = id - KVM_REG_PPC_TM_GPR0;
		vcpu->arch.gpr_tm[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
	{
		int j;
		i = id - KVM_REG_PPC_TM_VSR0;
		if (i < 32)
			for (j = 0; j < TS_FPRWIDTH; j++)
				vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
		else
			if (cpu_has_feature(CPU_FTR_ALTIVEC))
				vcpu->arch.vr_tm.vr[i-32] = val->vval;
			else
				r = -ENXIO;
		break;
	}
	case KVM_REG_PPC_TM_CR:
		vcpu->arch.cr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_LR:
		vcpu->arch.lr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_CTR:
		vcpu->arch.ctr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_FPSCR:
		vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_AMR:
		vcpu->arch.amr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_PPR:
		vcpu->arch.ppr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_VRSAVE:
		vcpu->arch.vrsave_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_VSCR:
		/* write the checkpointed VSCR, matching the get path above */
		if (cpu_has_feature(CPU_FTR_ALTIVEC))
			vcpu->arch.vr_tm.vscr.u[3] = set_reg_val(id, *val);
		else
			r = -ENXIO;
		break;
	case KVM_REG_PPC_TM_DSCR:
		vcpu->arch.dscr_tm = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TM_TAR:
		vcpu->arch.tar_tm = set_reg_val(id, *val);
		break;
#endif
	case KVM_REG_PPC_ARCH_COMPAT:
		r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
{
	struct kvmppc_vcore *vcore;

	vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);

	if (vcore == NULL)
		return NULL;

	INIT_LIST_HEAD(&vcore->runnable_threads);
	spin_lock_init(&vcore->lock);
	spin_lock_init(&vcore->stoltb_lock);
	init_swait_queue_head(&vcore->wq);
	vcore->preempt_tb = TB_NIL;
	vcore->lpcr = kvm->arch.lpcr;
	vcore->first_vcpuid = core * threads_per_subcore;
	vcore->kvm = kvm;
	INIT_LIST_HEAD(&vcore->preempt_list);

	return vcore;
}

#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
static struct debugfs_timings_element {
	const char *name;
	size_t offset;
} timings[] = {
	{"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)},
	{"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)},
	{"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)},
	{"guest", offsetof(struct kvm_vcpu, arch.guest_time)},
	{"cede", offsetof(struct kvm_vcpu, arch.cede_time)},
};

#define N_TIMINGS	(sizeof(timings) / sizeof(timings[0]))

struct debugfs_timings_state {
	struct kvm_vcpu *vcpu;
	unsigned int buflen;
	char buf[N_TIMINGS * 100];
};

static int debugfs_timings_open(struct inode *inode, struct file *file)
{
	struct kvm_vcpu *vcpu = inode->i_private;
	struct debugfs_timings_state *p;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	kvm_get_kvm(vcpu->kvm);
	p->vcpu = vcpu;
	file->private_data = p;

	return nonseekable_open(inode, file);
}

static int debugfs_timings_release(struct inode *inode, struct file *file)
{
	struct debugfs_timings_state *p = file->private_data;

	kvm_put_kvm(p->vcpu->kvm);
	kfree(p);
	return 0;
}

static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
				    size_t len, loff_t *ppos)
{
	struct debugfs_timings_state *p = file->private_data;
	struct kvm_vcpu *vcpu = p->vcpu;
	char *s, *buf_end;
	struct kvmhv_tb_accumulator tb;
	u64 count;
	loff_t pos;
	ssize_t n;
	int i, loops;
	bool ok;

	if (!p->buflen) {
		s = p->buf;
		buf_end = s + sizeof(p->buf);
		for (i = 0; i < N_TIMINGS; ++i) {
			struct kvmhv_tb_accumulator *acc;

			acc = (struct kvmhv_tb_accumulator *)
				((unsigned long)vcpu + timings[i].offset);
			ok = false;
			for (loops = 0; loops < 1000; ++loops) {
				count = acc->seqcount;
				if (!(count & 1)) {
					smp_rmb();
					tb = *acc;
					smp_rmb();
					if (count == acc->seqcount) {
						ok = true;
						break;
					}
				}
				udelay(1);
			}
			if (!ok)
				snprintf(s, buf_end - s, "%s: stuck\n",
					 timings[i].name);
			else
				snprintf(s, buf_end - s,
					 "%s: %llu %llu %llu %llu\n",
					 timings[i].name, count / 2,
					 tb_to_ns(tb.tb_total),
					 tb_to_ns(tb.tb_min),
					 tb_to_ns(tb.tb_max));
			s += strlen(s);
		}
		p->buflen = s - p->buf;
	}

	pos = *ppos;
	if (pos >= p->buflen)
		return 0;
	if (len > p->buflen - pos)
		len = p->buflen - pos;
	n = copy_to_user(buf, p->buf + pos, len);
	if (n) {
		if (n == len)
			return -EFAULT;
		len -= n;
	}
	*ppos = pos + len;
	return len;
}

static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
				     size_t len, loff_t *ppos)
{
	return -EACCES;
}

static const struct file_operations debugfs_timings_ops = {
	.owner = THIS_MODULE,
	.open = debugfs_timings_open,
	.release = debugfs_timings_release,
	.read = debugfs_timings_read,
	.write = debugfs_timings_write,
	.llseek = generic_file_llseek,
};

/* Create a debugfs directory for the vcpu */
static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
{
	char buf[16];
	struct kvm *kvm = vcpu->kvm;

	snprintf(buf, sizeof(buf), "vcpu%u", id);
	if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
		return;
	vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
	if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
		return;
	vcpu->arch.debugfs_timings =
		debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
				    vcpu, &debugfs_timings_ops);
}

#else
static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
{
}
#endif

static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
						   unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int err = -EINVAL;
	int core;
	struct kvmppc_vcore *vcore;

	core = id / threads_per_subcore;
	if (core >= KVM_MAX_VCORES)
		goto out;

	err = -ENOMEM;
	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	vcpu->arch.shared = &vcpu->arch.shregs;
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
	/*
	 * The shared struct is never shared on HV,
	 * so we can always use host endianness.
	 */
#ifdef __BIG_ENDIAN__
	vcpu->arch.shared_big_endian = true;
#else
	vcpu->arch.shared_big_endian = false;
#endif
#endif
	vcpu->arch.mmcr[0] = MMCR0_FC;
	vcpu->arch.ctrl = CTRL_RUNLATCH;
	/* default to host PVR, since we can't spoof it */
	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
	spin_lock_init(&vcpu->arch.vpa_update_lock);
	spin_lock_init(&vcpu->arch.tbacct_lock);
	vcpu->arch.busy_preempt = TB_NIL;
	vcpu->arch.intr_msr = MSR_SF | MSR_ME;

	kvmppc_mmu_book3s_hv_init(vcpu);

	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;

	init_waitqueue_head(&vcpu->arch.cpu_run);

	mutex_lock(&kvm->lock);
	vcore = kvm->arch.vcores[core];
	if (!vcore) {
		vcore = kvmppc_vcore_create(kvm, core);
		kvm->arch.vcores[core] = vcore;
		kvm->arch.online_vcores++;
	}
	mutex_unlock(&kvm->lock);

	if (!vcore)
		goto free_vcpu;

	spin_lock(&vcore->lock);
	++vcore->num_threads;
	spin_unlock(&vcore->lock);
	vcpu->arch.vcore = vcore;
	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
	vcpu->arch.thread_cpu = -1;

	vcpu->arch.cpu_type = KVM_CPU_3S_64;
	kvmppc_sanity_check(vcpu);

	debugfs_vcpu_init(vcpu, id);

	return vcpu;

free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
	if (vpa->pinned_addr)
		kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
					vpa->dirty);
}

static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.vpa_update_lock);
	unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
	unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
	unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
	spin_unlock(&vcpu->arch.vpa_update_lock);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
{
	/* Indicate we want to get back into the guest */
	return 1;
}

static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
	unsigned long dec_nsec, now;

	now = get_tb();
	if (now > vcpu->arch.dec_expires) {
		/* decrementer has already gone negative */
		kvmppc_core_queue_dec(vcpu);
		kvmppc_core_prepare_to_enter(vcpu);
		return;
	}
	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
		   / tb_ticks_per_sec;
	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
		      HRTIMER_MODE_REL);
	vcpu->arch.timer_running = 1;
}

static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
	vcpu->arch.ceded = 0;
	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
}

extern void __kvmppc_vcore_entry(void);

static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
				   struct kvm_vcpu *vcpu)
{
	u64 now;

	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
		return;
	spin_lock_irq(&vcpu->arch.tbacct_lock);
	now = mftb();
	vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
		vcpu->arch.stolen_logged;
	vcpu->arch.busy_preempt = now;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
	spin_unlock_irq(&vcpu->arch.tbacct_lock);
	--vc->n_runnable;
	list_del(&vcpu->arch.run_list);
}

static int kvmppc_grab_hwthread(int cpu)
{
	struct paca_struct *tpaca;
	long timeout = 10000;

	tpaca = &paca[cpu];

	/* Ensure the thread won't go into the kernel if it wakes */
	tpaca->kvm_hstate.kvm_vcpu = NULL;
	tpaca->kvm_hstate.kvm_vcore = NULL;
	tpaca->kvm_hstate.napping = 0;
	smp_wmb();
	tpaca->kvm_hstate.hwthread_req = 1;

	/*
	 * If the thread is already executing in the kernel (e.g. handling
	 * a stray interrupt), wait for it to get back to nap mode.
	 * The smp_mb() is to ensure that our setting of hwthread_req
	 * is visible before we look at hwthread_state, so if this
	 * races with the code at system_reset_pSeries and the thread
	 * misses our setting of hwthread_req, we are sure to see its
	 * setting of hwthread_state, and vice versa.
	 */
	smp_mb();
	while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
		if (--timeout <= 0) {
			pr_err("KVM: couldn't grab cpu %d\n", cpu);
			return -EBUSY;
		}
		udelay(1);
	}
	return 0;
}

static void kvmppc_release_hwthread(int cpu)
{
	struct paca_struct *tpaca;

	tpaca = &paca[cpu];
	tpaca->kvm_hstate.hwthread_req = 0;
	tpaca->kvm_hstate.kvm_vcpu = NULL;
	tpaca->kvm_hstate.kvm_vcore = NULL;
	tpaca->kvm_hstate.kvm_split_mode = NULL;
}

static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
	int cpu;
	struct paca_struct *tpaca;
	struct kvmppc_vcore *mvc = vc->master_vcore;

	cpu = vc->pcpu;
	if (vcpu) {
		if (vcpu->arch.timer_running) {
			hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
			vcpu->arch.timer_running = 0;
		}
		cpu += vcpu->arch.ptid;
		vcpu->cpu = mvc->pcpu;
		vcpu->arch.thread_cpu = cpu;
	}
	tpaca = &paca[cpu];
	tpaca->kvm_hstate.kvm_vcpu = vcpu;
	tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
	smp_wmb();
	tpaca->kvm_hstate.kvm_vcore = mvc;
	if (cpu != smp_processor_id())
		kvmppc_ipi_thread(cpu);
}

static void kvmppc_wait_for_nap(void)
{
	int cpu = smp_processor_id();
	int i, loops;

	for (loops = 0; loops < 1000000; ++loops) {
		/*
		 * Check if all threads are finished.
		 * We set the vcore pointer when starting a thread
		 * and the thread clears it when finished, so we look
		 * for any threads that still have a non-NULL vcore ptr.
		 */
		for (i = 1; i < threads_per_subcore; ++i)
			if (paca[cpu + i].kvm_hstate.kvm_vcore)
				break;
		if (i == threads_per_subcore) {
			HMT_medium();
			return;
		}
		HMT_low();
	}
	HMT_medium();
	for (i = 1; i < threads_per_subcore; ++i)
		if (paca[cpu + i].kvm_hstate.kvm_vcore)
			pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
}
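
/*
 * Check that we are on thread 0 and that any other threads in
 * this core are off-line.  Then grab the threads so they can't
 * enter the kernel.
 */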
static int on_primary_thread(void)
{
	int cpu = smp_processor_id();
	int thr;

	/* Are we on a primary subcore? */
	if (cpu_thread_in_subcore(cpu))
		return 0;

	thr = 0;
	while (++thr < threads_per_subcore)
		if (cpu_online(cpu + thr))
			return 0;

	/* Grab all hw threads so they can't go into the kernel */
	for (thr = 1; thr < threads_per_subcore; ++thr) {
		if (kvmppc_grab_hwthread(cpu + thr)) {
			/* Couldn't grab one; let the others go */
			do {
				kvmppc_release_hwthread(cpu + thr);
			} while (--thr > 0);
			return 0;
		}
	}
	return 1;
}
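
/*
 * A list of virtual cores for each physical CPU.
 * These are vcores that could run but their runner VCPU tasks are
 * (or may be) preempted.
 */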
struct preempted_vcore_list {
	struct list_head list;
	spinlock_t lock;
};

static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);

static void init_vcore_lists(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
		spin_lock_init(&lp->lock);
		INIT_LIST_HEAD(&lp->list);
	}
}

static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
{
	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);

	vc->vcore_state = VCORE_PREEMPT;
	vc->pcpu = smp_processor_id();
	if (vc->num_threads < threads_per_subcore) {
		spin_lock(&lp->lock);
		list_add_tail(&vc->preempt_list, &lp->list);
		spin_unlock(&lp->lock);
	}

	/* Start accumulating stolen time */
	kvmppc_core_start_stolen(vc);
}

static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
{
	struct preempted_vcore_list *lp;

	kvmppc_core_end_stolen(vc);
	if (!list_empty(&vc->preempt_list)) {
		lp = &per_cpu(preempted_vcores, vc->pcpu);
		spin_lock(&lp->lock);
		list_del_init(&vc->preempt_list);
		spin_unlock(&lp->lock);
	}
	vc->vcore_state = VCORE_INACTIVE;
}
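
/*
 * This stores information about the virtual cores currently
 * assigned to a physical core.
 */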
struct core_info {
	int n_subcores;
	int max_subcore_threads;
	int total_threads;
	int subcore_threads[MAX_SUBCORES];
	struct kvm *subcore_vm[MAX_SUBCORES];
	struct list_head vcs[MAX_SUBCORES];
};

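/*
 * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
 * respectively in 2-way micro-threading (split-core) mode.
 */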
static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };

static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
{
	int sub;

	memset(cip, 0, sizeof(*cip));
	cip->n_subcores = 1;
	cip->max_subcore_threads = vc->num_threads;
	cip->total_threads = vc->num_threads;
	cip->subcore_threads[0] = vc->num_threads;
	cip->subcore_vm[0] = vc->kvm;
	for (sub = 0; sub < MAX_SUBCORES; ++sub)
		INIT_LIST_HEAD(&cip->vcs[sub]);
	list_add_tail(&vc->preempt_list, &cip->vcs[0]);
}

static bool subcore_config_ok(int n_subcores, int n_threads)
{
	/* Can only dynamically split if unsplit to begin with */
	if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
		return false;
	if (n_subcores > MAX_SUBCORES)
		return false;
	if (n_subcores > 1) {
		if (!(dynamic_mt_modes & 2))
			n_subcores = 4;	/* force 4-way split */
		if (n_subcores > 2 && !(dynamic_mt_modes & 4))
			return false;
	}

	return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
}

static void init_master_vcore(struct kvmppc_vcore *vc)
{
	vc->master_vcore = vc;
	vc->entry_exit_map = 0;
	vc->in_guest = 0;
	vc->napping_threads = 0;
	vc->conferring_threads = 0;
}
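
/*
 * See if the existing subcores can be split into 3 (or fewer) subcores
 * of at most two threads each, so we can fit in another vcore.  This
 * assumes there are at most two existing subcores and at most 6
 * threads in total.
 */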
static bool can_split_piggybacked_subcores(struct core_info *cip)
{
	int sub, new_sub;
	int large_sub = -1;
	int thr;
	int n_subcores = cip->n_subcores;
	struct kvmppc_vcore *vc, *vcnext;
	struct kvmppc_vcore *master_vc = NULL;

	for (sub = 0; sub < cip->n_subcores; ++sub) {
		if (cip->subcore_threads[sub] <= 2)
			continue;
		if (large_sub >= 0)
			return false;
		large_sub = sub;
		vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
				      preempt_list);
		if (vc->num_threads > 2)
			return false;
		n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
	}
	if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2))
		return false;

	/*
	 * Seems feasible, so go through and move vcores from the
	 * oversized subcore to new subcores.
	 */
	new_sub = cip->n_subcores;
	thr = 0;
	sub = large_sub;
	list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
		if (thr >= 2) {
			list_del(&vc->preempt_list);
			list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
			/* vc->num_threads <= 2 */
			if (++cip->subcore_threads[new_sub] == 1) {
				cip->subcore_vm[new_sub] = vc->kvm;
				init_master_vcore(vc);
				master_vc = vc;
				++cip->n_subcores;
			} else {
				vc->master_vcore = master_vc;
				++new_sub;
			}
		}
		thr += vc->num_threads;
	}
	cip->subcore_threads[large_sub] = 2;
	cip->max_subcore_threads = 2;

	return true;
}

static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
{
	int n_threads = vc->num_threads;
	int sub;

	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
		return false;

	if (n_threads < cip->max_subcore_threads)
		n_threads = cip->max_subcore_threads;
	if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
		cip->max_subcore_threads = n_threads;
	} else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
		   vc->num_threads <= 2) {
		/*
		 * We may be able to fit another subcore in by
		 * splitting an existing subcore with 3 or 4
		 * threads into two 2-thread subcores, or one
		 * with 5 or 6 threads into three subcores.
		 * We can only do this if those subcores have
		 * piggybacked virtual cores.
		 */
		if (!can_split_piggybacked_subcores(cip))
			return false;
	} else {
		return false;
	}

	sub = cip->n_subcores;
	++cip->n_subcores;
	cip->total_threads += vc->num_threads;
	cip->subcore_threads[sub] = vc->num_threads;
	cip->subcore_vm[sub] = vc->kvm;
	init_master_vcore(vc);
	list_del(&vc->preempt_list);
	list_add_tail(&vc->preempt_list, &cip->vcs[sub]);

	return true;
}

static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
				  struct core_info *cip, int sub)
{
	struct kvmppc_vcore *vc;
	int n_thr;

	vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
			      preempt_list);

	/* require same VM and same per-core register values */
	if (pvc->kvm != vc->kvm ||
	    pvc->tb_offset != vc->tb_offset ||
	    pvc->pcr != vc->pcr ||
	    pvc->lpcr != vc->lpcr)
		return false;

	/* P8 guest with > 1 thread per core would see wrong TIR value */
	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
	    (vc->num_threads > 1 || pvc->num_threads > 1))
		return false;

	n_thr = cip->subcore_threads[sub] + pvc->num_threads;
	if (n_thr > cip->max_subcore_threads) {
		if (!subcore_config_ok(cip->n_subcores, n_thr))
			return false;
		cip->max_subcore_threads = n_thr;
	}

	cip->total_threads += pvc->num_threads;
	cip->subcore_threads[sub] = n_thr;
	pvc->master_vcore = vc;
	list_del(&pvc->preempt_list);
	list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);

	return true;
}
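
/*
 * Work out whether it is possible to piggyback the execution of
 * vcore *pvc onto the execution of the other vcores described in *cip.
 */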
static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
			  int target_threads)
{
	int sub;

	if (cip->total_threads + pvc->num_threads > target_threads)
		return false;
	for (sub = 0; sub < cip->n_subcores; ++sub)
		if (cip->subcore_threads[sub] &&
		    can_piggyback_subcore(pvc, cip, sub))
			return true;

	if (can_dynamic_split(pvc, cip))
		return true;

	return false;
}

static void prepare_threads(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu, *vnext;

	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
				 arch.run_list) {
		if (signal_pending(vcpu->arch.run_task))
			vcpu->arch.ret = -EINTR;
		else if (vcpu->arch.vpa.update_pending ||
			 vcpu->arch.slb_shadow.update_pending ||
			 vcpu->arch.dtl.update_pending)
			vcpu->arch.ret = RESUME_GUEST;
		else
			continue;
		kvmppc_remove_runnable(vc, vcpu);
		wake_up(&vcpu->arch.cpu_run);
	}
}

static void collect_piggybacks(struct core_info *cip, int target_threads)
{
	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
	struct kvmppc_vcore *pvc, *vcnext;

	spin_lock(&lp->lock);
	list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
		if (!spin_trylock(&pvc->lock))
			continue;
		prepare_threads(pvc);
		if (!pvc->n_runnable) {
			list_del_init(&pvc->preempt_list);
			if (pvc->runner == NULL) {
				pvc->vcore_state = VCORE_INACTIVE;
				kvmppc_core_end_stolen(pvc);
			}
			spin_unlock(&pvc->lock);
			continue;
		}
		if (!can_piggyback(pvc, cip, target_threads)) {
			spin_unlock(&pvc->lock);
			continue;
		}
		kvmppc_core_end_stolen(pvc);
		pvc->vcore_state = VCORE_PIGGYBACK;
		if (cip->total_threads >= target_threads)
			break;
	}
	spin_unlock(&lp->lock);
}

static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
{
	int still_running = 0;
	u64 now;
	long ret;
	struct kvm_vcpu *vcpu, *vnext;

	spin_lock(&vc->lock);
	now = get_tb();
	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
				 arch.run_list) {
		/* cancel pending dec exception if dec is positive */
		if (now < vcpu->arch.dec_expires &&
		    kvmppc_core_pending_dec(vcpu))
			kvmppc_core_dequeue_dec(vcpu);

		trace_kvm_guest_exit(vcpu);

		ret = RESUME_GUEST;
		if (vcpu->arch.trap)
			ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
						    vcpu->arch.run_task);

		vcpu->arch.ret = ret;
		vcpu->arch.trap = 0;

		if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
			if (vcpu->arch.pending_exceptions)
				kvmppc_core_prepare_to_enter(vcpu);
			if (vcpu->arch.ceded)
				kvmppc_set_timer(vcpu);
			else
				++still_running;
		} else {
			kvmppc_remove_runnable(vc, vcpu);
			wake_up(&vcpu->arch.cpu_run);
		}
	}
	list_del_init(&vc->preempt_list);
	if (!is_master) {
		if (still_running > 0) {
			kvmppc_vcore_preempt(vc);
		} else if (vc->runner) {
			vc->vcore_state = VCORE_PREEMPT;
			kvmppc_core_start_stolen(vc);
		} else {
			vc->vcore_state = VCORE_INACTIVE;
		}
		if (vc->n_runnable > 0 && vc->runner == NULL) {
			/* make sure there's a candidate runner awake */
			vcpu = list_first_entry(&vc->runnable_threads,
						struct kvm_vcpu, arch.run_list);
			wake_up(&vcpu->arch.cpu_run);
		}
	}
	spin_unlock(&vc->lock);
}
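
/*
 * Clear this core from the list of active host cores as we are about
 * to enter the guest.  Only do this if it is the primary thread of
 * the core (not if a subcore) that is entering the guest.
 */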
static inline void kvmppc_clear_host_core(int cpu)
{
	int core;

	if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
		return;
	/*
	 * Memory barrier can be omitted here as we will do a smp_wmb()
	 * later in kvmppc_start_thread and we need to ensure that state
	 * is visible to other CPUs only after we enter guest.
	 */
	core = cpu >> threads_shift;
	kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
}
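
/*
 * Advertise this core as an active host core since we exited the
 * guest.  Only need to do this if it is the primary thread of the
 * core that is exiting.
 */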
static inline void kvmppc_set_host_core(int cpu)
{
	int core;

	if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
		return;
	/*
	 * Memory barrier can be omitted here because we do a spin_unlock
	 * immediately after this which provides the memory barrier.
	 */
	core = cpu >> threads_shift;
	kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
}
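
/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */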
static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu, *vnext;
	int i;
	int srcu_idx;
	struct core_info core_info;
	struct kvmppc_vcore *pvc, *vcnext;
	struct kvm_split_mode split_info, *sip;
	int split, subcore_size, active;
	int sub;
	bool thr0_done;
	unsigned long cmd_bit, stat_bit;
	int pcpu, thr;
	int target_threads;

	/*
	 * Remove from the list any threads that have a signal pending
	 * or need a VPA update done
	 */
	prepare_threads(vc);

	/* if the runner is no longer runnable, let the caller pick a new one */
	if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
		return;

	/*
	 * Initialize *vc.
	 */
	init_master_vcore(vc);
	vc->preempt_tb = TB_NIL;

	/*
	 * Make sure we are running on primary threads, and that secondary
	 * threads are offline.  Also check that the number of threads in
	 * this guest does not exceed the number of threads per subcore.
	 */
	if ((threads_per_core > 1) &&
	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
		list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
					 arch.run_list) {
			vcpu->arch.ret = -EBUSY;
			kvmppc_remove_runnable(vc, vcpu);
			wake_up(&vcpu->arch.cpu_run);
		}
		goto out;
	}

	/*
	 * See if we could run any other vcores on the physical core
	 * along with this one.
	 */
	init_core_info(&core_info, vc);
	pcpu = smp_processor_id();
	target_threads = threads_per_subcore;
	if (target_smt_mode && target_smt_mode < target_threads)
		target_threads = target_smt_mode;
	if (vc->num_threads < target_threads)
		collect_piggybacks(&core_info, target_threads);

	/* Decide on micro-threading (split-core) mode */
	subcore_size = threads_per_subcore;
	cmd_bit = stat_bit = 0;
	split = core_info.n_subcores;
	sip = NULL;
	if (split > 1) {
		/* threads_per_subcore must be MAX_SMT_THREADS (8) here */
		if (split == 2 && (dynamic_mt_modes & 2)) {
			cmd_bit = HID0_POWER8_1TO2LPAR;
			stat_bit = HID0_POWER8_2LPARMODE;
		} else {
			split = 4;
			cmd_bit = HID0_POWER8_1TO4LPAR;
			stat_bit = HID0_POWER8_4LPARMODE;
		}
		subcore_size = MAX_SMT_THREADS / split;
		sip = &split_info;
		memset(&split_info, 0, sizeof(split_info));
		split_info.rpr = mfspr(SPRN_RPR);
		split_info.pmmar = mfspr(SPRN_PMMAR);
		split_info.ldbar = mfspr(SPRN_LDBAR);
		split_info.subcore_size = subcore_size;
		for (sub = 0; sub < core_info.n_subcores; ++sub)
			split_info.master_vcs[sub] =
				list_first_entry(&core_info.vcs[sub],
					struct kvmppc_vcore, preempt_list);
		/* order writes to split_info before kvm_split_mode pointer */
		smp_wmb();
	}
	for (thr = 0; thr < threads_per_subcore; ++thr)
		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;

	/* Initiate micro-threading (split-core) if required */
	if (cmd_bit) {
		unsigned long hid0 = mfspr(SPRN_HID0);

		hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
		mb();
		mtspr(SPRN_HID0, hid0);
		isync();
		for (;;) {
			hid0 = mfspr(SPRN_HID0);
			if (hid0 & stat_bit)
				break;
			cpu_relax();
		}
	}

	kvmppc_clear_host_core(pcpu);

	/* Start all the threads */
	active = 0;
	for (sub = 0; sub < core_info.n_subcores; ++sub) {
		thr = subcore_thread_map[sub];
		thr0_done = false;
		active |= 1 << thr;
		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
			pvc->pcpu = pcpu + thr;
			list_for_each_entry(vcpu, &pvc->runnable_threads,
					    arch.run_list) {
				kvmppc_start_thread(vcpu, pvc);
				kvmppc_create_dtl_entry(vcpu, pvc);
				trace_kvm_guest_enter(vcpu);
				if (!vcpu->arch.ptid)
					thr0_done = true;
				active |= 1 << (thr + vcpu->arch.ptid);
			}
			/*
			 * We need to start the first thread of each subcore
			 * even if it doesn't have a vcpu.
			 */
			if (pvc->master_vcore == pvc && !thr0_done)
				kvmppc_start_thread(NULL, pvc);
			thr += pvc->num_threads;
		}
	}

	/*
	 * Ensure that split_info.do_nap is set after setting
	 * the vcore pointer in the PACA of the secondaries.
	 */
	smp_mb();
	if (cmd_bit)
		split_info.do_nap = 1;

	/*
	 * When doing micro-threading, poke the inactive threads as well.
	 * This gets them to the nap instruction after kvm_do_nap,
	 * which reduces the time taken to unsplit later.
	 */
	if (split > 1)
		for (thr = 1; thr < threads_per_subcore; ++thr)
			if (!(active & (1 << thr)))
				kvmppc_ipi_thread(pcpu + thr);

	vc->vcore_state = VCORE_RUNNING;
	preempt_disable();

	trace_kvmppc_run_core(vc, 0);

	for (sub = 0; sub < core_info.n_subcores; ++sub)
		list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
			spin_unlock(&pvc->lock);

	kvm_guest_enter();

	srcu_idx = srcu_read_lock(&vc->kvm->srcu);

	__kvmppc_vcore_entry();

	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);

	spin_lock(&vc->lock);
	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
	vc->vcore_state = VCORE_EXITING;

	/* wait for secondary threads to finish writing their state to memory */
	kvmppc_wait_for_nap();

	/* Return the core to whole-core mode if we split it earlier */
	if (split > 1) {
		unsigned long hid0 = mfspr(SPRN_HID0);
		unsigned long loops = 0;

		hid0 &= ~HID0_POWER8_DYNLPARDIS;
		stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
		mb();
		mtspr(SPRN_HID0, hid0);
		isync();
		for (;;) {
			hid0 = mfspr(SPRN_HID0);
			if (!(hid0 & stat_bit))
				break;
			cpu_relax();
			++loops;
		}
		split_info.do_nap = 0;
	}

	/* Let secondaries go back to the offline loop */
	for (i = 0; i < threads_per_subcore; ++i) {
		kvmppc_release_hwthread(pcpu + i);
		if (sip && sip->napped[i])
			kvmppc_ipi_thread(pcpu + i);
	}

	kvmppc_set_host_core(pcpu);

	spin_unlock(&vc->lock);

	/* make sure updates to secondary vcpu structs are visible now */
	smp_mb();
	kvm_guest_exit();

	for (sub = 0; sub < core_info.n_subcores; ++sub)
		list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
					 preempt_list)
			post_guest_process(pvc, pvc == vc);

	spin_lock(&vc->lock);
	preempt_enable();

 out:
	vc->vcore_state = VCORE_INACTIVE;
	trace_kvmppc_run_core(vc, 1);
}
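
/*
 * Wait for some other vcpu thread to execute us, and
 * wake us up when we need to handle something in the host.
 */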
static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
				 struct kvm_vcpu *vcpu, int wait_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
		spin_unlock(&vc->lock);
		schedule();
		spin_lock(&vc->lock);
	}
	finish_wait(&vcpu->arch.cpu_run, &wait);
}
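
/*
 * All the vcpus in this vcore are idle, so wait for a decrementer
 * or external interrupt to one of the vcpus.  vc->lock is held.
 */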
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu;
	int do_sleep = 1;
	DECLARE_SWAITQUEUE(wait);

	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);

	/*
	 * Check one last time for pending exceptions and ceded state after
	 * we put ourselves on the wait queue
	 */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
			do_sleep = 0;
			break;
		}
	}

	if (!do_sleep) {
		finish_swait(&vc->wq, &wait);
		return;
	}

	vc->vcore_state = VCORE_SLEEPING;
	trace_kvmppc_vcore_blocked(vc, 0);
	spin_unlock(&vc->lock);
	schedule();
	finish_swait(&vc->wq, &wait);
	spin_lock(&vc->lock);
	vc->vcore_state = VCORE_INACTIVE;
	trace_kvmppc_vcore_blocked(vc, 1);
}

static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int n_ceded;
	struct kvmppc_vcore *vc;
	struct kvm_vcpu *v, *vn;

	trace_kvmppc_run_vcpu_enter(vcpu);

	kvm_run->exit_reason = 0;
	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;
	kvmppc_update_vpas(vcpu);

	/*
	 * Synchronize with other threads in this virtual core
	 */
	vc = vcpu->arch.vcore;
	spin_lock(&vc->lock);
	vcpu->arch.ceded = 0;
	vcpu->arch.run_task = current;
	vcpu->arch.kvm_run = kvm_run;
	vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
	vcpu->arch.busy_preempt = TB_NIL;
	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
	++vc->n_runnable;

	/*
	 * This happens the first time this is called for a vcpu.
	 * If the vcore is already running, we may be able to start
	 * this thread straight away and have it join in.
	 */
	if (!signal_pending(current)) {
		if (vc->vcore_state == VCORE_PIGGYBACK) {
			struct kvmppc_vcore *mvc = vc->master_vcore;
			if (spin_trylock(&mvc->lock)) {
				if (mvc->vcore_state == VCORE_RUNNING &&
				    !VCORE_IS_EXITING(mvc)) {
					kvmppc_create_dtl_entry(vcpu, vc);
					kvmppc_start_thread(vcpu, vc);
					trace_kvm_guest_enter(vcpu);
				}
				spin_unlock(&mvc->lock);
			}
		} else if (vc->vcore_state == VCORE_RUNNING &&
			   !VCORE_IS_EXITING(vc)) {
			kvmppc_create_dtl_entry(vcpu, vc);
			kvmppc_start_thread(vcpu, vc);
			trace_kvm_guest_enter(vcpu);
		} else if (vc->vcore_state == VCORE_SLEEPING) {
			swake_up(&vc->wq);
		}
	}

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       !signal_pending(current)) {
		if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
			kvmppc_vcore_end_preempt(vc);

		if (vc->vcore_state != VCORE_INACTIVE) {
			kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
			continue;
		}
		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
					 arch.run_list) {
			kvmppc_core_prepare_to_enter(v);
			if (signal_pending(v->arch.run_task)) {
				kvmppc_remove_runnable(vc, v);
				v->stat.signal_exits++;
				v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
				v->arch.ret = -EINTR;
				wake_up(&v->arch.cpu_run);
			}
		}
		if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
			break;
		n_ceded = 0;
		list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
			if (!v->arch.pending_exceptions)
				n_ceded += v->arch.ceded;
			else
				v->arch.ceded = 0;
		}
		vc->runner = vcpu;
		if (n_ceded == vc->n_runnable) {
			kvmppc_vcore_blocked(vc);
		} else if (need_resched()) {
			kvmppc_vcore_preempt(vc);
			/* Let something else run */
			cond_resched_lock(&vc->lock);
			if (vc->vcore_state == VCORE_PREEMPT)
				kvmppc_vcore_end_preempt(vc);
		} else {
			kvmppc_run_core(vc);
		}
		vc->runner = NULL;
	}

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       (vc->vcore_state == VCORE_RUNNING ||
		vc->vcore_state == VCORE_EXITING ||
		vc->vcore_state == VCORE_PIGGYBACK))
		kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);

	if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
		kvmppc_vcore_end_preempt(vc);

	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
		kvmppc_remove_runnable(vc, vcpu);
		vcpu->stat.signal_exits++;
		kvm_run->exit_reason = KVM_EXIT_INTR;
		vcpu->arch.ret = -EINTR;
	}

	if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
		/* Wake up some vcpu to run the core */
		v = list_first_entry(&vc->runnable_threads,
				     struct kvm_vcpu, arch.run_list);
		wake_up(&v->arch.cpu_run);
	}

	trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
	spin_unlock(&vc->lock);
	return vcpu->arch.ret;
}

static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;
	int srcu_idx;

	if (!vcpu->arch.sane) {
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return -EINVAL;
	}

	kvmppc_core_prepare_to_enter(vcpu);

	/* No need to go into the guest when all we'll do is come back out */
	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		return -EINTR;
	}

	atomic_inc(&vcpu->kvm->arch.vcpus_running);
	/* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */
	smp_mb();

	/* On the first time here, set up HTAB and VRMA */
	if (!vcpu->kvm->arch.hpte_setup_done) {
		r = kvmppc_hv_setup_htab_rma(vcpu);
		if (r)
			goto out;
	}

	flush_all_to_thread(current);

	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
	vcpu->arch.pgdir = current->mm->pgd;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;

	do {
		r = kvmppc_run_vcpu(run, vcpu);

		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
		    !(vcpu->arch.shregs.msr & MSR_PR)) {
			trace_kvm_hcall_enter(vcpu);
			r = kvmppc_pseries_do_hcall(vcpu);
			trace_kvm_hcall_exit(vcpu, r);
			kvmppc_core_prepare_to_enter(vcpu);
		} else if (r == RESUME_PAGE_FAULT) {
			srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
			r = kvmppc_book3s_hv_page_fault(run, vcpu,
				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
		}
	} while (is_kvmppc_resume_guest(r));

 out:
	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
	atomic_dec(&vcpu->kvm->arch.vcpus_running);
	return r;
}

static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
				     int linux_psize)
{
	struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];

	if (!def->shift)
		return;
	(*sps)->page_shift = def->shift;
	(*sps)->slb_enc = def->sllp;
	(*sps)->enc[0].page_shift = def->shift;
	(*sps)->enc[0].pte_enc = def->penc[linux_psize];
	/*
	 * Add 16MB MPSS support if host supports it
	 */
	if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
		(*sps)->enc[1].page_shift = 24;
		(*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
	}
	(*sps)++;
}

static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
					 struct kvm_ppc_smmu_info *info)
{
	struct kvm_ppc_one_seg_page_size *sps;

	info->flags = KVM_PPC_PAGE_SIZES_REAL;
	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
		info->flags |= KVM_PPC_1T_SEGMENTS;
	info->slb_size = mmu_slb_size;

	/* We only support these sizes for now, and no multi-size segments */
	sps = &info->sps[0];
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);

	return 0;
}
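
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */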
static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
					 struct kvm_dirty_log *log)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int r;
	unsigned long n;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = kvm_dirty_bitmap_bytes(memslot);
	memset(memslot->dirty_bitmap, 0, n);

	r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
	if (r)
		goto out;

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
					struct kvm_memory_slot *dont)
{
	if (!dont || free->arch.rmap != dont->arch.rmap) {
		vfree(free->arch.rmap);
		free->arch.rmap = NULL;
	}
}

static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
					 unsigned long npages)
{
	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
	if (!slot->arch.rmap)
		return -ENOMEM;

	return 0;
}

static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
					struct kvm_memory_slot *memslot,
					const struct kvm_userspace_memory_region *mem)
{
	return 0;
}

static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new)
{
	unsigned long npages = mem->memory_size >> PAGE_SHIFT;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	if (npages && old->npages) {
		/*
		 * If modifying a memslot, reset all the rmap dirty bits.
		 * If this is a new memslot, we don't need to do anything
		 * since the rmap array starts out as all zeroes,
		 * i.e. no pages are dirty.
		 */
		slots = kvm_memslots(kvm);
		memslot = id_to_memslot(slots, mem->slot);
		kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
	}
}
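
/*
 * Update LPCR values in kvm->arch and in vcores.
 * Caller must hold kvm->lock.
 */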
void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
{
	long int i;
	u32 cores_done = 0;

	if ((kvm->arch.lpcr & mask) == lpcr)
		return;

	kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;

	for (i = 0; i < KVM_MAX_VCORES; ++i) {
		struct kvmppc_vcore *vc = kvm->arch.vcores[i];
		if (!vc)
			continue;
		spin_lock(&vc->lock);
		vc->lpcr = (vc->lpcr & ~mask) | lpcr;
		spin_unlock(&vc->lock);
		if (++cores_done >= kvm->arch.online_vcores)
			break;
	}
}

static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
{
	return;
}

static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
	int err = 0;
	struct kvm *kvm = vcpu->kvm;
	unsigned long hva;
	struct kvm_memory_slot *memslot;
	struct vm_area_struct *vma;
	unsigned long lpcr = 0, senc;
	unsigned long psize, porder;
	int srcu_idx;

	mutex_lock(&kvm->lock);
	if (kvm->arch.hpte_setup_done)
		goto out;	/* another vcpu beat us to it */

	/* Allocate hashed page table (if not done already) and reset it */
	if (!kvm->arch.hpt_virt) {
		err = kvmppc_alloc_hpt(kvm, NULL);
		if (err) {
			pr_err("KVM: Couldn't alloc HPT\n");
			goto out;
		}
	}

	/* Look up the memslot for guest physical address 0 */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	memslot = gfn_to_memslot(kvm, 0);

	/* We must have some memory at 0 by now */
	err = -EINVAL;
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		goto out_srcu;

	/* Look up the VMA for the start of this memory slot */
	hva = memslot->userspace_addr;
	down_read(&current->mm->mmap_sem);
	vma = find_vma(current->mm, hva);
	if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
		goto up_out;

	psize = vma_kernel_pagesize(vma);
	porder = __ilog2(psize);

	up_read(&current->mm->mmap_sem);

	/* We can handle 4k, 64k or 16M pages in the VRMA */
	err = -EINVAL;
	if (!(psize == 0x1000 || psize == 0x10000 ||
	      psize == 0x1000000))
		goto out_srcu;

	/* Update VRMASD field in the LPCR */
	senc = slb_pgsize_encoding(psize);
	kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
		(VRMA_VSID << SLB_VSID_SHIFT_1T);
	lpcr = senc << (LPCR_VRMASD_SH - 4);

	/* Create HPTEs in the hash page table for the VRMA */
	kvmppc_map_vrma(vcpu, memslot, porder);

	kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);

	/* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */
	smp_wmb();
	kvm->arch.hpte_setup_done = 1;
	err = 0;
 out_srcu:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
 out:
	mutex_unlock(&kvm->lock);
	return err;

 up_out:
	up_read(&current->mm->mmap_sem);
	goto out_srcu;
}

#ifdef CONFIG_KVM_XICS
static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
			     void *hcpu)
{
	unsigned long cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		kvmppc_set_host_core(cpu);
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		kvmppc_clear_host_core(cpu);
		break;
#endif
	default:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block kvmppc_cpu_notifier = {
	.notifier_call = kvmppc_cpu_notify,
};
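
/*
 * Allocate a per-core structure for managing state about which cores are
 * running in the host versus the guest and for exchanging data between
 * real-mode KVM and CPUs running in the host.
 * This is only done for the first VM.
 * The allocated structure stays even if all VMs have stopped running.
 * It is only freed when the kvm-hv module is unloaded.
 * It's OK for this routine to fail, we just don't support host
 * core operations like redirecting H_IPI wakeups.
 */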
void kvmppc_alloc_host_rm_ops(void)
{
	struct kvmppc_host_rm_ops *ops;
	unsigned long l_ops;
	int cpu, core;
	int size;

	/* Already allocated?  Nothing to do. */
	if (kvmppc_host_rm_ops_hv != NULL)
		return;

	ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
	if (!ops)
		return;

	size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
	ops->rm_core = kzalloc(size, GFP_KERNEL);

	if (!ops->rm_core) {
		kfree(ops);
		return;
	}

	get_online_cpus();

	for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
		if (!cpu_online(cpu))
			continue;

		core = cpu >> threads_shift;
		ops->rm_core[core].rm_state.in_host = 1;
	}

	ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;

	/*
	 * Make the contents of the kvmppc_host_rm_ops structure visible
	 * to other CPUs before we assign it to the global variable.
	 * Do an atomic assignment (no locks used here), but if someone
	 * beats us to it, just free our copy and return.
	 */
	smp_wmb();
	l_ops = (unsigned long) ops;

	if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
		put_online_cpus();
		kfree(ops->rm_core);
		kfree(ops);
		return;
	}

	register_cpu_notifier(&kvmppc_cpu_notifier);

	put_online_cpus();
}

void kvmppc_free_host_rm_ops(void)
{
	if (kvmppc_host_rm_ops_hv) {
		unregister_cpu_notifier(&kvmppc_cpu_notifier);
		kfree(kvmppc_host_rm_ops_hv->rm_core);
		kfree(kvmppc_host_rm_ops_hv);
		kvmppc_host_rm_ops_hv = NULL;
	}
}
#endif

static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
	unsigned long lpcr, lpid;
	char buf[32];

	/* Allocate the guest's logical partition ID */
	lpid = kvmppc_alloc_lpid();
	if ((long)lpid < 0)
		return -ENOMEM;
	kvm->arch.lpid = lpid;

	kvmppc_alloc_host_rm_ops();

	/*
	 * Since we don't flush the TLB when tearing down a VM,
	 * and this lpid might have previously been used,
	 * make sure we flush on each core before running the new VM.
	 */
	cpumask_setall(&kvm->arch.need_tlb_flush);

	/* Start out with the default set of hcalls enabled */
	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
	       sizeof(kvm->arch.enabled_hcalls));

	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);

	/* Init LPCR for virtual RMA mode */
	kvm->arch.host_lpid = mfspr(SPRN_LPID);
	kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
	lpcr &= LPCR_PECE | LPCR_LPES;
	lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
		LPCR_VPM0 | LPCR_VPM1;
	kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
		(VRMA_VSID << SLB_VSID_SHIFT_1T);
	/* On POWER8 turn on online bit to enable PURR/SPURR */
	if (cpu_has_feature(CPU_FTR_ARCH_207S))
		lpcr |= LPCR_ONL;
	kvm->arch.lpcr = lpcr;

	/*
	 * Track that we now have a HV mode VM active. This blocks secondary
	 * CPU threads from coming online.
	 */
	kvm_hv_vm_activated();

	/*
	 * Create a debugfs directory for the VM
	 */
	snprintf(buf, sizeof(buf), "vm%d", current->pid);
	kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
	if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
		kvmppc_mmu_debugfs_init(kvm);

	return 0;
}

static void kvmppc_free_vcores(struct kvm *kvm)
{
	long int i;

	for (i = 0; i < KVM_MAX_VCORES; ++i)
		kfree(kvm->arch.vcores[i]);
	kvm->arch.online_vcores = 0;
}

static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
	debugfs_remove_recursive(kvm->arch.debugfs_dir);

	kvm_hv_vm_deactivated();

	kvmppc_free_vcores(kvm);

	kvmppc_free_hpt(kvm);
}

/* HV KVM does no instruction or SPR emulation, so these handlers just fail */
static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
				     unsigned int inst, int *advance)
{
	return EMULATE_FAIL;
}

static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
					ulong spr_val)
{
	return EMULATE_FAIL;
}

static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
					ulong *spr_val)
{
	return EMULATE_FAIL;
}

static int kvmppc_core_check_processor_compat_hv(void)
{
	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
	    !cpu_has_feature(CPU_FTR_ARCH_206))
		return -EIO;
	return 0;
}

static long kvm_arch_vm_ioctl_hv(struct file *filp,
				 unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm __maybe_unused = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r;

	switch (ioctl) {

	case KVM_PPC_ALLOCATE_HTAB: {
		u32 htab_order;

		r = -EFAULT;
		if (get_user(htab_order, (u32 __user *)argp))
			break;
		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
		if (r)
			break;
		r = -EFAULT;
		if (put_user(htab_order, (u32 __user *)argp))
			break;
		r = 0;
		break;
	}

	case KVM_PPC_GET_HTAB_FD: {
		struct kvm_get_htab_fd ghf;

		r = -EFAULT;
		if (copy_from_user(&ghf, argp, sizeof(ghf)))
			break;
		r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
		break;
	}

	default:
		r = -ENOTTY;
	}

	return r;
}
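
/*
 * List of handled hcalls that are enabled by default.
 * For compatibility with old userspace, we enable by default
 * all hcalls that were implemented before the hcall-enabling
 * facility was added.  Note this list should not include H_RTAS.
 */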
static unsigned int default_hcall_list[] = {
	H_REMOVE,
	H_ENTER,
	H_READ,
	H_PROTECT,
	H_BULK_REMOVE,
	H_GET_TCE,
	H_PUT_TCE,
	H_SET_DABR,
	H_SET_XDABR,
	H_CEDE,
	H_PROD,
	H_CONFER,
	H_REGISTER_VPA,
#ifdef CONFIG_KVM_XICS
	H_EOI,
	H_CPPR,
	H_IPI,
	H_IPOLL,
	H_XIRR,
	H_XIRR_X,
#endif
	0
};

static void init_default_hcalls(void)
{
	int i;
	unsigned int hcall;

	for (i = 0; default_hcall_list[i]; ++i) {
		hcall = default_hcall_list[i];
		WARN_ON(!kvmppc_hcall_impl_hv(hcall));
		__set_bit(hcall / 4, default_enabled_hcalls);
	}
}

static struct kvmppc_ops kvm_ops_hv = {
	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
	.get_one_reg = kvmppc_get_one_reg_hv,
	.set_one_reg = kvmppc_set_one_reg_hv,
	.vcpu_load = kvmppc_core_vcpu_load_hv,
	.vcpu_put = kvmppc_core_vcpu_put_hv,
	.set_msr = kvmppc_set_msr_hv,
	.vcpu_run = kvmppc_vcpu_run_hv,
	.vcpu_create = kvmppc_core_vcpu_create_hv,
	.vcpu_free = kvmppc_core_vcpu_free_hv,
	.check_requests = kvmppc_core_check_requests_hv,
	.get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv,
	.flush_memslot = kvmppc_core_flush_memslot_hv,
	.prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
	.commit_memory_region = kvmppc_core_commit_memory_region_hv,
	.unmap_hva = kvm_unmap_hva_hv,
	.unmap_hva_range = kvm_unmap_hva_range_hv,
	.age_hva = kvm_age_hva_hv,
	.test_age_hva = kvm_test_age_hva_hv,
	.set_spte_hva = kvm_set_spte_hva_hv,
	.mmu_destroy = kvmppc_mmu_destroy_hv,
	.free_memslot = kvmppc_core_free_memslot_hv,
	.create_memslot = kvmppc_core_create_memslot_hv,
	.init_vm = kvmppc_core_init_vm_hv,
	.destroy_vm = kvmppc_core_destroy_vm_hv,
	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
	.emulate_op = kvmppc_core_emulate_op_hv,
	.emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
	.emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
	.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
	.arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
	.hcall_implemented = kvmppc_hcall_impl_hv,
};

static int kvmppc_book3s_init_hv(void)
{
	int r;

	r = kvmppc_core_check_processor_compat_hv();
	if (r < 0)
		return -ENODEV;

	kvm_ops_hv.owner = THIS_MODULE;
	kvmppc_hv_ops = &kvm_ops_hv;

	init_default_hcalls();

	init_vcore_lists();

	r = kvmppc_mmu_hv_init();
	return r;
}

static void kvmppc_book3s_exit_hv(void)
{
	kvmppc_free_host_rm_ops();
	kvmppc_hv_ops = NULL;
}

module_init(kvmppc_book3s_init_hv);
module_exit(kvmppc_book3s_exit_hv);
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");