// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2018
 *
 * Description: KVM functions specific to running nested KVM-HV guests
 * on Book3S processors (specifically POWER9 and later).
 */

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/llist.h>

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/reg.h>

static struct patb_entry *pseries_partition_tb;

static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);

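/*
 * Copy the hypervisor-maintained register state of this vcpu and its
 * virtual core into the hv_guest_state buffer @hr.
 */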
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->pcr = vc->pcr;
	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->tb_offset = vc->tb_offset;
	hr->dawr0 = vcpu->arch.dawr;
	hr->dawrx0 = vcpu->arch.dawrx;
	hr->ciabr = vcpu->arch.ciabr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
}

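/*
 * Byte-swap helpers used when the L1 guest runs with the opposite
 * endianness to the host (kvmppc_need_byteswap()).
 */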
static void byteswap_pt_regs(struct pt_regs *regs)
{
	unsigned long *addr = (unsigned long *) regs;

	for (; addr < ((unsigned long *) (regs + 1)); addr++)
		*addr = swab64(*addr);
}

static void byteswap_hv_regs(struct hv_guest_state *hr)
{
	hr->version = swab64(hr->version);
	hr->lpid = swab32(hr->lpid);
	hr->vcpu_token = swab32(hr->vcpu_token);
	hr->lpcr = swab64(hr->lpcr);
	hr->pcr = swab64(hr->pcr);
	hr->amor = swab64(hr->amor);
	hr->dpdes = swab64(hr->dpdes);
	hr->hfscr = swab64(hr->hfscr);
	hr->tb_offset = swab64(hr->tb_offset);
	hr->dawr0 = swab64(hr->dawr0);
	hr->dawrx0 = swab64(hr->dawrx0);
	hr->ciabr = swab64(hr->ciabr);
	hr->hdec_expiry = swab64(hr->hdec_expiry);
	hr->purr = swab64(hr->purr);
	hr->spurr = swab64(hr->spurr);
	hr->ic = swab64(hr->ic);
	hr->vtb = swab64(hr->vtb);
	hr->hdar = swab64(hr->hdar);
	hr->hdsisr = swab64(hr->hdsisr);
	hr->heir = swab64(hr->heir);
	hr->asdr = swab64(hr->asdr);
	hr->srr0 = swab64(hr->srr0);
	hr->srr1 = swab64(hr->srr1);
	hr->sprg[0] = swab64(hr->sprg[0]);
	hr->sprg[1] = swab64(hr->sprg[1]);
	hr->sprg[2] = swab64(hr->sprg[2]);
	hr->sprg[3] = swab64(hr->sprg[3]);
	hr->pidr = swab64(hr->pidr);
	hr->cfar = swab64(hr->cfar);
	hr->ppr = swab64(hr->ppr);
}

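/*
 * Capture the state to be returned to the L1 guest after running its
 * nested (L2) vcpu, including fault information for the exit reason.
 */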
static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
				 struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
	switch (trap) {
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		hr->hdar = vcpu->arch.fault_dar;
		hr->hdsisr = vcpu->arch.fault_dsisr;
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		hr->heir = vcpu->arch.emul_inst;
		break;
	}
}

static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	/*
	 * Don't let L1 enable features for L2 which we've disabled for L1,
	 * but preserve the interrupt cause field.
	 */
	hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);

	/* Don't let data address watchpoint match in hypervisor state */
	hr->dawrx0 &= ~DAWRX_HYP;

	/* Don't let completed instruction address breakpt match in HV state */
	if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
		hr->ciabr &= ~CIABR_PRIV;
}

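/*
 * Load the hv_guest_state supplied by L1 (already sanitised where needed)
 * into this vcpu and its virtual core.
 */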
static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->pcr = hr->pcr;
	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.dawr = hr->dawr0;
	vcpu->arch.dawrx = hr->dawrx0;
	vcpu->arch.ciabr = hr->ciabr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}

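/*
 * Load the return values in @hr back into this vcpu, including the fault
 * information describing why the nested guest exited.
 */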
void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
				   struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.fault_dar = hr->hdar;
	vcpu->arch.fault_dsisr = hr->hdsisr;
	vcpu->arch.fault_gpa = hr->asdr;
	vcpu->arch.emul_inst = hr->heir;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}

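/*
 * Handle the H_ENTER_NESTED hcall from the L1 guest:
 * r4 = L1 guest real address of an hv_guest_state structure,
 * r5 = L1 guest real address of a pt_regs structure for the L2 vcpu.
 * Runs the L2 vcpu, copies the updated state back to L1 memory, and
 * returns the interrupt vector that caused the exit (or an H_xxx error).
 */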
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
	long int err, r;
	struct kvm_nested_guest *l2;
	struct pt_regs l2_regs, saved_l1_regs;
	struct hv_guest_state l2_hv, saved_l1_hv;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 hv_ptr, regs_ptr;
	u64 hdec_exp;
	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
	u64 mask;
	unsigned long lpcr;

	if (vcpu->kvm->arch.l1_ptcr == 0)
		return H_NOT_AVAILABLE;

	/* copy parameters in */
	hv_ptr = kvmppc_get_gpr(vcpu, 4);
	err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
				  sizeof(struct hv_guest_state));
	if (err)
		return H_PARAMETER;
	if (kvmppc_need_byteswap(vcpu))
		byteswap_hv_regs(&l2_hv);
	if (l2_hv.version != HV_GUEST_STATE_VERSION)
		return H_P2;

	regs_ptr = kvmppc_get_gpr(vcpu, 5);
	err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
				  sizeof(struct pt_regs));
	if (err)
		return H_PARAMETER;
	if (kvmppc_need_byteswap(vcpu))
		byteswap_pt_regs(&l2_regs);
	if (l2_hv.vcpu_token >= NR_CPUS)
		return H_PARAMETER;

	/* translate lpid */
	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
	if (!l2)
		return H_PARAMETER;
	if (!l2->l1_gr_to_hr) {
		mutex_lock(&l2->tlb_lock);
		kvmhv_update_ptbl_cache(l2);
		mutex_unlock(&l2->tlb_lock);
	}

	/* save l1 values of things */
	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
	saved_l1_regs = vcpu->arch.regs;
	kvmhv_save_hv_regs(vcpu, &saved_l1_hv);

	/* convert TB values/offsets to host (L0) values */
	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
	vc->tb_offset += l2_hv.tb_offset;

	/* set L1 state to L2 state */
	vcpu->arch.nested = l2;
	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
	vcpu->arch.regs = l2_regs;
	vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
		LPCR_LPES | LPCR_MER;
	lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
	sanitise_hv_regs(vcpu, &l2_hv);
	restore_hv_regs(vcpu, &l2_hv);

	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;
	do {
		if (mftb() >= hdec_exp) {
			vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
			r = RESUME_HOST;
			break;
		}
		r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
					  lpcr);
	} while (is_kvmppc_resume_guest(r));

	/* save L2 state for return */
	l2_regs = vcpu->arch.regs;
	l2_regs.msr = vcpu->arch.shregs.msr;
	delta_purr = vcpu->arch.purr - l2_hv.purr;
	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
	delta_ic = vcpu->arch.ic - l2_hv.ic;
	delta_vtb = vc->vtb - l2_hv.vtb;
	save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);

	/* restore L1 state */
	vcpu->arch.nested = NULL;
	vcpu->arch.regs = saved_l1_regs;
	vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
	/* set L1 MSR TS field according to L2 transaction state */
	if (l2_regs.msr & MSR_TS_MASK)
		vcpu->arch.shregs.msr |= MSR_TS_S;
	vc->tb_offset = saved_l1_hv.tb_offset;
	restore_hv_regs(vcpu, &saved_l1_hv);
	vcpu->arch.purr += delta_purr;
	vcpu->arch.spurr += delta_spurr;
	vcpu->arch.ic += delta_ic;
	vc->vtb += delta_vtb;

	kvmhv_put_nested(l2);

	/* copy l2_hv_state and regs back to guest */
	if (kvmppc_need_byteswap(vcpu)) {
		byteswap_hv_regs(&l2_hv);
		byteswap_pt_regs(&l2_regs);
	}
	err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
				   sizeof(struct hv_guest_state));
	if (err)
		return H_AUTHORITY;
	err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
				   sizeof(struct pt_regs));
	if (err)
		return H_AUTHORITY;

	if (r == -EINTR)
		return H_INTERRUPT;

	return vcpu->arch.trap;
}

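/*
 * Allocate and register a partition table with the parent (L0) hypervisor
 * so that this hypervisor can itself run nested guests; only needed when
 * we are running as a guest hypervisor on pseries.
 */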
long kvmhv_nested_init(void)
{
	long int ptb_order;
	unsigned long ptcr;
	long rc;

	if (!kvmhv_on_pseries())
		return 0;
	if (!radix_enabled())
		return -ENODEV;

	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
	if (ptb_order < 8)
		ptb_order = 8;
	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
				       GFP_KERNEL);
	if (!pseries_partition_tb) {
		pr_err("kvm-hv: failed to allocate nested partition table\n");
		return -ENOMEM;
	}

	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
	if (rc != H_SUCCESS) {
		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
		       rc);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
		return -ENODEV;
	}

	return 0;
}

void kvmhv_nested_exit(void)
{
	/*
	 * N.B. the kvmhv_on_pseries() test is there because it enables
	 * the compiler to remove the call to plpar_hcall_norets()
	 * when CONFIG_PPC_PSERIES=n.
	 */
	if (kvmhv_on_pseries() && pseries_partition_tb) {
		plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
	}
}

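/*
 * Invalidate all partition-scoped translations for @lpid, either directly
 * or, when running under a parent hypervisor, via H_TLB_INVALIDATE.
 */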
static void kvmhv_flush_lpid(unsigned int lpid)
{
	long rc;

	if (!kvmhv_on_pseries()) {
		radix__flush_tlb_lpid(lpid);
		return;
	}

	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
				lpid, TLBIEL_INVAL_SET_LPID);
	if (rc)
		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
}

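/*
 * Set a partition table entry, either directly in the real partition
 * table or in the copy registered with the parent hypervisor.
 */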
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
{
	if (!kvmhv_on_pseries()) {
		mmu_partition_table_set_entry(lpid, dw0, dw1);
		return;
	}

	pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
	/* L0 will do the necessary barriers */
	kvmhv_flush_lpid(lpid);
}

static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
{
	unsigned long dw0;

	dw0 = PATB_HR | radix__get_tree_size() |
		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
}

void kvmhv_vm_nested_init(struct kvm *kvm)
{
	kvm->arch.max_nested_lpid = -1;
}

/*
 * Handle the H_SET_PARTITION_TABLE hcall.
 * r4 = guest real address of partition table + log_2(size) - 12
 * (formatted as for the PTCR).
 */
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
	int srcu_idx;
	long ret = H_SUCCESS;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	/*
	 * Limit the partition table to 4096 entries (because that's what
	 * hardware supports), and check the base address.
	 */
	if ((ptcr & PRTS_MASK) > 12 - 8 ||
	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
		ret = H_PARAMETER;
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	if (ret == H_SUCCESS)
		kvm->arch.l1_ptcr = ptcr;
	return ret;
}

/*
 * Reload our cached copy of this nested guest's partition-table entry
 * from the L1 guest's partition table and propagate it to the shadow
 * partition-table entry that is actually used for translation.
 */
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
{
	int ret;
	struct patb_entry ptbl_entry;
	unsigned long ptbl_addr;
	struct kvm *kvm = gp->l1_host;

	ret = -EFAULT;
	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
		ret = kvm_read_guest(kvm, ptbl_addr,
				     &ptbl_entry, sizeof(ptbl_entry));
	if (ret) {
		gp->l1_gr_to_hr = 0;
		gp->process_table = 0;
	} else {
		gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
		gp->process_table = be64_to_cpu(ptbl_entry.patb1);
	}
	kvmhv_set_nested_ptbl(gp);
}

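/*
 * Allocate a kvm_nested_guest structure, along with a shadow page table
 * and an LPID for it to run under.
 */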
struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
{
	struct kvm_nested_guest *gp;
	long shadow_lpid;

	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
	if (!gp)
		return NULL;
	gp->l1_host = kvm;
	gp->l1_lpid = lpid;
	mutex_init(&gp->tlb_lock);
	gp->shadow_pgtable = pgd_alloc(kvm->mm);
	if (!gp->shadow_pgtable)
		goto out_free;
	shadow_lpid = kvmppc_alloc_lpid();
	if (shadow_lpid < 0)
		goto out_free2;
	gp->shadow_lpid = shadow_lpid;

	memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));

	return gp;

 out_free2:
	pgd_free(kvm->mm, gp->shadow_pgtable);
 out_free:
	kfree(gp);
	return NULL;
}

/*
 * Free up any resources allocated for a nested guest.
 */
static void kvmhv_release_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	if (gp->shadow_pgtable) {
		/*
		 * No vcpu is using this struct and no call to
		 * kvmhv_get_nested can find this struct,
		 * so we don't need to hold kvm->mmu_lock.
		 */
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		pgd_free(kvm->mm, gp->shadow_pgtable);
	}
	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
	kvmppc_free_lpid(gp->shadow_lpid);
	kfree(gp);
}

static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	int lpid = gp->l1_lpid;
	long ref;

	spin_lock(&kvm->mmu_lock);
	if (gp == kvm->arch.nested_guests[lpid]) {
		kvm->arch.nested_guests[lpid] = NULL;
		if (lpid == kvm->arch.max_nested_lpid) {
			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
				;
			kvm->arch.max_nested_lpid = lpid;
		}
		--gp->refcnt;
	}
	ref = gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

/*
 * Free up all nested resources allocated for this guest.
 * This is called with no vcpus of the guest running, when
 * switching the guest to HPT mode or when destroying the
 * guest.
 */
void kvmhv_release_all_nested(struct kvm *kvm)
{
	int i;
	struct kvm_nested_guest *gp;
	struct kvm_nested_guest *freelist = NULL;
	struct kvm_memory_slot *memslot;
	int srcu_idx;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (!gp)
			continue;
		kvm->arch.nested_guests[i] = NULL;
		if (--gp->refcnt == 0) {
			gp->next = freelist;
			freelist = gp;
		}
	}
	kvm->arch.max_nested_lpid = -1;
	spin_unlock(&kvm->mmu_lock);
	while ((gp = freelist) != NULL) {
		freelist = gp->next;
		kvmhv_release_nested(gp);
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
		kvmhv_free_memslot_nest_rmap(memslot);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

/* caller must hold gp->tlb_lock */
static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	spin_lock(&kvm->mmu_lock);
	kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
	spin_unlock(&kvm->mmu_lock);
	kvmhv_flush_lpid(gp->shadow_lpid);
	kvmhv_update_ptbl_cache(gp);
	if (gp->l1_gr_to_hr == 0)
		kvmhv_remove_nested(gp);
}

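/*
 * Look up the nested guest with L1 lpid @l1_lpid, creating it if @create
 * is true, and take a reference on it.  The reference is dropped with
 * kvmhv_put_nested().
 */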
struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
					  bool create)
{
	struct kvm_nested_guest *gp, *newgp;

	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
		return NULL;

	spin_lock(&kvm->mmu_lock);
	gp = kvm->arch.nested_guests[l1_lpid];
	if (gp)
		++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (gp || !create)
		return gp;

	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
	if (!newgp)
		return NULL;
	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.nested_guests[l1_lpid]) {
		/* someone else beat us to it */
		gp = kvm->arch.nested_guests[l1_lpid];
	} else {
		kvm->arch.nested_guests[l1_lpid] = newgp;
		++newgp->refcnt;
		gp = newgp;
		newgp = NULL;
		if (l1_lpid > kvm->arch.max_nested_lpid)
			kvm->arch.max_nested_lpid = l1_lpid;
	}
	++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (newgp)
		kvmhv_release_nested(newgp);

	return gp;
}

void kvmhv_put_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	long ref;

	spin_lock(&kvm->mmu_lock);
	ref = --gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
{
	if (lpid > kvm->arch.max_nested_lpid)
		return NULL;
	return kvm->arch.nested_guests[lpid];
}

static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
{
	return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
				      RMAP_NESTED_GPA_MASK));
}

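/*
 * Record in the rmap for an L1 guest page that it is mapped in the shadow
 * page table of a nested guest at the gpa/lpid encoded in *n_rmap.  A
 * single entry is stored inline in *rmapp; further entries go on an llist.
 */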
void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
			    struct rmap_nested **n_rmap)
{
	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
	struct rmap_nested *cursor;
	u64 rmap, new_rmap = (*n_rmap)->rmap;

	/* Are there any existing entries? */
	if (!(*rmapp)) {
		/* No -> use the rmap as a single entry */
		*rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
		return;
	}

	/* Do any entries match what we're trying to insert? */
	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
			return;
	}

	/* Do we need to create a list or just add the new entry? */
	rmap = *rmapp;
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		*rmapp = 0UL;
	llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY)
		(*n_rmap)->list.next = (struct llist_node *) rmap;

	/* Set NULL so not freed by caller */
	*n_rmap = NULL;
}

static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
				      unsigned long clr, unsigned long set,
				      unsigned long hpa, unsigned long mask)
{
	struct kvm_nested_guest *gp;
	unsigned long gpa;
	unsigned int shift, lpid;
	pte_t *ptep;

	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
	gp = kvmhv_find_nested(kvm, lpid);
	if (!gp)
		return;

	/* Find the pte */
	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
	/*
	 * If the pte is present and the pfn is still the same, update the pte.
	 * If the pfn has changed then this is a stale rmap entry, the nested
	 * gpa actually points somewhere else now, and there is nothing to do.
	 */
	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
		__radix_pte_update(ptep, clr, set);
		kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
	}
}

/*
 * For a given list of rmap entries, update the rc bits in all ptes in
 * the shadow page tables of the nested guests which reference the page.
 */
void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
				    unsigned long clr, unsigned long set,
				    unsigned long hpa, unsigned long nbytes)
{
	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
	struct rmap_nested *cursor;
	unsigned long rmap, mask;

	if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
		return;

	mask = PTE_RPN_MASK & ~(nbytes - 1);
	hpa &= mask;

	for_each_nest_rmap_safe(cursor, entry, &rmap)
		kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
}

static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
				   unsigned long hpa, unsigned long mask)
{
	struct kvm_nested_guest *gp;
	unsigned long gpa;
	unsigned int shift, lpid;
	pte_t *ptep;

	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
	gp = kvmhv_find_nested(kvm, lpid);
	if (!gp)
		return;

	/* Find and invalidate the pte */
	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
	/* Don't spuriously invalidate ptes if the pfn has changed */
	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
}

static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
					unsigned long hpa, unsigned long mask)
{
	struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
	struct rmap_nested *cursor;
	unsigned long rmap;

	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
		kfree(cursor);
	}
}

/* called with kvm->mmu_lock held */
void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
				  const struct kvm_memory_slot *memslot,
				  unsigned long gpa, unsigned long hpa,
				  unsigned long nbytes)
{
	unsigned long gfn, end_gfn;
	unsigned long addr_mask;

	if (!memslot)
		return;
	gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
	end_gfn = gfn + (nbytes >> PAGE_SHIFT);

	addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
	hpa &= addr_mask;

	for (; gfn < end_gfn; gfn++) {
		unsigned long *rmap = &memslot->arch.rmap[gfn];
		kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
	}
}

static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
{
	unsigned long page;

	for (page = 0; page < free->npages; page++) {
		unsigned long rmap, *rmapp = &free->arch.rmap[page];
		struct rmap_nested *cursor;
		struct llist_node *entry;

		entry = llist_del_all((struct llist_head *) rmapp);
		for_each_nest_rmap_safe(cursor, entry, &rmap)
			kfree(cursor);
	}
}

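/*
 * Remove the shadow pte (if any) for nested guest real address @gpa and
 * report the page shift it was mapped with via @shift_ret.
 */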
static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
					struct kvm_nested_guest *gp,
					long gpa, int *shift_ret)
{
	struct kvm *kvm = vcpu->kvm;
	bool ret = false;
	pte_t *ptep;
	int shift;

	spin_lock(&kvm->mmu_lock);
	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (ptep && pte_present(*ptep)) {
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
		ret = true;
	}
	spin_unlock(&kvm->mmu_lock);

	if (shift_ret)
		*shift_ret = shift;
	return ret;
}

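/* Helpers to extract fields from a tlbie instruction image and its RS/RB operands */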
static inline int get_ric(unsigned int instr)
{
	return (instr >> 18) & 0x3;
}

static inline int get_prs(unsigned int instr)
{
	return (instr >> 17) & 0x1;
}

static inline int get_r(unsigned int instr)
{
	return (instr >> 16) & 0x1;
}

static inline int get_lpid(unsigned long r_val)
{
	return r_val & 0xffffffff;
}

static inline int get_is(unsigned long r_val)
{
	return (r_val >> 10) & 0x3;
}

static inline int get_ap(unsigned long r_val)
{
	return (r_val >> 5) & 0x7;
}

static inline long get_epn(unsigned long r_val)
{
	return r_val >> 12;
}

static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
					int ap, long epn)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	long npages;
	int shift, shadow_shift;
	unsigned long addr;

	shift = ap_to_shift(ap);
	addr = epn << 12;
	if (shift < 0)
		/* Invalid ap encoding */
		return -EINVAL;

	addr &= ~((1UL << shift) - 1);
	npages = 1UL << (shift - PAGE_SHIFT);

	gp = kvmhv_get_nested(kvm, lpid, false);
	if (!gp) /* No such guest -> nothing to do */
		return 0;
	mutex_lock(&gp->tlb_lock);

	/* There may be more than one host page backing this single guest pte */
	do {
		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);

		npages -= 1UL << (shadow_shift - PAGE_SHIFT);
		addr += 1UL << shadow_shift;
	} while (npages > 0);

	mutex_unlock(&gp->tlb_lock);
	kvmhv_put_nested(gp);
	return 0;
}

static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
				     struct kvm_nested_guest *gp, int ric)
{
	struct kvm *kvm = vcpu->kvm;

	mutex_lock(&gp->tlb_lock);
	switch (ric) {
	case 0:
		/* Invalidate TLB */
		spin_lock(&kvm->mmu_lock);
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		kvmhv_flush_lpid(gp->shadow_lpid);
		spin_unlock(&kvm->mmu_lock);
		break;
	case 1:
		/*
		 * Invalidate PWC
		 * We don't cache this -> nothing to do
		 */
		break;
	case 2:
		/* Invalidate TLB, PWC and caching of partition table entries */
		kvmhv_flush_nested(gp);
		break;
	default:
		break;
	}
	mutex_unlock(&gp->tlb_lock);
}

static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int i;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (gp) {
			spin_unlock(&kvm->mmu_lock);
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			spin_lock(&kvm->mmu_lock);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

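/*
 * Emulate a hypervisor-privileged partition-scoped tlbie issued by the L1
 * guest: @instr is the tlbie instruction image, @rsval and @rbval are the
 * RS and RB register contents.
 */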
static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
				    unsigned long rsval, unsigned long rbval)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int r, ric, prs, is, ap;
	int lpid;
	long epn;
	int ret = 0;

	ric = get_ric(instr);
	prs = get_prs(instr);
	r = get_r(instr);
	lpid = get_lpid(rsval);
	is = get_is(rbval);

	/*
	 * These cases are invalid and are not handled:
	 * r   != 1 -> Only radix supported
	 * prs == 1 -> Not HV privileged
	 * ric == 3 -> No cluster bombs for radix
	 * is  == 1 -> Partition scoped translations not associated with pid
	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
	 */
	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
	    ((!is) && (ric == 1 || ric == 2)))
		return -EINVAL;

	switch (is) {
	case 0:
		/*
		 * We know ric must be 0 here (the other values were excluded
		 * above), so invalidate a single guest effective address.
		 */
		epn = get_epn(rbval);
		ap = get_ap(rbval);
		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
		break;
	case 2:
		/* Invalidate matching LPID */
		gp = kvmhv_get_nested(kvm, lpid, false);
		if (gp) {
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			kvmhv_put_nested(gp);
		}
		break;
	case 3:
		/* Invalidate ALL LPIDs */
		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * This handles the H_TLB_INVALIDATE hcall.
 * Parameters are (r4) tlbie instruction code, (r5) rS contents,
 * (r6) rB contents.
 */
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
{
	int ret;

	ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
			kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
	if (ret)
		return H_PARAMETER;
	return H_SUCCESS;
}

/* Used to convert a nested guest real address to a L1 guest real address */
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa, unsigned long dsisr,
				       struct kvmppc_pte *gpte_p)
{
	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
	int ret;

	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
					 &fault_addr);

	if (ret) {
		/* We didn't find a pte */
		if (ret == -EINVAL) {
			/* Unsupported mmu config */
			flags |= DSISR_UNSUPP_MMU;
		} else if (ret == -ENOENT) {
			/* No translation found */
			flags |= DSISR_NOHPTE;
		} else if (ret == -EFAULT) {
			/* Couldn't access L1 real address */
			flags |= DSISR_PRTABLE_FAULT;
			vcpu->arch.fault_gpa = fault_addr;
		} else {
			/* Unknown error */
			return ret;
		}
		goto forward_to_l1;
	} else {
		/* We found a pte -> check permissions */
		if (dsisr & DSISR_ISSTORE) {
			/* Can we write? */
			if (!gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
			/* Can we execute? */
			if (!gpte_p->may_execute) {
				flags |= SRR1_ISI_N_OR_G;
				goto forward_to_l1;
			}
		} else {
			/* Can we read? */
			if (!gpte_p->may_read && !gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		}
	}

	return 0;

forward_to_l1:
	vcpu->arch.fault_dsisr = flags;
	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
		vcpu->arch.shregs.msr &= ~0x783f0000ul;
		vcpu->arch.shregs.msr |= flags;
	}
	return RESUME_HOST;
}

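/*
 * Handle a reference/change (rc) fault for the nested guest: set the rc
 * bits both in our (L0) page table for the L1 guest and in the shadow
 * page table for the nested guest, provided L1's pte already has them.
 */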
static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa,
				       struct kvmppc_pte gpte,
				       unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	u64 pgflags;
	long ret;

	/* Are the rc bits set in the L1 lpte? */
	pgflags = _PAGE_ACCESSED;
	if (writing)
		pgflags |= _PAGE_DIRTY;
	if (pgflags & ~gpte.rc)
		return RESUME_HOST;

	spin_lock(&kvm->mmu_lock);
	/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
	ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
				      gpte.raddr, kvm->arch.lpid);
	if (!ret) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
	ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
				      gp->shadow_lpid);
	if (!ret)
		ret = -EINVAL;
	else
		ret = 0;

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	return ret;
}

static inline int kvmppc_radix_level_to_shift(int level)
{
	switch (level) {
	case 2:
		return PUD_SHIFT;
	case 1:
		return PMD_SHIFT;
	default:
		return PAGE_SHIFT;
	}
}

static inline int kvmppc_radix_shift_to_level(int shift)
{
	if (shift == PUD_SHIFT)
		return 2;
	if (shift == PMD_SHIFT)
		return 1;
	if (shift == PAGE_SHIFT)
		return 0;
	WARN_ON_ONCE(1);
	return 0;
}

/* called with gp->tlb_lock held */
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
					  struct kvm_nested_guest *gp)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	struct rmap_nested *n_rmap;
	struct kvmppc_pte gpte;
	pte_t pte, *pte_p;
	unsigned long mmu_seq;
	unsigned long dsisr = vcpu->arch.fault_dsisr;
	unsigned long ea = vcpu->arch.fault_dar;
	unsigned long *rmapp;
	unsigned long n_gpa, gpa, gfn, perm = 0UL;
	unsigned int shift, l1_shift, level;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	bool kvm_ro = false;
	long int ret;

	if (!gp->l1_gr_to_hr) {
		kvmhv_update_ptbl_cache(gp);
		if (!gp->l1_gr_to_hr)
			return RESUME_HOST;
	}

	/* Convert the nested guest real address into a L1 guest real address */
	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
	if (!(dsisr & DSISR_PRTABLE_FAULT))
		n_gpa |= ea & 0xFFF;
	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);

	/*
	 * If the hardware found a translation but we don't now have a usable
	 * translation in the l1 partition-scoped tree, remove the shadow pte
	 * and let the guest retry.
	 */
	if (ret == RESUME_HOST &&
	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
		      DSISR_BAD_COPYPASTE)))
		goto inval;
	if (ret)
		return ret;

	/* Failed to set the reference/change bits */
	if (dsisr & DSISR_SET_RC) {
		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
		if (ret == RESUME_HOST)
			return ret;
		if (ret)
			goto inval;
		dsisr &= ~DSISR_SET_RC;
		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
			       DSISR_PROTFAULT)))
			return RESUME_GUEST;
	}

	/*
	 * We took an HISI or HDSI while we were running a nested guest which
	 * means we have no partition scoped translation for that. This means
	 * we need to insert a pte for the mapping into our shadow_pgtable.
	 */

	l1_shift = gpte.page_shift;
	if (l1_shift < PAGE_SHIFT) {
		/* We don't support l1 using a page size smaller than our own */
		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
		       l1_shift, PAGE_SHIFT);
		return -EINVAL;
	}
	gpa = gpte.raddr;
	gfn = gpa >> PAGE_SHIFT;

	/* 1. Get the corresponding host memslot */

	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
			/* unusual error -> reflect to the guest as a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
			return RESUME_GUEST;
		}
		/* passthrough of emulated MMIO case */
		pr_err("emulated MMIO passthrough?\n");
		return -EINVAL;
	}
	if (memslot->flags & KVM_MEM_READONLY) {
		if (writing) {
			/* Give the guest a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea,
					DSISR_ISSTORE | DSISR_PROTFAULT);
			return RESUME_GUEST;
		}
		kvm_ro = true;
	}

	/* 2. Find the host pte for this L1 guest real address */

	/* Used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* See if can find translation in our partition scoped tables for L1 */
	pte = __pte(0);
	spin_lock(&kvm->mmu_lock);
	pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (pte_p)
		pte = *pte_p;
	spin_unlock(&kvm->mmu_lock);

	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
		/* No suitable pte found -> try to insert a mapping */
		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
					writing, kvm_ro, &pte, &level);
		if (ret == -EAGAIN)
			return RESUME_GUEST;
		else if (ret)
			return ret;
		shift = kvmppc_radix_level_to_shift(level);
	}
	/* Align gfn to the start of the page */
	gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;

	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */

	/* The permissions is the combination of the host and l1 guest ptes */
	perm |= gpte.may_read ? 0UL : _PAGE_READ;
	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
	/* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
	perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
	perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
	pte = __pte(pte_val(pte) & ~perm);

	/* What size pte can we insert? */
	if (shift > l1_shift) {
		u64 mask;
		unsigned int actual_shift = PAGE_SHIFT;
		if (PMD_SHIFT < l1_shift)
			actual_shift = PMD_SHIFT;
		mask = (1UL << shift) - (1UL << actual_shift);
		pte = __pte(pte_val(pte) | (gpa & mask));
		shift = actual_shift;
	}
	level = kvmppc_radix_shift_to_level(shift);
	n_gpa &= ~((1UL << shift) - 1);

	/* 4. Insert the pte into our shadow_pgtable */

	n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
	if (!n_rmap)
		return RESUME_GUEST; /* Let the guest try again */
	n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
		(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
				mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
	if (n_rmap)
		kfree(n_rmap);
	if (ret == -EAGAIN)
		ret = RESUME_GUEST;	/* Let the guest try again */

	return ret;

 inval:
	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
	return RESUME_GUEST;
}

long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
{
	struct kvm_nested_guest *gp = vcpu->arch.nested;
	long int ret;

	mutex_lock(&gp->tlb_lock);
	ret = __kvmhv_nested_page_fault(vcpu, gp);
	mutex_unlock(&gp->tlb_lock);
	return ret;
}

int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
{
	int ret = -1;

	spin_lock(&kvm->mmu_lock);
	while (++lpid <= kvm->arch.max_nested_lpid) {
		if (kvm->arch.nested_guests[lpid]) {
			ret = lpid;
			break;
		}
	}
	spin_unlock(&kvm->mmu_lock);
	return ret;
}