linux/arch/powerpc/kvm/book3s_pr.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2009 SUSE Linux Products GmbH. All rights reserved.
   4 *
   5 * Authors:
   6 *    Alexander Graf <agraf@suse.de>
   7 *    Kevin Wolf <mail@kevin-wolf.de>
   8 *    Paul Mackerras <paulus@samba.org>
   9 *
  10 * Description:
  11 * Functions relating to running KVM on Book 3S processors where
  12 * we don't have access to hypervisor mode, and we run the guest
  13 * in problem state (user mode).
  14 *
  15 * This file is derived from arch/powerpc/kvm/44x.c,
  16 * by Hollis Blanchard <hollisb@us.ibm.com>.
  17 */
  18
  19#include <linux/kvm_host.h>
  20#include <linux/export.h>
  21#include <linux/err.h>
  22#include <linux/slab.h>
  23
  24#include <asm/reg.h>
  25#include <asm/cputable.h>
  26#include <asm/cacheflush.h>
  27#include <linux/uaccess.h>
  28#include <asm/interrupt.h>
  29#include <asm/io.h>
  30#include <asm/kvm_ppc.h>
  31#include <asm/kvm_book3s.h>
  32#include <asm/mmu_context.h>
  33#include <asm/switch_to.h>
  34#include <asm/firmware.h>
  35#include <asm/setup.h>
  36#include <linux/gfp.h>
  37#include <linux/sched.h>
  38#include <linux/vmalloc.h>
  39#include <linux/highmem.h>
  40#include <linux/module.h>
  41#include <linux/miscdevice.h>
  42#include <asm/asm-prototypes.h>
  43#include <asm/tm.h>
  44
  45#include "book3s.h"
  46
  47#define CREATE_TRACE_POINTS
  48#include "trace_pr.h"
  49
  50/* #define EXIT_DEBUG */
  51/* #define DEBUG_EXT */
  52
  53static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
  54                             ulong msr);
  55#ifdef CONFIG_PPC_BOOK3S_64
  56static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac);
  57#endif
  58
  59/* Some compatibility defines */
  60#ifdef CONFIG_PPC_BOOK3S_32
  61#define MSR_USER32 MSR_USER
  62#define MSR_USER64 MSR_USER
  63#define HW_PAGE_SIZE PAGE_SIZE
  64#define HPTE_R_M   _PAGE_COHERENT
  65#endif
  66
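     /*
      * "Split real mode" means the guest runs with MSR[IR]=0 and MSR[DR]=1,
      * i.e. instruction fetches are in real mode while data accesses are
      * translated. While that mode is active, the fixup below offsets the
      * guest PC into the SPLIT_HACK_OFFS window, and the unfixup strips
      * that offset from PC and LR again before the guest can observe them.
      */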
  67static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
  68{
  69        ulong msr = kvmppc_get_msr(vcpu);
  70        return (msr & (MSR_IR|MSR_DR)) == MSR_DR;
  71}
  72
  73static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu)
  74{
  75        ulong msr = kvmppc_get_msr(vcpu);
  76        ulong pc = kvmppc_get_pc(vcpu);
  77
  78        /* We are in DR only split real mode */
  79        if ((msr & (MSR_IR|MSR_DR)) != MSR_DR)
  80                return;
  81
  82        /* We have not fixed up the guest already */
  83        if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK)
  84                return;
  85
  86        /* The code is in fixupable address space */
  87        if (pc & SPLIT_HACK_MASK)
  88                return;
  89
  90        vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK;
  91        kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS);
  92}
  93
  94static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
  95{
  96        if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
  97                ulong pc = kvmppc_get_pc(vcpu);
  98                ulong lr = kvmppc_get_lr(vcpu);
  99                if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
 100                        kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
 101                if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
 102                        kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
 103                vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
 104        }
 105}
 106
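     /*
      * Deliver an interrupt to the guest purely in software: stash the
      * guest's PC and MSR into SRR0/SRR1 (with the given srr1_flags), then
      * redirect execution to the interrupt vector relative to the guest's
      * HIOR, running with the guest's interrupt MSR (and, with TM, the
      * transaction-suspended state if the guest was transactional).
      */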
 107static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
 108{
 109        unsigned long msr, pc, new_msr, new_pc;
 110
 111        kvmppc_unfixup_split_real(vcpu);
 112
 113        msr = kvmppc_get_msr(vcpu);
 114        pc = kvmppc_get_pc(vcpu);
 115        new_msr = vcpu->arch.intr_msr;
 116        new_pc = to_book3s(vcpu)->hior + vec;
 117
 118#ifdef CONFIG_PPC_BOOK3S_64
 119        /* If transactional, change to suspend mode on IRQ delivery */
 120        if (MSR_TM_TRANSACTIONAL(msr))
 121                new_msr |= MSR_TS_S;
 122        else
 123                new_msr |= msr & MSR_TS_MASK;
 124#endif
 125
 126        kvmppc_set_srr0(vcpu, pc);
 127        kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
 128        kvmppc_set_pc(vcpu, new_pc);
 129        kvmppc_set_msr(vcpu, new_msr);
 130}
 131
 132static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
 133{
 134#ifdef CONFIG_PPC_BOOK3S_64
 135        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 136        memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
 137        svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
 138        svcpu->in_use = 0;
 139        svcpu_put(svcpu);
 140#endif
 141
 142        /* Disable AIL if supported */
 143        if (cpu_has_feature(CPU_FTR_HVMODE) &&
 144            cpu_has_feature(CPU_FTR_ARCH_207S))
 145                mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
 146
 147        vcpu->cpu = smp_processor_id();
 148#ifdef CONFIG_PPC_BOOK3S_32
 149        current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
 150#endif
 151
 152        if (kvmppc_is_split_real(vcpu))
 153                kvmppc_fixup_split_real(vcpu);
 154
 155        kvmppc_restore_tm_pr(vcpu);
 156}
 157
 158static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
 159{
 160#ifdef CONFIG_PPC_BOOK3S_64
 161        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 162        if (svcpu->in_use) {
 163                kvmppc_copy_from_svcpu(vcpu);
 164        }
 165        memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
 166        to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
 167        svcpu_put(svcpu);
 168#endif
 169
 170        if (kvmppc_is_split_real(vcpu))
 171                kvmppc_unfixup_split_real(vcpu);
 172
 173        kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
 174        kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
 175        kvmppc_save_tm_pr(vcpu);
 176
 177        /* Enable AIL if supported */
 178        if (cpu_has_feature(CPU_FTR_HVMODE) &&
 179            cpu_has_feature(CPU_FTR_ARCH_207S))
 180                mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
 181
 182        vcpu->cpu = -1;
 183}
 184
 185/* Copy data needed by real-mode code from vcpu to shadow vcpu */
 186void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
 187{
 188        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 189
 190        svcpu->gpr[0] = vcpu->arch.regs.gpr[0];
 191        svcpu->gpr[1] = vcpu->arch.regs.gpr[1];
 192        svcpu->gpr[2] = vcpu->arch.regs.gpr[2];
 193        svcpu->gpr[3] = vcpu->arch.regs.gpr[3];
 194        svcpu->gpr[4] = vcpu->arch.regs.gpr[4];
 195        svcpu->gpr[5] = vcpu->arch.regs.gpr[5];
 196        svcpu->gpr[6] = vcpu->arch.regs.gpr[6];
 197        svcpu->gpr[7] = vcpu->arch.regs.gpr[7];
 198        svcpu->gpr[8] = vcpu->arch.regs.gpr[8];
 199        svcpu->gpr[9] = vcpu->arch.regs.gpr[9];
 200        svcpu->gpr[10] = vcpu->arch.regs.gpr[10];
 201        svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
 202        svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
 203        svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
 204        svcpu->cr  = vcpu->arch.regs.ccr;
 205        svcpu->xer = vcpu->arch.regs.xer;
 206        svcpu->ctr = vcpu->arch.regs.ctr;
 207        svcpu->lr  = vcpu->arch.regs.link;
 208        svcpu->pc  = vcpu->arch.regs.nip;
 209#ifdef CONFIG_PPC_BOOK3S_64
 210        svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
 211#endif
 212        /*
 213         * Now also save the current time base value. We use this
 214         * to find the guest purr and spurr value.
 215         */
 216        vcpu->arch.entry_tb = get_tb();
 217        vcpu->arch.entry_vtb = get_vtb();
 218        if (cpu_has_feature(CPU_FTR_ARCH_207S))
 219                vcpu->arch.entry_ic = mfspr(SPRN_IC);
 220        svcpu->in_use = true;
 221
 222        svcpu_put(svcpu);
 223}
 224
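     /*
      * Recompute the MSR value the guest really runs with on the host (the
      * "shadow MSR"): keep the guest-controlled bits from the guest MSR,
      * force on what the host requires (translation, problem state, EE,
      * machine check, ...), and add back any math facilities the guest
      * currently owns.
      */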
 225static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 226{
 227        ulong guest_msr = kvmppc_get_msr(vcpu);
 228        ulong smsr = guest_msr;
 229
 230        /* Guest MSR values */
 231#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 232        smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE |
 233                MSR_TM | MSR_TS_MASK;
 234#else
 235        smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
 236#endif
 237        /* Process MSR values */
 238        smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
  239        /* External providers the guest has reserved */
 240        smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
 241        /* 64-bit Process MSR values */
 242#ifdef CONFIG_PPC_BOOK3S_64
 243        smsr |= MSR_HV;
 244#endif
 245#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  246        /*
  247         * In guest privileged state, we want all TM transactions to fail.
  248         * So clear the MSR TM bit so that every tbegin. will be trapped
  249         * into the host.
  250         */
 251        if (!(guest_msr & MSR_PR))
 252                smsr &= ~MSR_TM;
 253#endif
 254        vcpu->arch.shadow_msr = smsr;
 255}
 256
 257/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
 258void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
 259{
 260        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 261#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 262        ulong old_msr;
 263#endif
 264
 265        /*
 266         * Maybe we were already preempted and synced the svcpu from
 267         * our preempt notifiers. Don't bother touching this svcpu then.
 268         */
 269        if (!svcpu->in_use)
 270                goto out;
 271
 272        vcpu->arch.regs.gpr[0] = svcpu->gpr[0];
 273        vcpu->arch.regs.gpr[1] = svcpu->gpr[1];
 274        vcpu->arch.regs.gpr[2] = svcpu->gpr[2];
 275        vcpu->arch.regs.gpr[3] = svcpu->gpr[3];
 276        vcpu->arch.regs.gpr[4] = svcpu->gpr[4];
 277        vcpu->arch.regs.gpr[5] = svcpu->gpr[5];
 278        vcpu->arch.regs.gpr[6] = svcpu->gpr[6];
 279        vcpu->arch.regs.gpr[7] = svcpu->gpr[7];
 280        vcpu->arch.regs.gpr[8] = svcpu->gpr[8];
 281        vcpu->arch.regs.gpr[9] = svcpu->gpr[9];
 282        vcpu->arch.regs.gpr[10] = svcpu->gpr[10];
 283        vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
 284        vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
 285        vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
 286        vcpu->arch.regs.ccr  = svcpu->cr;
 287        vcpu->arch.regs.xer = svcpu->xer;
 288        vcpu->arch.regs.ctr = svcpu->ctr;
 289        vcpu->arch.regs.link  = svcpu->lr;
 290        vcpu->arch.regs.nip  = svcpu->pc;
 291        vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
 292        vcpu->arch.fault_dar   = svcpu->fault_dar;
 293        vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
 294        vcpu->arch.last_inst   = svcpu->last_inst;
 295#ifdef CONFIG_PPC_BOOK3S_64
 296        vcpu->arch.shadow_fscr = svcpu->shadow_fscr;
 297#endif
 298        /*
 299         * Update purr and spurr using time base on exit.
 300         */
 301        vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb;
 302        vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb;
 303        to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb;
 304        if (cpu_has_feature(CPU_FTR_ARCH_207S))
 305                vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic;
 306
 307#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  308        /*
  309         * Unlike other MSR bits, the MSR[TS] bits can be changed by the
  310         * guest without notifying the host: they are modified by
  311         * unprivileged instructions like "tbegin"/"tend"/"tresume"/
  312         * "tsuspend" in a PR KVM guest.
  313         *
  314         * It is necessary to sync them here to calculate a correct shadow_msr.
  315         *
  316         * A privileged guest's tbegin always fails at present, so we only
  317         * need to take care of the problem state guest.
  318         */
 319        old_msr = kvmppc_get_msr(vcpu);
 320        if (unlikely((old_msr & MSR_PR) &&
 321                (vcpu->arch.shadow_srr1 & (MSR_TS_MASK)) !=
 322                                (old_msr & (MSR_TS_MASK)))) {
 323                old_msr &= ~(MSR_TS_MASK);
 324                old_msr |= (vcpu->arch.shadow_srr1 & (MSR_TS_MASK));
 325                kvmppc_set_msr_fast(vcpu, old_msr);
 326                kvmppc_recalc_shadow_msr(vcpu);
 327        }
 328#endif
 329
 330        svcpu->in_use = false;
 331
 332out:
 333        svcpu_put(svcpu);
 334}
 335
 336#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
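     /*
      * The TM SPRs (TFHAR, TEXASR, TFIAR) are not switched by the normal
      * exit path, so they are saved and restored explicitly here, with the
      * TM facility temporarily enabled via tm_enable()/tm_disable().
      */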
 337void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
 338{
 339        tm_enable();
 340        vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
 341        vcpu->arch.texasr = mfspr(SPRN_TEXASR);
 342        vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
 343        tm_disable();
 344}
 345
 346void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
 347{
 348        tm_enable();
 349        mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
 350        mtspr(SPRN_TEXASR, vcpu->arch.texasr);
 351        mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
 352        tm_disable();
 353}
 354
  355/* Load up the math facilities (FP/VEC/VSX) that are enabled in the guest
  356 * MSR (kvmppc_get_msr()) but not yet enabled in hardware.
  357 */
 358static void kvmppc_handle_lost_math_exts(struct kvm_vcpu *vcpu)
 359{
 360        ulong exit_nr;
 361        ulong ext_diff = (kvmppc_get_msr(vcpu) & ~vcpu->arch.guest_owned_ext) &
 362                (MSR_FP | MSR_VEC | MSR_VSX);
 363
 364        if (!ext_diff)
 365                return;
 366
 367        if (ext_diff == MSR_FP)
 368                exit_nr = BOOK3S_INTERRUPT_FP_UNAVAIL;
 369        else if (ext_diff == MSR_VEC)
 370                exit_nr = BOOK3S_INTERRUPT_ALTIVEC;
 371        else
 372                exit_nr = BOOK3S_INTERRUPT_VSX;
 373
 374        kvmppc_handle_ext(vcpu, exit_nr, ext_diff);
 375}
 376
 377void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
 378{
 379        if (!(MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)))) {
 380                kvmppc_save_tm_sprs(vcpu);
 381                return;
 382        }
 383
 384        kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
 385        kvmppc_giveup_ext(vcpu, MSR_VSX);
 386
 387        preempt_disable();
 388        _kvmppc_save_tm_pr(vcpu, mfmsr());
 389        preempt_enable();
 390}
 391
 392void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
 393{
 394        if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
 395                kvmppc_restore_tm_sprs(vcpu);
 396                if (kvmppc_get_msr(vcpu) & MSR_TM) {
 397                        kvmppc_handle_lost_math_exts(vcpu);
 398                        if (vcpu->arch.fscr & FSCR_TAR)
 399                                kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
 400                }
 401                return;
 402        }
 403
 404        preempt_disable();
 405        _kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu));
 406        preempt_enable();
 407
 408        if (kvmppc_get_msr(vcpu) & MSR_TM) {
 409                kvmppc_handle_lost_math_exts(vcpu);
 410                if (vcpu->arch.fscr & FSCR_TAR)
 411                        kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
 412        }
 413}
 414#endif
 415
 416static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
 417{
 418        int r = 1; /* Indicate we want to get back into the guest */
 419
 420        /* We misuse TLB_FLUSH to indicate that we want to clear
 421           all shadow cache entries */
 422        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
 423                kvmppc_mmu_pte_flush(vcpu, 0, 0);
 424
 425        return r;
 426}
 427
 428/************* MMU Notifiers *************/
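     /*
      * PR KVM keeps software shadow translations for the guest, so the
      * unmap and set_spte notifiers simply flush the affected guest
      * physical range from every vcpu's shadow PTEs; page aging is not
      * tracked at all.
      */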
 429static bool do_kvm_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 430{
 431        long i;
 432        struct kvm_vcpu *vcpu;
 433
 434        kvm_for_each_vcpu(i, vcpu, kvm)
 435                kvmppc_mmu_pte_pflush(vcpu, range->start << PAGE_SHIFT,
 436                                      range->end << PAGE_SHIFT);
 437
 438        return false;
 439}
 440
 441static bool kvm_unmap_gfn_range_pr(struct kvm *kvm, struct kvm_gfn_range *range)
 442{
 443        return do_kvm_unmap_gfn(kvm, range);
 444}
 445
 446static bool kvm_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
 447{
 448        /* XXX could be more clever ;) */
 449        return false;
 450}
 451
 452static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
 453{
 454        /* XXX could be more clever ;) */
 455        return false;
 456}
 457
 458static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
 459{
 460        /* The page will get remapped properly on its next fault */
 461        return do_kvm_unmap_gfn(kvm, range);
 462}
 463
 464/*****************************************/
 465
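     /*
      * Update the guest MSR. Beyond storing the new value, this may block
      * the vcpu (MSR_POW with no pending exceptions), toggle the split
      * real mode fixup, flush and remap segments when the translation
      * bits change, and preload the FPU or lost TM math state.
      */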
 466static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 467{
 468        ulong old_msr;
 469
 470        /* For PAPR guest, make sure MSR reflects guest mode */
 471        if (vcpu->arch.papr_enabled)
 472                msr = (msr & ~MSR_HV) | MSR_ME;
 473
 474#ifdef EXIT_DEBUG
 475        printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 476#endif
 477
 478#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  479        /* We should never let the guest MSR reach TS=10 && PR=0,
  480         * since we always fail transactions in guest privileged
  481         * state.
  482         */
 483        if (!(msr & MSR_PR) && MSR_TM_TRANSACTIONAL(msr))
 484                kvmppc_emulate_tabort(vcpu,
 485                        TM_CAUSE_KVM_FAC_UNAV | TM_CAUSE_PERSISTENT);
 486#endif
 487
 488        old_msr = kvmppc_get_msr(vcpu);
 489        msr &= to_book3s(vcpu)->msr_mask;
 490        kvmppc_set_msr_fast(vcpu, msr);
 491        kvmppc_recalc_shadow_msr(vcpu);
 492
 493        if (msr & MSR_POW) {
 494                if (!vcpu->arch.pending_exceptions) {
 495                        kvm_vcpu_block(vcpu);
 496                        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
 497                        vcpu->stat.generic.halt_wakeup++;
 498
 499                        /* Unset POW bit after we woke up */
 500                        msr &= ~MSR_POW;
 501                        kvmppc_set_msr_fast(vcpu, msr);
 502                }
 503        }
 504
 505        if (kvmppc_is_split_real(vcpu))
 506                kvmppc_fixup_split_real(vcpu);
 507        else
 508                kvmppc_unfixup_split_real(vcpu);
 509
 510        if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=
 511                   (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
 512                kvmppc_mmu_flush_segments(vcpu);
 513                kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
 514
 515                /* Preload magic page segment when in kernel mode */
 516                if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
 517                        struct kvm_vcpu_arch *a = &vcpu->arch;
 518
 519                        if (msr & MSR_DR)
 520                                kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
 521                        else
 522                                kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
 523                }
 524        }
 525
  526        /*
  527         * When switching from 32-bit to 64-bit mode, we may have a stale
  528         * 32-bit magic page around that we need to flush. Typically the
  529         * 32-bit magic page is instantiated when calling into RTAS. Note:
  530         * we assume such a transition only happens while in kernel mode,
  531         * i.e. we never transition from user 32-bit to kernel 64-bit with
  532         * a 32-bit magic page around.
  533         */
 534        if (vcpu->arch.magic_page_pa &&
 535            !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
 536                /* going from RTAS to normal kernel code */
 537                kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
 538                                     ~0xFFFUL);
 539        }
 540
 541        /* Preload FPU if it's enabled */
 542        if (kvmppc_get_msr(vcpu) & MSR_FP)
 543                kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 544
 545#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 546        if (kvmppc_get_msr(vcpu) & MSR_TM)
 547                kvmppc_handle_lost_math_exts(vcpu);
 548#endif
 549}
 550
 551static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
 552{
 553        u32 host_pvr;
 554
 555        vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
 556        vcpu->arch.pvr = pvr;
 557#ifdef CONFIG_PPC_BOOK3S_64
 558        if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
 559                kvmppc_mmu_book3s_64_init(vcpu);
 560                if (!to_book3s(vcpu)->hior_explicit)
 561                        to_book3s(vcpu)->hior = 0xfff00000;
 562                to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
 563                vcpu->arch.cpu_type = KVM_CPU_3S_64;
 564        } else
 565#endif
 566        {
 567                kvmppc_mmu_book3s_32_init(vcpu);
 568                if (!to_book3s(vcpu)->hior_explicit)
 569                        to_book3s(vcpu)->hior = 0;
 570                to_book3s(vcpu)->msr_mask = 0xffffffffULL;
 571                vcpu->arch.cpu_type = KVM_CPU_3S_32;
 572        }
 573
 574        kvmppc_sanity_check(vcpu);
 575
  576        /* If we are at hypervisor level on a 970, we can tell the CPU to
  577         * treat DCBZ as a 32-byte store */
 578        vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
 579        if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
 580            !strcmp(cur_cpu_spec->platform, "ppc970"))
 581                vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
 582
 583        /* Cell performs badly if MSR_FEx are set. So let's hope nobody
 584           really needs them in a VM on Cell and force disable them. */
 585        if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
 586                to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
 587
 588        /*
 589         * If they're asking for POWER6 or later, set the flag
 590         * indicating that we can do multiple large page sizes
 591         * and 1TB segments.
 592         * Also set the flag that indicates that tlbie has the large
 593         * page bit in the RB operand instead of the instruction.
 594         */
 595        switch (PVR_VER(pvr)) {
 596        case PVR_POWER6:
 597        case PVR_POWER7:
 598        case PVR_POWER7p:
 599        case PVR_POWER8:
 600        case PVR_POWER8E:
 601        case PVR_POWER8NVL:
 602        case PVR_POWER9:
 603                vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
 604                        BOOK3S_HFLAG_NEW_TLBIE;
 605                break;
 606        }
 607
 608#ifdef CONFIG_PPC_BOOK3S_32
 609        /* 32 bit Book3S always has 32 byte dcbz */
 610        vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
 611#endif
 612
 613        /* On some CPUs we can execute paired single operations natively */
 614        asm ( "mfpvr %0" : "=r"(host_pvr));
 615        switch (host_pvr) {
 616        case 0x00080200:        /* lonestar 2.0 */
 617        case 0x00088202:        /* lonestar 2.2 */
 618        case 0x70000100:        /* gekko 1.0 */
 619        case 0x00080100:        /* gekko 2.0 */
 620        case 0x00083203:        /* gekko 2.3a */
 621        case 0x00083213:        /* gekko 2.3b */
 622        case 0x00083204:        /* gekko 2.4 */
 623        case 0x00083214:        /* gekko 2.4e (8SE) - retail HW2 */
 624        case 0x00087200:        /* broadway */
 625                vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
 626                /* Enable HID2.PSE - in case we need it later */
 627                mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
 628        }
 629}
 630
  631/* Book3s_32 CPUs always have a 32-byte cache line size, which Linux assumes.
  632 * To make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz
  633 * to emulate the 32-byte dcbz length.
  634 *
  635 * The Book3s_64 inventors also realized this case and implemented a special bit
  636 * in the HID5 register, which is a hypervisor resource. Thus we can't use it.
  637 *
  638 * My approach here is to patch the dcbz instruction on executing pages.
  639 */
 640static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 641{
 642        struct page *hpage;
 643        u64 hpage_offset;
 644        u32 *page;
 645        int i;
 646
 647        hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
 648        if (is_error_page(hpage))
 649                return;
 650
 651        hpage_offset = pte->raddr & ~PAGE_MASK;
 652        hpage_offset &= ~0xFFFULL;
 653        hpage_offset /= 4;
 654
 655        get_page(hpage);
 656        page = kmap_atomic(hpage);
 657
 658        /* patch dcbz into reserved instruction, so we trap */
 659        for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
 660                if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
 661                        page[i] &= cpu_to_be32(0xfffffff7);
 662
 663        kunmap_atomic(page);
 664        put_page(hpage);
 665}
 666
 667static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
 668{
 669        ulong mp_pa = vcpu->arch.magic_page_pa;
 670
 671        if (!(kvmppc_get_msr(vcpu) & MSR_SF))
 672                mp_pa = (uint32_t)mp_pa;
 673
 674        gpa &= ~0xFFFULL;
 675        if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) {
 676                return true;
 677        }
 678
 679        return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);
 680}
 681
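     /*
      * Handle a guest instruction or data storage interrupt: translate the
      * faulting effective address through the guest MMU (or fabricate a
      * real-mode mapping when translation is off), then either reflect the
      * fault back into the guest, map the page on the host, or treat the
      * access as MMIO and emulate it.
      */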
 682static int kvmppc_handle_pagefault(struct kvm_vcpu *vcpu,
 683                            ulong eaddr, int vec)
 684{
 685        bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
 686        bool iswrite = false;
 687        int r = RESUME_GUEST;
 688        int relocated;
 689        int page_found = 0;
 690        struct kvmppc_pte pte = { 0 };
 691        bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
 692        bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
 693        u64 vsid;
 694
 695        relocated = data ? dr : ir;
 696        if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
 697                iswrite = true;
 698
 699        /* Resolve real address if translation turned on */
 700        if (relocated) {
 701                page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
 702        } else {
 703                pte.may_execute = true;
 704                pte.may_read = true;
 705                pte.may_write = true;
 706                pte.raddr = eaddr & KVM_PAM;
 707                pte.eaddr = eaddr;
 708                pte.vpage = eaddr >> 12;
 709                pte.page_size = MMU_PAGE_64K;
 710                pte.wimg = HPTE_R_M;
 711        }
 712
 713        switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
 714        case 0:
 715                pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
 716                break;
 717        case MSR_DR:
 718                if (!data &&
 719                    (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
 720                    ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
 721                        pte.raddr &= ~SPLIT_HACK_MASK;
 722                fallthrough;
 723        case MSR_IR:
 724                vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
 725
 726                if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR)
 727                        pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
 728                else
 729                        pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
 730                pte.vpage |= vsid;
 731
 732                if (vsid == -1)
 733                        page_found = -EINVAL;
 734                break;
 735        }
 736
 737        if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
 738           (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
 739                /*
 740                 * If we do the dcbz hack, we have to NX on every execution,
 741                 * so we can patch the executing code. This renders our guest
 742                 * NX-less.
 743                 */
 744                pte.may_execute = !data;
 745        }
 746
 747        if (page_found == -ENOENT || page_found == -EPERM) {
 748                /* Page not found in guest PTE entries, or protection fault */
 749                u64 flags;
 750
 751                if (page_found == -EPERM)
 752                        flags = DSISR_PROTFAULT;
 753                else
 754                        flags = DSISR_NOHPTE;
 755                if (data) {
 756                        flags |= vcpu->arch.fault_dsisr & DSISR_ISSTORE;
 757                        kvmppc_core_queue_data_storage(vcpu, eaddr, flags);
 758                } else {
 759                        kvmppc_core_queue_inst_storage(vcpu, flags);
 760                }
 761        } else if (page_found == -EINVAL) {
 762                /* Page not found in guest SLB */
 763                kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
 764                kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
 765        } else if (kvmppc_visible_gpa(vcpu, pte.raddr)) {
 766                if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
 767                        /*
 768                         * There is already a host HPTE there, presumably
 769                         * a read-only one for a page the guest thinks
 770                         * is writable, so get rid of it first.
 771                         */
 772                        kvmppc_mmu_unmap_page(vcpu, &pte);
 773                }
 774                /* The guest's PTE is not mapped yet. Map on the host */
 775                if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
 776                        /* Exit KVM if mapping failed */
 777                        vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 778                        return RESUME_HOST;
 779                }
 780                if (data)
 781                        vcpu->stat.sp_storage++;
 782                else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
 783                         (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
 784                        kvmppc_patch_dcbz(vcpu, &pte);
 785        } else {
 786                /* MMIO */
 787                vcpu->stat.mmio_exits++;
 788                vcpu->arch.paddr_accessed = pte.raddr;
 789                vcpu->arch.vaddr_accessed = pte.eaddr;
 790                r = kvmppc_emulate_mmio(vcpu);
 791                if ( r == RESUME_HOST_NV )
 792                        r = RESUME_HOST;
 793        }
 794
 795        return r;
 796}
 797
 798/* Give up external provider (FPU, Altivec, VSX) */
 799void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 800{
 801        struct thread_struct *t = &current->thread;
 802
 803        /*
 804         * VSX instructions can access FP and vector registers, so if
 805         * we are giving up VSX, make sure we give up FP and VMX as well.
 806         */
 807        if (msr & MSR_VSX)
 808                msr |= MSR_FP | MSR_VEC;
 809
 810        msr &= vcpu->arch.guest_owned_ext;
 811        if (!msr)
 812                return;
 813
 814#ifdef DEBUG_EXT
 815        printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
 816#endif
 817
 818        if (msr & MSR_FP) {
 819                /*
 820                 * Note that on CPUs with VSX, giveup_fpu stores
 821                 * both the traditional FP registers and the added VSX
 822                 * registers into thread.fp_state.fpr[].
 823                 */
 824                if (t->regs->msr & MSR_FP)
 825                        giveup_fpu(current);
 826                t->fp_save_area = NULL;
 827        }
 828
 829#ifdef CONFIG_ALTIVEC
 830        if (msr & MSR_VEC) {
 831                if (current->thread.regs->msr & MSR_VEC)
 832                        giveup_altivec(current);
 833                t->vr_save_area = NULL;
 834        }
 835#endif
 836
 837        vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
 838        kvmppc_recalc_shadow_msr(vcpu);
 839}
 840
 841/* Give up facility (TAR / EBB / DSCR) */
 842void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
 843{
 844#ifdef CONFIG_PPC_BOOK3S_64
 845        if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
  846                /* Facility not available to the guest, ignore giveup request */
 847                return;
 848        }
 849
 850        switch (fac) {
 851        case FSCR_TAR_LG:
 852                vcpu->arch.tar = mfspr(SPRN_TAR);
 853                mtspr(SPRN_TAR, current->thread.tar);
 854                vcpu->arch.shadow_fscr &= ~FSCR_TAR;
 855                break;
 856        }
 857#endif
 858}
 859
 860/* Handle external providers (FPU, Altivec, VSX) */
 861static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 862                             ulong msr)
 863{
 864        struct thread_struct *t = &current->thread;
 865
 866        /* When we have paired singles, we emulate in software */
 867        if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
 868                return RESUME_GUEST;
 869
 870        if (!(kvmppc_get_msr(vcpu) & msr)) {
 871                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 872                return RESUME_GUEST;
 873        }
 874
 875        if (msr == MSR_VSX) {
 876                /* No VSX?  Give an illegal instruction interrupt */
 877#ifdef CONFIG_VSX
 878                if (!cpu_has_feature(CPU_FTR_VSX))
 879#endif
 880                {
 881                        kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 882                        return RESUME_GUEST;
 883                }
 884
 885                /*
 886                 * We have to load up all the FP and VMX registers before
 887                 * we can let the guest use VSX instructions.
 888                 */
 889                msr = MSR_FP | MSR_VEC | MSR_VSX;
 890        }
 891
 892        /* See if we already own all the ext(s) needed */
 893        msr &= ~vcpu->arch.guest_owned_ext;
 894        if (!msr)
 895                return RESUME_GUEST;
 896
 897#ifdef DEBUG_EXT
 898        printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
 899#endif
 900
 901        if (msr & MSR_FP) {
 902                preempt_disable();
 903                enable_kernel_fp();
 904                load_fp_state(&vcpu->arch.fp);
 905                disable_kernel_fp();
 906                t->fp_save_area = &vcpu->arch.fp;
 907                preempt_enable();
 908        }
 909
 910        if (msr & MSR_VEC) {
 911#ifdef CONFIG_ALTIVEC
 912                preempt_disable();
 913                enable_kernel_altivec();
 914                load_vr_state(&vcpu->arch.vr);
 915                disable_kernel_altivec();
 916                t->vr_save_area = &vcpu->arch.vr;
 917                preempt_enable();
 918#endif
 919        }
 920
 921        t->regs->msr |= msr;
 922        vcpu->arch.guest_owned_ext |= msr;
 923        kvmppc_recalc_shadow_msr(vcpu);
 924
 925        return RESUME_GUEST;
 926}
 927
 928/*
 929 * Kernel code using FP or VMX could have flushed guest state to
 930 * the thread_struct; if so, get it back now.
 931 */
 932static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
 933{
 934        unsigned long lost_ext;
 935
 936        lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
 937        if (!lost_ext)
 938                return;
 939
 940        if (lost_ext & MSR_FP) {
 941                preempt_disable();
 942                enable_kernel_fp();
 943                load_fp_state(&vcpu->arch.fp);
 944                disable_kernel_fp();
 945                preempt_enable();
 946        }
 947#ifdef CONFIG_ALTIVEC
 948        if (lost_ext & MSR_VEC) {
 949                preempt_disable();
 950                enable_kernel_altivec();
 951                load_vr_state(&vcpu->arch.vr);
 952                disable_kernel_altivec();
 953                preempt_enable();
 954        }
 955#endif
 956        current->thread.regs->msr |= lost_ext;
 957}
 958
 959#ifdef CONFIG_PPC_BOOK3S_64
 960
 961void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
 962{
 963        /* Inject the Interrupt Cause field and trigger a guest interrupt */
 964        vcpu->arch.fscr &= ~(0xffULL << 56);
 965        vcpu->arch.fscr |= (fac << 56);
 966        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
 967}
 968
 969static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
 970{
 971        enum emulation_result er = EMULATE_FAIL;
 972
 973        if (!(kvmppc_get_msr(vcpu) & MSR_PR))
 974                er = kvmppc_emulate_instruction(vcpu);
 975
 976        if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
 977                /* Couldn't emulate, trigger interrupt in guest */
 978                kvmppc_trigger_fac_interrupt(vcpu, fac);
 979        }
 980}
 981
 982/* Enable facilities (TAR, EBB, DSCR) for the guest */
 983static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
 984{
 985        bool guest_fac_enabled;
 986        BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S));
 987
  988        /*
  989         * Not every facility is enabled by FSCR bits; check whether the
  990         * guest has this facility enabled at all.
  991         */
 992        switch (fac) {
 993        case FSCR_TAR_LG:
 994        case FSCR_EBB_LG:
 995                guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac));
 996                break;
 997        case FSCR_TM_LG:
 998                guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM;
 999                break;
1000        default:
1001                guest_fac_enabled = false;
1002                break;
1003        }
1004
1005        if (!guest_fac_enabled) {
1006                /* Facility not enabled by the guest */
1007                kvmppc_trigger_fac_interrupt(vcpu, fac);
1008                return RESUME_GUEST;
1009        }
1010
1011        switch (fac) {
1012        case FSCR_TAR_LG:
1013                /* TAR switching isn't lazy in Linux yet */
1014                current->thread.tar = mfspr(SPRN_TAR);
1015                mtspr(SPRN_TAR, vcpu->arch.tar);
1016                vcpu->arch.shadow_fscr |= FSCR_TAR;
1017                break;
1018        default:
1019                kvmppc_emulate_fac(vcpu, fac);
1020                break;
1021        }
1022
1023#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 1024        /* Since we disable MSR_TM in privileged state, an mfspr of a TM SPR
 1025         * can trigger a TM facility unavailable interrupt. In this case the
 1026         * emulation is handled by kvmppc_emulate_fac(), which eventually
 1027         * invokes kvmppc_emulate_mfspr(). But note that the mfspr may target
 1028         * a non-volatile (NV) register as RT, so we need to return
 1029         * RESUME_GUEST_NV to restore those NV registers after the update.
 1030         */
1031        if ((fac == FSCR_TM_LG) && !(kvmppc_get_msr(vcpu) & MSR_PR))
1032                return RESUME_GUEST_NV;
1033#endif
1034
1035        return RESUME_GUEST;
1036}
1037
1038void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
1039{
1040        if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) {
1041                /* TAR got dropped, drop it in shadow too */
1042                kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
1043        } else if (!(vcpu->arch.fscr & FSCR_TAR) && (fscr & FSCR_TAR)) {
1044                vcpu->arch.fscr = fscr;
1045                kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
1046                return;
1047        }
1048
1049        vcpu->arch.fscr = fscr;
1050}
1051#endif
1052
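     /*
      * Userspace single stepping (KVM_GUESTDBG_SINGLESTEP) is implemented
      * by setting MSR_SE in the guest MSR on entry and clearing it again
      * on exit; the resulting trace interrupts are turned into
      * KVM_EXIT_DEBUG exits in the handler below.
      */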
1053static void kvmppc_setup_debug(struct kvm_vcpu *vcpu)
1054{
1055        if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
1056                u64 msr = kvmppc_get_msr(vcpu);
1057
1058                kvmppc_set_msr(vcpu, msr | MSR_SE);
1059        }
1060}
1061
1062static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
1063{
1064        if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
1065                u64 msr = kvmppc_get_msr(vcpu);
1066
1067                kvmppc_set_msr(vcpu, msr & ~MSR_SE);
1068        }
1069}
1070
1071static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr)
1072{
1073        enum emulation_result er;
1074        ulong flags;
1075        u32 last_inst;
1076        int emul, r;
1077
1078        /*
1079         * shadow_srr1 only contains valid flags if we came here via a program
1080         * exception. The other exceptions (emulation assist, FP unavailable,
1081         * etc.) do not provide flags in SRR1, so use an illegal-instruction
1082         * exception when injecting a program interrupt into the guest.
1083         */
1084        if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
1085                flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
1086        else
1087                flags = SRR1_PROGILL;
1088
1089        emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
1090        if (emul != EMULATE_DONE)
1091                return RESUME_GUEST;
1092
1093        if (kvmppc_get_msr(vcpu) & MSR_PR) {
1094#ifdef EXIT_DEBUG
1095                pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n",
1096                        kvmppc_get_pc(vcpu), last_inst);
1097#endif
1098                if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) {
1099                        kvmppc_core_queue_program(vcpu, flags);
1100                        return RESUME_GUEST;
1101                }
1102        }
1103
1104        vcpu->stat.emulated_inst_exits++;
1105        er = kvmppc_emulate_instruction(vcpu);
1106        switch (er) {
1107        case EMULATE_DONE:
1108                r = RESUME_GUEST_NV;
1109                break;
1110        case EMULATE_AGAIN:
1111                r = RESUME_GUEST;
1112                break;
1113        case EMULATE_FAIL:
1114                pr_crit("%s: emulation at %lx failed (%08x)\n",
1115                        __func__, kvmppc_get_pc(vcpu), last_inst);
1116                kvmppc_core_queue_program(vcpu, flags);
1117                r = RESUME_GUEST;
1118                break;
1119        case EMULATE_DO_MMIO:
1120                vcpu->run->exit_reason = KVM_EXIT_MMIO;
1121                r = RESUME_HOST_NV;
1122                break;
1123        case EMULATE_EXIT_USER:
1124                r = RESUME_HOST_NV;
1125                break;
1126        default:
1127                BUG();
1128        }
1129
1130        return r;
1131}
1132
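     /*
      * Main exit dispatcher for PR KVM: look at the interrupt vector we
      * exited on and handle it in the kernel where possible (page faults,
      * instruction emulation, facility and FP/VMX/VSX loading, hypercalls);
      * otherwise queue the event for the guest or return to userspace.
      */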
1133int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr)
1134{
1135        struct kvm_run *run = vcpu->run;
1136        int r = RESUME_HOST;
1137        int s;
1138
1139        vcpu->stat.sum_exits++;
1140
1141        run->exit_reason = KVM_EXIT_UNKNOWN;
1142        run->ready_for_interrupt_injection = 1;
1143
1144        /* We get here with MSR.EE=1 */
1145
1146        trace_kvm_exit(exit_nr, vcpu);
1147        guest_exit();
1148
1149        switch (exit_nr) {
1150        case BOOK3S_INTERRUPT_INST_STORAGE:
1151        {
1152                ulong shadow_srr1 = vcpu->arch.shadow_srr1;
1153                vcpu->stat.pf_instruc++;
1154
1155                if (kvmppc_is_split_real(vcpu))
1156                        kvmppc_fixup_split_real(vcpu);
1157
1158#ifdef CONFIG_PPC_BOOK3S_32
 1159                /* We mark segments as unused when invalidating them, so treat
 1160                 * the respective fault as a segment fault. */
1161                {
1162                        struct kvmppc_book3s_shadow_vcpu *svcpu;
1163                        u32 sr;
1164
1165                        svcpu = svcpu_get(vcpu);
1166                        sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
1167                        svcpu_put(svcpu);
1168                        if (sr == SR_INVALID) {
1169                                kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
1170                                r = RESUME_GUEST;
1171                                break;
1172                        }
1173                }
1174#endif
1175
1176                /* only care about PTEG not found errors, but leave NX alone */
1177                if (shadow_srr1 & 0x40000000) {
1178                        int idx = srcu_read_lock(&vcpu->kvm->srcu);
1179                        r = kvmppc_handle_pagefault(vcpu, kvmppc_get_pc(vcpu), exit_nr);
1180                        srcu_read_unlock(&vcpu->kvm->srcu, idx);
1181                        vcpu->stat.sp_instruc++;
1182                } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
1183                          (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
1184                        /*
 1185                         * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
 1186                         *     so we can't use the NX bit inside the guest. Let's cross our fingers
 1187                         *     that no guest that needs the dcbz hack does NX.
1188                         */
1189                        kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
1190                        r = RESUME_GUEST;
1191                } else {
1192                        kvmppc_core_queue_inst_storage(vcpu,
1193                                                shadow_srr1 & 0x58000000);
1194                        r = RESUME_GUEST;
1195                }
1196                break;
1197        }
1198        case BOOK3S_INTERRUPT_DATA_STORAGE:
1199        {
1200                ulong dar = kvmppc_get_fault_dar(vcpu);
1201                u32 fault_dsisr = vcpu->arch.fault_dsisr;
1202                vcpu->stat.pf_storage++;
1203
1204#ifdef CONFIG_PPC_BOOK3S_32
 1205                /* We mark segments as unused when invalidating them, so treat
 1206                 * the respective fault as a segment fault. */
1207                {
1208                        struct kvmppc_book3s_shadow_vcpu *svcpu;
1209                        u32 sr;
1210
1211                        svcpu = svcpu_get(vcpu);
1212                        sr = svcpu->sr[dar >> SID_SHIFT];
1213                        svcpu_put(svcpu);
1214                        if (sr == SR_INVALID) {
1215                                kvmppc_mmu_map_segment(vcpu, dar);
1216                                r = RESUME_GUEST;
1217                                break;
1218                        }
1219                }
1220#endif
1221
1222                /*
1223                 * We need to handle missing shadow PTEs, and
1224                 * protection faults due to us mapping a page read-only
1225                 * when the guest thinks it is writable.
1226                 */
1227                if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
1228                        int idx = srcu_read_lock(&vcpu->kvm->srcu);
1229                        r = kvmppc_handle_pagefault(vcpu, dar, exit_nr);
1230                        srcu_read_unlock(&vcpu->kvm->srcu, idx);
1231                } else {
1232                        kvmppc_core_queue_data_storage(vcpu, dar, fault_dsisr);
1233                        r = RESUME_GUEST;
1234                }
1235                break;
1236        }
1237        case BOOK3S_INTERRUPT_DATA_SEGMENT:
1238                if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
1239                        kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
1240                        kvmppc_book3s_queue_irqprio(vcpu,
1241                                BOOK3S_INTERRUPT_DATA_SEGMENT);
1242                }
1243                r = RESUME_GUEST;
1244                break;
1245        case BOOK3S_INTERRUPT_INST_SEGMENT:
1246                if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
1247                        kvmppc_book3s_queue_irqprio(vcpu,
1248                                BOOK3S_INTERRUPT_INST_SEGMENT);
1249                }
1250                r = RESUME_GUEST;
1251                break;
1252        /* We're good on these - the host merely wanted to get our attention */
1253        case BOOK3S_INTERRUPT_DECREMENTER:
1254        case BOOK3S_INTERRUPT_HV_DECREMENTER:
1255        case BOOK3S_INTERRUPT_DOORBELL:
1256        case BOOK3S_INTERRUPT_H_DOORBELL:
1257                vcpu->stat.dec_exits++;
1258                r = RESUME_GUEST;
1259                break;
1260        case BOOK3S_INTERRUPT_EXTERNAL:
1261        case BOOK3S_INTERRUPT_EXTERNAL_HV:
1262        case BOOK3S_INTERRUPT_H_VIRT:
1263                vcpu->stat.ext_intr_exits++;
1264                r = RESUME_GUEST;
1265                break;
1266        case BOOK3S_INTERRUPT_HMI:
1267        case BOOK3S_INTERRUPT_PERFMON:
1268        case BOOK3S_INTERRUPT_SYSTEM_RESET:
1269                r = RESUME_GUEST;
1270                break;
1271        case BOOK3S_INTERRUPT_PROGRAM:
1272        case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
1273                r = kvmppc_exit_pr_progint(vcpu, exit_nr);
1274                break;
1275        case BOOK3S_INTERRUPT_SYSCALL:
1276        {
1277                u32 last_sc;
1278                int emul;
1279
1280                /* Get last sc for papr */
1281                if (vcpu->arch.papr_enabled) {
 1282                        /* The sc instruction points SRR0 to the next inst */
1283                        emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc);
1284                        if (emul != EMULATE_DONE) {
1285                                kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4);
1286                                r = RESUME_GUEST;
1287                                break;
1288                        }
1289                }
1290
1291                if (vcpu->arch.papr_enabled &&
1292                    (last_sc == 0x44000022) &&
1293                    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
1294                        /* SC 1 papr hypercalls */
1295                        ulong cmd = kvmppc_get_gpr(vcpu, 3);
1296                        int i;
1297
1298#ifdef CONFIG_PPC_BOOK3S_64
1299                        if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
1300                                r = RESUME_GUEST;
1301                                break;
1302                        }
1303#endif
1304
1305                        run->papr_hcall.nr = cmd;
1306                        for (i = 0; i < 9; ++i) {
1307                                ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
1308                                run->papr_hcall.args[i] = gpr;
1309                        }
1310                        run->exit_reason = KVM_EXIT_PAPR_HCALL;
1311                        vcpu->arch.hcall_needed = 1;
1312                        r = RESUME_HOST;
1313                } else if (vcpu->arch.osi_enabled &&
1314                    (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
1315                    (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
1316                        /* MOL hypercalls */
1317                        u64 *gprs = run->osi.gprs;
1318                        int i;
1319
1320                        run->exit_reason = KVM_EXIT_OSI;
1321                        for (i = 0; i < 32; i++)
1322                                gprs[i] = kvmppc_get_gpr(vcpu, i);
1323                        vcpu->arch.osi_needed = 1;
1324                        r = RESUME_HOST_NV;
1325                } else if (!(kvmppc_get_msr(vcpu) & MSR_PR) &&
1326                    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
1327                        /* KVM PV hypercalls */
1328                        kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
1329                        r = RESUME_GUEST;
1330                } else {
1331                        /* Guest syscalls */
1332                        vcpu->stat.syscall_exits++;
1333                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1334                        r = RESUME_GUEST;
1335                }
1336                break;
1337        }
1338        case BOOK3S_INTERRUPT_FP_UNAVAIL:
1339        case BOOK3S_INTERRUPT_ALTIVEC:
1340        case BOOK3S_INTERRUPT_VSX:
1341        {
1342                int ext_msr = 0;
1343                int emul;
1344                u32 last_inst;
1345
1346                if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) {
1347                        /* Do paired single instruction emulation */
1348                        emul = kvmppc_get_last_inst(vcpu, INST_GENERIC,
1349                                                    &last_inst);
1350                        if (emul == EMULATE_DONE)
1351                                r = kvmppc_exit_pr_progint(vcpu, exit_nr);
1352                        else
1353                                r = RESUME_GUEST;
1354
1355                        break;
1356                }
1357
1358                /* Enable external provider */
1359                switch (exit_nr) {
1360                case BOOK3S_INTERRUPT_FP_UNAVAIL:
1361                        ext_msr = MSR_FP;
1362                        break;
1363
1364                case BOOK3S_INTERRUPT_ALTIVEC:
1365                        ext_msr = MSR_VEC;
1366                        break;
1367
1368                case BOOK3S_INTERRUPT_VSX:
1369                        ext_msr = MSR_VSX;
1370                        break;
1371                }
1372
1373                r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
1374                break;
1375        }
1376        case BOOK3S_INTERRUPT_ALIGNMENT:
1377        {
1378                u32 last_inst;
1379                int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
1380
1381                if (emul == EMULATE_DONE) {
1382                        u32 dsisr;
1383                        u64 dar;
1384
1385                        dsisr = kvmppc_alignment_dsisr(vcpu, last_inst);
1386                        dar = kvmppc_alignment_dar(vcpu, last_inst);
1387
1388                        kvmppc_set_dsisr(vcpu, dsisr);
1389                        kvmppc_set_dar(vcpu, dar);
1390
1391                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1392                }
1393                r = RESUME_GUEST;
1394                break;
1395        }
1396#ifdef CONFIG_PPC_BOOK3S_64
1397        case BOOK3S_INTERRUPT_FAC_UNAVAIL:
1398                r = kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
1399                break;
1400#endif
1401        case BOOK3S_INTERRUPT_MACHINE_CHECK:
1402                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1403                r = RESUME_GUEST;
1404                break;
1405        case BOOK3S_INTERRUPT_TRACE:
1406                if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
1407                        run->exit_reason = KVM_EXIT_DEBUG;
1408                        r = RESUME_HOST;
1409                } else {
1410                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1411                        r = RESUME_GUEST;
1412                }
1413                break;
1414        default:
1415        {
1416                ulong shadow_srr1 = vcpu->arch.shadow_srr1;
1417                /* Ugh - bork here! What did we get? */
1418                printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
1419                        exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
1420                r = RESUME_HOST;
1421                BUG();
1422                break;
1423        }
1424        }
1425
1426        if (!(r & RESUME_HOST)) {
1427                /* To avoid clobbering exit_reason, only check for signals if
1428                 * we aren't already exiting to userspace for some other
1429                 * reason. */
1430
1431                /*
 1432                 * Interrupts could be timers for the guest that we have to
 1433                 * inject again, so let's postpone them until we're back in the
 1434                 * guest; if we really did time things that badly, we just exit
 1435                 * again due to a host external interrupt.
1436                 */
1437                s = kvmppc_prepare_to_enter(vcpu);
1438                if (s <= 0)
1439                        r = s;
1440                else {
1441                        /* interrupts now hard-disabled */
1442                        kvmppc_fix_ee_before_entry();
1443                }
1444
1445                kvmppc_handle_lost_ext(vcpu);
1446        }
1447
1448        trace_kvm_book3s_reenter(r, vcpu);
1449
1450        return r;
1451}
1452
1453static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
1454                                            struct kvm_sregs *sregs)
1455{
1456        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
1457        int i;
1458
1459        sregs->pvr = vcpu->arch.pvr;
1460
1461        sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
1462        if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
1463                for (i = 0; i < 64; i++) {
1464                        sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i;
1465                        sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
1466                }
1467        } else {
1468                for (i = 0; i < 16; i++)
1469                        sregs->u.s.ppc32.sr[i] = kvmppc_get_sr(vcpu, i);
1470
1471                for (i = 0; i < 8; i++) {
1472                        sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
1473                        sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
1474                }
1475        }
1476
1477        return 0;
1478}
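
/*
 * Illustrative userspace sketch of fetching the state filled in above.
 * Hedged and minimal: vcpu_fd is a placeholder, and SLB_ESID_V (the SLB
 * valid bit tested by set_sregs below) would need its own userspace
 * definition.  On 64-bit Book3S guests the SLB image is meaningful; on
 * 32-bit it is the segment registers and BATs instead.
 *
 *	struct kvm_sregs sregs;
 *	int i;
 *
 *	if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
 *		err(1, "KVM_GET_SREGS");
 *
 *	printf("pvr 0x%lx sdr1 0x%llx\n", (unsigned long)sregs.pvr,
 *	       (unsigned long long)sregs.u.s.sdr1);
 *	for (i = 0; i < 64; i++)
 *		if (sregs.u.s.ppc64.slb[i].slbe & SLB_ESID_V)
 *			printf("slb %d: 0x%llx 0x%llx\n", i,
 *			       (unsigned long long)sregs.u.s.ppc64.slb[i].slbe,
 *			       (unsigned long long)sregs.u.s.ppc64.slb[i].slbv);
 */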
1479
1480static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
1481                                            struct kvm_sregs *sregs)
1482{
1483        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
1484        int i;
1485
1486        kvmppc_set_pvr_pr(vcpu, sregs->pvr);
1487
1488        vcpu3s->sdr1 = sregs->u.s.sdr1;
1489#ifdef CONFIG_PPC_BOOK3S_64
1490        if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
1491                /* Flush all SLB entries */
1492                vcpu->arch.mmu.slbmte(vcpu, 0, 0);
1493                vcpu->arch.mmu.slbia(vcpu);
1494
1495                for (i = 0; i < 64; i++) {
1496                        u64 rb = sregs->u.s.ppc64.slb[i].slbe;
1497                        u64 rs = sregs->u.s.ppc64.slb[i].slbv;
1498
1499                        if (rb & SLB_ESID_V)
1500                                vcpu->arch.mmu.slbmte(vcpu, rs, rb);
1501                }
1502        } else
1503#endif
1504        {
1505                for (i = 0; i < 16; i++) {
1506                        vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
1507                }
1508                for (i = 0; i < 8; i++) {
1509                        kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
1510                                       (u32)sregs->u.s.ppc32.ibat[i]);
1511                        kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
1512                                       (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
1513                        kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
1514                                       (u32)sregs->u.s.ppc32.dbat[i]);
1515                        kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
1516                                       (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
1517                }
1518        }
1519
1520        /* Flush the MMU after messing with the segments */
1521        kvmppc_mmu_pte_flush(vcpu, 0, 0);
1522
1523        return 0;
1524}
1525
1526static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
1527                                 union kvmppc_one_reg *val)
1528{
1529        int r = 0;
1530
1531        switch (id) {
1532        case KVM_REG_PPC_DEBUG_INST:
1533                *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
1534                break;
1535        case KVM_REG_PPC_HIOR:
1536                *val = get_reg_val(id, to_book3s(vcpu)->hior);
1537                break;
1538        case KVM_REG_PPC_VTB:
1539                *val = get_reg_val(id, to_book3s(vcpu)->vtb);
1540                break;
1541        case KVM_REG_PPC_LPCR:
1542        case KVM_REG_PPC_LPCR_64:
1543                /*
1544                 * We are only interested in the LPCR_ILE bit
1545                 */
1546                if (vcpu->arch.intr_msr & MSR_LE)
1547                        *val = get_reg_val(id, LPCR_ILE);
1548                else
1549                        *val = get_reg_val(id, 0);
1550                break;
1551#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1552        case KVM_REG_PPC_TFHAR:
1553                *val = get_reg_val(id, vcpu->arch.tfhar);
1554                break;
1555        case KVM_REG_PPC_TFIAR:
1556                *val = get_reg_val(id, vcpu->arch.tfiar);
1557                break;
1558        case KVM_REG_PPC_TEXASR:
1559                *val = get_reg_val(id, vcpu->arch.texasr);
1560                break;
1561        case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
1562                *val = get_reg_val(id,
1563                                vcpu->arch.gpr_tm[id-KVM_REG_PPC_TM_GPR0]);
1564                break;
1565        case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
1566        {
1567                int i, j;
1568
1569                i = id - KVM_REG_PPC_TM_VSR0;
1570                if (i < 32)
1571                        for (j = 0; j < TS_FPRWIDTH; j++)
1572                                val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
1573                else {
1574                        if (cpu_has_feature(CPU_FTR_ALTIVEC))
1575                                val->vval = vcpu->arch.vr_tm.vr[i-32];
1576                        else
1577                                r = -ENXIO;
1578                }
1579                break;
1580        }
1581        case KVM_REG_PPC_TM_CR:
1582                *val = get_reg_val(id, vcpu->arch.cr_tm);
1583                break;
1584        case KVM_REG_PPC_TM_XER:
1585                *val = get_reg_val(id, vcpu->arch.xer_tm);
1586                break;
1587        case KVM_REG_PPC_TM_LR:
1588                *val = get_reg_val(id, vcpu->arch.lr_tm);
1589                break;
1590        case KVM_REG_PPC_TM_CTR:
1591                *val = get_reg_val(id, vcpu->arch.ctr_tm);
1592                break;
1593        case KVM_REG_PPC_TM_FPSCR:
1594                *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
1595                break;
1596        case KVM_REG_PPC_TM_AMR:
1597                *val = get_reg_val(id, vcpu->arch.amr_tm);
1598                break;
1599        case KVM_REG_PPC_TM_PPR:
1600                *val = get_reg_val(id, vcpu->arch.ppr_tm);
1601                break;
1602        case KVM_REG_PPC_TM_VRSAVE:
1603                *val = get_reg_val(id, vcpu->arch.vrsave_tm);
1604                break;
1605        case KVM_REG_PPC_TM_VSCR:
1606                if (cpu_has_feature(CPU_FTR_ALTIVEC))
1607                        *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
1608                else
1609                        r = -ENXIO;
1610                break;
1611        case KVM_REG_PPC_TM_DSCR:
1612                *val = get_reg_val(id, vcpu->arch.dscr_tm);
1613                break;
1614        case KVM_REG_PPC_TM_TAR:
1615                *val = get_reg_val(id, vcpu->arch.tar_tm);
1616                break;
1617#endif
1618        default:
1619                r = -EINVAL;
1620                break;
1621        }
1622
1623        return r;
1624}
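
/*
 * Illustrative userspace sketch of a ONE_REG read matching the
 * KVM_REG_PPC_HIOR case above.  Hedged and minimal: vcpu_fd is a
 * placeholder, and the access size (64 bits here) is encoded in the id.
 *
 *	__u64 hior;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_PPC_HIOR,
 *		.addr = (__u64)(uintptr_t)&hior,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
 *		err(1, "KVM_GET_ONE_REG");
 *	printf("HIOR = 0x%llx\n", (unsigned long long)hior);
 */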
1625
1626static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr)
1627{
1628        if (new_lpcr & LPCR_ILE)
1629                vcpu->arch.intr_msr |= MSR_LE;
1630        else
1631                vcpu->arch.intr_msr &= ~MSR_LE;
1632}
1633
1634static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
1635                                 union kvmppc_one_reg *val)
1636{
1637        int r = 0;
1638
1639        switch (id) {
1640        case KVM_REG_PPC_HIOR:
1641                to_book3s(vcpu)->hior = set_reg_val(id, *val);
1642                to_book3s(vcpu)->hior_explicit = true;
1643                break;
1644        case KVM_REG_PPC_VTB:
1645                to_book3s(vcpu)->vtb = set_reg_val(id, *val);
1646                break;
1647        case KVM_REG_PPC_LPCR:
1648        case KVM_REG_PPC_LPCR_64:
1649                kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
1650                break;
1651#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1652        case KVM_REG_PPC_TFHAR:
1653                vcpu->arch.tfhar = set_reg_val(id, *val);
1654                break;
1655        case KVM_REG_PPC_TFIAR:
1656                vcpu->arch.tfiar = set_reg_val(id, *val);
1657                break;
1658        case KVM_REG_PPC_TEXASR:
1659                vcpu->arch.texasr = set_reg_val(id, *val);
1660                break;
1661        case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
1662                vcpu->arch.gpr_tm[id - KVM_REG_PPC_TM_GPR0] =
1663                        set_reg_val(id, *val);
1664                break;
1665        case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
1666        {
1667                int i, j;
1668
1669                i = id - KVM_REG_PPC_TM_VSR0;
1670                if (i < 32)
1671                        for (j = 0; j < TS_FPRWIDTH; j++)
1672                                vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
1673                else
1674                        if (cpu_has_feature(CPU_FTR_ALTIVEC))
1675                                vcpu->arch.vr_tm.vr[i-32] = val->vval;
1676                        else
1677                                r = -ENXIO;
1678                break;
1679        }
1680        case KVM_REG_PPC_TM_CR:
1681                vcpu->arch.cr_tm = set_reg_val(id, *val);
1682                break;
1683        case KVM_REG_PPC_TM_XER:
1684                vcpu->arch.xer_tm = set_reg_val(id, *val);
1685                break;
1686        case KVM_REG_PPC_TM_LR:
1687                vcpu->arch.lr_tm = set_reg_val(id, *val);
1688                break;
1689        case KVM_REG_PPC_TM_CTR:
1690                vcpu->arch.ctr_tm = set_reg_val(id, *val);
1691                break;
1692        case KVM_REG_PPC_TM_FPSCR:
1693                vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
1694                break;
1695        case KVM_REG_PPC_TM_AMR:
1696                vcpu->arch.amr_tm = set_reg_val(id, *val);
1697                break;
1698        case KVM_REG_PPC_TM_PPR:
1699                vcpu->arch.ppr_tm = set_reg_val(id, *val);
1700                break;
1701        case KVM_REG_PPC_TM_VRSAVE:
1702                vcpu->arch.vrsave_tm = set_reg_val(id, *val);
1703                break;
1704        case KVM_REG_PPC_TM_VSCR:
1705                if (cpu_has_feature(CPU_FTR_ALTIVEC))
1706                        vcpu->arch.vr_tm.vscr.u[3] = set_reg_val(id, *val);
1707                else
1708                        r = -ENXIO;
1709                break;
1710        case KVM_REG_PPC_TM_DSCR:
1711                vcpu->arch.dscr_tm = set_reg_val(id, *val);
1712                break;
1713        case KVM_REG_PPC_TM_TAR:
1714                vcpu->arch.tar_tm = set_reg_val(id, *val);
1715                break;
1716#endif
1717        default:
1718                r = -EINVAL;
1719                break;
1720        }
1721
1722        return r;
1723}
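
/*
 * Illustrative userspace sketch of the LPCR path above, which is how a VMM
 * can ask PR KVM to deliver guest interrupts little-endian (for instance
 * while servicing the guest's H_SET_MODE request).  Hedged and minimal:
 * vcpu_fd is a placeholder, and LPCR_ILE stands for the same bit
 * definition the kernel uses in asm/reg.h.
 *
 *	__u64 lpcr = LPCR_ILE;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_PPC_LPCR_64,
 *		.addr = (__u64)(uintptr_t)&lpcr,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg) < 0)
 *		err(1, "KVM_SET_ONE_REG");
 *
 * kvmppc_set_lpcr_pr() then folds LPCR_ILE into vcpu->arch.intr_msr, so
 * later interrupt injection sets MSR_LE for the guest.
 */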
1724
1725static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu)
1726{
1727        struct kvmppc_vcpu_book3s *vcpu_book3s;
1728        unsigned long p;
1729        int err;
1730
1731        err = -ENOMEM;
1732
1733        vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
1734        if (!vcpu_book3s)
1735                goto out;
1736        vcpu->arch.book3s = vcpu_book3s;
1737
1738#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
1739        vcpu->arch.shadow_vcpu =
1740                kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
1741        if (!vcpu->arch.shadow_vcpu)
1742                goto free_vcpu3s;
1743#endif
1744
1745        p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
1746        if (!p)
1747                goto free_shadow_vcpu;
1748        vcpu->arch.shared = (void *)p;
1749#ifdef CONFIG_PPC_BOOK3S_64
1750        /* Always start the shared struct in native endian mode */
1751#ifdef __BIG_ENDIAN__
1752        vcpu->arch.shared_big_endian = true;
1753#else
1754        vcpu->arch.shared_big_endian = false;
1755#endif
1756
1757        /*
1758         * Default to the same PVR as the host if we're on a machine
1759         * recent enough to have 1TB segments;
1760         * otherwise default to PPC970FX.
1761         */
1762        vcpu->arch.pvr = 0x3C0301;
1763        if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
1764                vcpu->arch.pvr = mfspr(SPRN_PVR);
1765        vcpu->arch.intr_msr = MSR_SF;
1766#else
1767        /* default to book3s_32 (750) */
1768        vcpu->arch.pvr = 0x84202;
1769        vcpu->arch.intr_msr = 0;
1770#endif
1771        kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
1772        vcpu->arch.slb_nr = 64;
1773
1774        vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
1775
1776        err = kvmppc_mmu_init_pr(vcpu);
1777        if (err < 0)
1778                goto free_shared_page;
1779
1780        return 0;
1781
1782free_shared_page:
1783        free_page((unsigned long)vcpu->arch.shared);
1784free_shadow_vcpu:
1785#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
1786        kfree(vcpu->arch.shadow_vcpu);
1787free_vcpu3s:
1788#endif
1789        vfree(vcpu_book3s);
1790out:
1791        return err;
1792}
1793
1794static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
1795{
1796        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
1797
1798        kvmppc_mmu_destroy_pr(vcpu);
1799        free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
1800#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
1801        kfree(vcpu->arch.shadow_vcpu);
1802#endif
1803        vfree(vcpu_book3s);
1804}
1805
1806static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
1807{
1808        int ret;
1809
1810        /* Check if we can run the vcpu at all */
1811        if (!vcpu->arch.sane) {
1812                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1813                ret = -EINVAL;
1814                goto out;
1815        }
1816
1817        kvmppc_setup_debug(vcpu);
1818
1819        /*
1820         * Interrupts could be timers for the guest which we have to inject
1821         * again, so let's postpone them until we're in the guest and if we
1822         * really did time things so badly, then we just exit again due to
1823         * a host external interrupt.
1824         */
1825        ret = kvmppc_prepare_to_enter(vcpu);
1826        if (ret <= 0)
1827                goto out;
1828        /* interrupts now hard-disabled */
1829
1830        /* Save FPU, Altivec and VSX state */
1831        giveup_all(current);
1832
1833        /* Preload FPU if it's enabled */
1834        if (kvmppc_get_msr(vcpu) & MSR_FP)
1835                kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
1836
1837        kvmppc_fix_ee_before_entry();
1838
1839        ret = __kvmppc_vcpu_run(vcpu);
1840
1841        kvmppc_clear_debug(vcpu);
1842
1843        /* No need for guest_exit. It's done in handle_exit.
1844         * We also get here with interrupts enabled. */
1845
1846        /* Make sure we save the guest FPU/Altivec/VSX state */
1847        kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
1848
1849        /* Make sure we save the guest TAR/EBB/DSCR state */
1850        kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
1851
1852        srr_regs_clobbered();
1853out:
1854        vcpu->mode = OUTSIDE_GUEST_MODE;
1855        return ret;
1856}
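
/*
 * Illustrative userspace sketch of the run loop that ends up in
 * kvmppc_vcpu_run_pr() via the vcpu_run op.  Hedged and minimal: kvm_fd
 * and vcpu_fd are placeholders for the /dev/kvm and vcpu file descriptors,
 * and handle_mmio()/handle_debug()/handle_other_exit() are assumed VMM
 * helpers, not real APIs.
 *
 *	int mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_MMIO:
 *			handle_mmio(run);
 *			break;
 *		case KVM_EXIT_DEBUG:
 *			handle_debug(run);
 *			break;
 *		default:
 *			handle_other_exit(run);
 *		}
 *	}
 */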
1857
1858/*
1859 * Get (and clear) the dirty memory log for a memory slot.
1860 */
1861static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
1862                                         struct kvm_dirty_log *log)
1863{
1864        struct kvm_memory_slot *memslot;
1865        struct kvm_vcpu *vcpu;
1866        ulong ga, ga_end;
1867        int is_dirty = 0;
1868        int r;
1869        unsigned long n;
1870
1871        mutex_lock(&kvm->slots_lock);
1872
1873        r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
1874        if (r)
1875                goto out;
1876
1877        /* If nothing is dirty, don't bother messing with page tables. */
1878        if (is_dirty) {
1879                ga = memslot->base_gfn << PAGE_SHIFT;
1880                ga_end = ga + (memslot->npages << PAGE_SHIFT);
1881
1882                kvm_for_each_vcpu(n, vcpu, kvm)
1883                        kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
1884
1885                n = kvm_dirty_bitmap_bytes(memslot);
1886                memset(memslot->dirty_bitmap, 0, n);
1887        }
1888
1889        r = 0;
1890out:
1891        mutex_unlock(&kvm->slots_lock);
1892        return r;
1893}
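
/*
 * Illustrative userspace sketch of consuming the handler above, which
 * flushes the shadow PTEs for the slot and hands back (then clears) the
 * dirty bitmap.  Hedged and minimal: vm_fd, slot and npages are
 * placeholders for values the VMM tracks for the memslot it registered,
 * and the bitmap needs one bit per page, rounded up to 64-bit words.
 *
 *	__u64 *bitmap = calloc((npages + 63) / 64, sizeof(__u64));
 *	struct kvm_dirty_log log = {
 *		.slot = slot,
 *		.dirty_bitmap = bitmap,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *		err(1, "KVM_GET_DIRTY_LOG");
 *
 * Each set bit then marks a guest page dirtied since the previous call.
 */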
1894
1895static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
1896                                         struct kvm_memory_slot *memslot)
1897{
1898        return;
1899}
1900
1901static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
1902                                        struct kvm_memory_slot *memslot,
1903                                        const struct kvm_userspace_memory_region *mem,
1904                                        enum kvm_mr_change change)
1905{
1906        return 0;
1907}
1908
1909static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
1910                                const struct kvm_userspace_memory_region *mem,
1911                                const struct kvm_memory_slot *old,
1912                                const struct kvm_memory_slot *new,
1913                                enum kvm_mr_change change)
1914{
1915        return;
1916}
1917
1918static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *slot)
1919{
1920        return;
1921}
1922
1923#ifdef CONFIG_PPC64
1924static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
1925                                         struct kvm_ppc_smmu_info *info)
1926{
1927        long int i;
1928        struct kvm_vcpu *vcpu;
1929
1930        info->flags = 0;
1931
1932        /* SLB is always 64 entries */
1933        info->slb_size = 64;
1934
1935        /* Standard 4k base page size segment */
1936        info->sps[0].page_shift = 12;
1937        info->sps[0].slb_enc = 0;
1938        info->sps[0].enc[0].page_shift = 12;
1939        info->sps[0].enc[0].pte_enc = 0;
1940
1941        /*
1942         * 64k large page size.
1943         * We only want to put this in if the CPUs we're emulating
1944         * support it, but unfortunately we don't have a vcpu to hand
1945         * here to test with.  Just pick the first vcpu, and if
1946         * that doesn't exist yet, report the minimum capability,
1947         * i.e., no 64k pages.
1948         * 1T segment support goes along with 64k pages.
1949         */
1950        i = 1;
1951        vcpu = kvm_get_vcpu(kvm, 0);
1952        if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
1953                info->flags = KVM_PPC_1T_SEGMENTS;
1954                info->sps[i].page_shift = 16;
1955                info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
1956                info->sps[i].enc[0].page_shift = 16;
1957                info->sps[i].enc[0].pte_enc = 1;
1958                ++i;
1959        }
1960
1961        /* Standard 16M large page size segment */
1962        info->sps[i].page_shift = 24;
1963        info->sps[i].slb_enc = SLB_VSID_L;
1964        info->sps[i].enc[0].page_shift = 24;
1965        info->sps[i].enc[0].pte_enc = 0;
1966
1967        return 0;
1968}
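
/*
 * Illustrative userspace sketch of reading the table built above through
 * KVM_PPC_GET_SMMU_INFO.  Hedged and minimal: vm_fd is a placeholder, and
 * unused sps[] slots are assumed to be left zeroed, so a zero page_shift
 * ends the walk.
 *
 *	struct kvm_ppc_smmu_info info;
 *	int i;
 *
 *	if (ioctl(vm_fd, KVM_PPC_GET_SMMU_INFO, &info) < 0)
 *		err(1, "KVM_PPC_GET_SMMU_INFO");
 *
 *	printf("slb size %u, 1T segments: %s\n", info.slb_size,
 *	       (info.flags & KVM_PPC_1T_SEGMENTS) ? "yes" : "no");
 *	for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
 *		if (!info.sps[i].page_shift)
 *			break;
 *		printf("segment base page shift %u\n", info.sps[i].page_shift);
 *	}
 */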
1969
1970static int kvm_configure_mmu_pr(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
1971{
1972        if (!cpu_has_feature(CPU_FTR_ARCH_300))
1973                return -ENODEV;
1974        /* Require flags and process table base and size to all be zero. */
1975        if (cfg->flags || cfg->process_table)
1976                return -EINVAL;
1977        return 0;
1978}
1979
1980#else
1981static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
1982                                         struct kvm_ppc_smmu_info *info)
1983{
1984        /* We should not get called */
1985        BUG();
1986        return 0;
1987}
1988#endif /* CONFIG_PPC64 */
1989
1990static unsigned int kvm_global_user_count = 0;
1991static DEFINE_SPINLOCK(kvm_global_user_count_lock);
1992
1993static int kvmppc_core_init_vm_pr(struct kvm *kvm)
1994{
1995        mutex_init(&kvm->arch.hpt_mutex);
1996
1997#ifdef CONFIG_PPC_BOOK3S_64
1998        /* Start out with the default set of hcalls enabled */
1999        kvmppc_pr_init_default_hcalls(kvm);
2000#endif
2001
2002        if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
2003                spin_lock(&kvm_global_user_count_lock);
2004                if (++kvm_global_user_count == 1)
2005                        pseries_disable_reloc_on_exc();
2006                spin_unlock(&kvm_global_user_count_lock);
2007        }
2008        return 0;
2009}
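
/*
 * Illustrative userspace sketch of creating a VM that reaches
 * kvmppc_core_init_vm_pr().  Hedged and minimal: on Book3S the machine
 * type argument of KVM_CREATE_VM can request the PR implementation
 * explicitly; passing 0 instead lets the kernel pick its default.
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *	int vm_fd  = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_PPC_PR);
 *
 *	if (vm_fd < 0)
 *		err(1, "KVM_CREATE_VM(KVM_VM_PPC_PR)");
 */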
2010
2011static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
2012{
2013#ifdef CONFIG_PPC64
2014        WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
2015#endif
2016
2017        if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
2018                spin_lock(&kvm_global_user_count_lock);
2019                BUG_ON(kvm_global_user_count == 0);
2020                if (--kvm_global_user_count == 0)
2021                        pseries_enable_reloc_on_exc();
2022                spin_unlock(&kvm_global_user_count_lock);
2023        }
2024}
2025
2026static int kvmppc_core_check_processor_compat_pr(void)
2027{
2028        /*
2029         * PR KVM can work on POWER9 inside a guest partition
2030         * running in HPT mode.  It can't work if we are using
2031         * radix translation (because radix provides no way for
2032         * a process to have unique translations in quadrant 3).
2033         */
2034        if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
2035                return -EIO;
2036        return 0;
2037}
2038
2039static long kvm_arch_vm_ioctl_pr(struct file *filp,
2040                                 unsigned int ioctl, unsigned long arg)
2041{
2042        return -ENOTTY;
2043}
2044
2045static struct kvmppc_ops kvm_ops_pr = {
2046        .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
2047        .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
2048        .get_one_reg = kvmppc_get_one_reg_pr,
2049        .set_one_reg = kvmppc_set_one_reg_pr,
2050        .vcpu_load   = kvmppc_core_vcpu_load_pr,
2051        .vcpu_put    = kvmppc_core_vcpu_put_pr,
2052        .inject_interrupt = kvmppc_inject_interrupt_pr,
2053        .set_msr     = kvmppc_set_msr_pr,
2054        .vcpu_run    = kvmppc_vcpu_run_pr,
2055        .vcpu_create = kvmppc_core_vcpu_create_pr,
2056        .vcpu_free   = kvmppc_core_vcpu_free_pr,
2057        .check_requests = kvmppc_core_check_requests_pr,
2058        .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
2059        .flush_memslot = kvmppc_core_flush_memslot_pr,
2060        .prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
2061        .commit_memory_region = kvmppc_core_commit_memory_region_pr,
2062        .unmap_gfn_range = kvm_unmap_gfn_range_pr,
2063        .age_gfn  = kvm_age_gfn_pr,
2064        .test_age_gfn = kvm_test_age_gfn_pr,
2065        .set_spte_gfn = kvm_set_spte_gfn_pr,
2066        .free_memslot = kvmppc_core_free_memslot_pr,
2067        .init_vm = kvmppc_core_init_vm_pr,
2068        .destroy_vm = kvmppc_core_destroy_vm_pr,
2069        .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
2070        .emulate_op = kvmppc_core_emulate_op_pr,
2071        .emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
2072        .emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
2073        .fast_vcpu_kick = kvm_vcpu_kick,
2074        .arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
2075#ifdef CONFIG_PPC_BOOK3S_64
2076        .hcall_implemented = kvmppc_hcall_impl_pr,
2077        .configure_mmu = kvm_configure_mmu_pr,
2078#endif
2079        .giveup_ext = kvmppc_giveup_ext,
2080};
2081
2082
2083int kvmppc_book3s_init_pr(void)
2084{
2085        int r;
2086
2087        r = kvmppc_core_check_processor_compat_pr();
2088        if (r < 0)
2089                return r;
2090
2091        kvm_ops_pr.owner = THIS_MODULE;
2092        kvmppc_pr_ops = &kvm_ops_pr;
2093
2094        r = kvmppc_mmu_hpte_sysinit();
2095        return r;
2096}
2097
2098void kvmppc_book3s_exit_pr(void)
2099{
2100        kvmppc_pr_ops = NULL;
2101        kvmppc_mmu_hpte_sysexit();
2102}
2103
2104/*
2105 * We only support separate modules for book3s 64
2106 */
2107#ifdef CONFIG_PPC_BOOK3S_64
2108
2109module_init(kvmppc_book3s_init_pr);
2110module_exit(kvmppc_book3s_exit_pr);
2111
2112MODULE_LICENSE("GPL");
2113MODULE_ALIAS_MISCDEV(KVM_MINOR);
2114MODULE_ALIAS("devname:kvm");
2115#endif
2116