linux/arch/powerpc/kvm/book3s_hv_builtin.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
   4 */
   5
   6#include <linux/cpu.h>
   7#include <linux/kvm_host.h>
   8#include <linux/preempt.h>
   9#include <linux/export.h>
  10#include <linux/sched.h>
  11#include <linux/spinlock.h>
  12#include <linux/init.h>
  13#include <linux/memblock.h>
  14#include <linux/sizes.h>
  15#include <linux/cma.h>
  16#include <linux/bitops.h>
  17
  18#include <asm/asm-prototypes.h>
  19#include <asm/cputable.h>
  20#include <asm/interrupt.h>
  21#include <asm/kvm_ppc.h>
  22#include <asm/kvm_book3s.h>
  23#include <asm/archrandom.h>
  24#include <asm/xics.h>
  25#include <asm/xive.h>
  26#include <asm/dbell.h>
  27#include <asm/cputhreads.h>
  28#include <asm/io.h>
  29#include <asm/opal.h>
  30#include <asm/smp.h>
  31
  32#define KVM_CMA_CHUNK_ORDER     18
  33
  34#include "book3s_xics.h"
  35#include "book3s_xive.h"
  36
  37/*
   38 * Hash page table alignment on newer CPUs (CPU_FTR_ARCH_206)
   39 * should be a power of 2.
  40 */
  41#define HPT_ALIGN_PAGES         ((1 << 18) >> PAGE_SHIFT) /* 256k */
  42/*
  43 * By default we reserve 5% of memory for hash pagetable allocation.
  44 */
  45static unsigned long kvm_cma_resv_ratio = 5;
  46
  47static struct cma *kvm_cma;
  48
  49static int __init early_parse_kvm_cma_resv(char *p)
  50{
  51        pr_debug("%s(%s)\n", __func__, p);
  52        if (!p)
  53                return -EINVAL;
  54        return kstrtoul(p, 0, &kvm_cma_resv_ratio);
  55}
  56early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
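
/*
 * Example (illustrative): booting with "kvm_cma_resv_ratio=10" on the kernel
 * command line reserves 10% of memory for this area instead of the default
 * 5%, and "kvm_cma_resv_ratio=0" skips the reservation entirely (see
 * kvm_cma_reserve() below).
 */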
  57
  58struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
  59{
  60        VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
  61
  62        return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES),
  63                         false);
  64}
  65EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
  66
  67void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages)
  68{
  69        cma_release(kvm_cma, page, nr_pages);
  70}
  71EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
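
/*
 * A minimal, hypothetical sketch (not the real HPT allocation path) of how
 * the two helpers above pair up: here "order" is assumed to be log2 of the
 * hash page table size in bytes, so the page count passed to the CMA
 * helpers is 1UL << (order - PAGE_SHIFT).
 */
static struct page *example_hpt_alloc(u32 order)
{
        /* Returns NULL if the kvm_cma area cannot satisfy the request. */
        return kvm_alloc_hpt_cma(1UL << (order - PAGE_SHIFT));
}

static void example_hpt_free(struct page *page, u32 order)
{
        kvm_free_hpt_cma(page, 1UL << (order - PAGE_SHIFT));
}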
  72
  73/**
  74 * kvm_cma_reserve() - reserve area for kvm hash pagetable
  75 *
  76 * This function reserves memory from early allocator. It should be
  77 * called by arch specific code once the memblock allocator
  78 * has been activated and all other subsystems have already allocated/reserved
  79 * memory.
  80 */
  81void __init kvm_cma_reserve(void)
  82{
  83        unsigned long align_size;
  84        phys_addr_t selected_size;
  85
  86        /*
  87         * We need CMA reservation only when we are in HV mode
  88         */
  89        if (!cpu_has_feature(CPU_FTR_HVMODE))
  90                return;
  91
  92        selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100);
  93        if (selected_size) {
  94                pr_info("%s: reserving %ld MiB for global area\n", __func__,
  95                         (unsigned long)selected_size / SZ_1M);
  96                align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
  97                cma_declare_contiguous(0, selected_size, 0, align_size,
  98                        KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, "kvm_cma",
  99                        &kvm_cma);
 100        }
 101}
 102
 103/*
 104 * Real-mode H_CONFER implementation.
 105 * We check if we are the only vcpu out of this virtual core
 106 * still running in the guest and not ceded.  If so, we pop up
 107 * to the virtual-mode implementation; if not, just return to
 108 * the guest.
 109 */
 110long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 111                            unsigned int yield_count)
 112{
 113        struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
 114        int ptid = local_paca->kvm_hstate.ptid;
 115        int threads_running;
 116        int threads_ceded;
 117        int threads_conferring;
 118        u64 stop = get_tb() + 10 * tb_ticks_per_usec;
 119        int rv = H_SUCCESS; /* => don't yield */
 120
 121        set_bit(ptid, &vc->conferring_threads);
 122        while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
 123                threads_running = VCORE_ENTRY_MAP(vc);
 124                threads_ceded = vc->napping_threads;
 125                threads_conferring = vc->conferring_threads;
 126                if ((threads_ceded | threads_conferring) == threads_running) {
 127                        rv = H_TOO_HARD; /* => do yield */
 128                        break;
 129                }
 130        }
 131        clear_bit(ptid, &vc->conferring_threads);
 132        return rv;
 133}
 134
 135/*
 136 * When running HV mode KVM we need to block certain operations while KVM VMs
 137 * exist in the system. We use a counter of VMs to track this.
 138 *
 139 * One of the operations we need to block is onlining of secondaries, so we
 140 * protect hv_vm_count with cpus_read_lock/unlock().
 141 */
 142static atomic_t hv_vm_count;
 143
 144void kvm_hv_vm_activated(void)
 145{
 146        cpus_read_lock();
 147        atomic_inc(&hv_vm_count);
 148        cpus_read_unlock();
 149}
 150EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);
 151
 152void kvm_hv_vm_deactivated(void)
 153{
 154        cpus_read_lock();
 155        atomic_dec(&hv_vm_count);
 156        cpus_read_unlock();
 157}
 158EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);
 159
 160bool kvm_hv_mode_active(void)
 161{
 162        return atomic_read(&hv_vm_count) != 0;
 163}
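
/*
 * A minimal sketch (hypothetical, not the actual powerpc hotplug or subcore
 * code) of the consumer side of hv_vm_count: an operation that must not run
 * while HV guests exist takes cpus_read_lock() and then checks
 * kvm_hv_mode_active(), mirroring the locking in kvm_hv_vm_activated() and
 * kvm_hv_vm_deactivated() above.
 */
static int example_blocked_while_hv_vms_exist(void)
{
        int rc = 0;

        cpus_read_lock();
        if (kvm_hv_mode_active()) {
                /* At least one HV guest exists; refuse rather than race. */
                rc = -EBUSY;
        } else {
                /* ...safe to perform the blocked operation here... */
        }
        cpus_read_unlock();

        return rc;
}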
 164
 165extern int hcall_real_table[], hcall_real_table_end[];
 166
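/*
 * PAPR hcall numbers are multiples of 4, so the real-mode dispatch table
 * (hcall_real_table in book3s_hv_rmhandlers.S) is indexed by hcall number
 * divided by 4; a zero entry means the hcall has no real-mode handler.
 */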
 167int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
 168{
 169        cmd /= 4;
 170        if (cmd < hcall_real_table_end - hcall_real_table &&
 171            hcall_real_table[cmd])
 172                return 1;
 173
 174        return 0;
 175}
 176EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
 177
 178int kvmppc_hwrng_present(void)
 179{
 180        return powernv_hwrng_present();
 181}
 182EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
 183
 184long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
 185{
 186        if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
 187                return H_SUCCESS;
 188
 189        return H_HARDWARE;
 190}
 191
 192/*
 193 * Send an interrupt or message to another CPU.
 194 * The caller needs to include any barrier needed to order writes
 195 * to memory vs. the IPI/message.
 196 */
 197void kvmhv_rm_send_ipi(int cpu)
 198{
 199        void __iomem *xics_phys;
 200        unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 201
 202        /* On POWER9 we can use msgsnd for any destination cpu. */
 203        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 204                msg |= get_hard_smp_processor_id(cpu);
 205                __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
 206                return;
 207        }
 208
 209        /* On POWER8 for IPIs to threads in the same core, use msgsnd. */
 210        if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
 211            cpu_first_thread_sibling(cpu) ==
 212            cpu_first_thread_sibling(raw_smp_processor_id())) {
 213                msg |= cpu_thread_in_core(cpu);
 214                __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
 215                return;
 216        }
 217
 218        /* We should never reach this */
 219        if (WARN_ON_ONCE(xics_on_xive()))
 220            return;
 221
 222        /* Else poke the target with an IPI */
 223        xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
 224        if (xics_phys)
 225                __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
 226        else
 227                opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
 228}
 229
 230/*
 231 * The following functions are called from the assembly code
 232 * in book3s_hv_rmhandlers.S.
 233 */
 234static void kvmhv_interrupt_vcore(struct kvmppc_vcore *vc, int active)
 235{
 236        int cpu = vc->pcpu;
 237
 238        /* Order setting of exit map vs. msgsnd/IPI */
 239        smp_mb();
 240        for (; active; active >>= 1, ++cpu)
 241                if (active & 1)
 242                        kvmhv_rm_send_ipi(cpu);
 243}
 244
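/*
 * Note: vcore->entry_exit_map packs two bitmaps, with bits 0-7 recording
 * which threads have entered the guest and bits 8-15 recording which
 * threads have started to exit; kvmhv_commence_exit() below operates on
 * the exit half (the 0xff00 bits).
 */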
 245void kvmhv_commence_exit(int trap)
 246{
 247        struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
 248        int ptid = local_paca->kvm_hstate.ptid;
 249        struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
 250        int me, ee, i;
 251
 252        /* Set our bit in the threads-exiting-guest map in the 0xff00
 253           bits of vcore->entry_exit_map */
 254        me = 0x100 << ptid;
 255        do {
 256                ee = vc->entry_exit_map;
 257        } while (cmpxchg(&vc->entry_exit_map, ee, ee | me) != ee);
 258
 259        /* Are we the first here? */
 260        if ((ee >> 8) != 0)
 261                return;
 262
 263        /*
 264         * Trigger the other threads in this vcore to exit the guest.
 265         * If this is a hypervisor decrementer interrupt then they
 266         * will be already on their way out of the guest.
 267         */
 268        if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
 269                kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
 270
 271        /*
 272         * If we are doing dynamic micro-threading, interrupt the other
 273         * subcores to pull them out of their guests too.
 274         */
 275        if (!sip)
 276                return;
 277
 278        for (i = 0; i < MAX_SUBCORES; ++i) {
 279                vc = sip->vc[i];
 280                if (!vc)
 281                        break;
 282                do {
 283                        ee = vc->entry_exit_map;
 284                        /* Already asked to exit? */
 285                        if ((ee >> 8) != 0)
 286                                break;
 287                } while (cmpxchg(&vc->entry_exit_map, ee,
 288                                 ee | VCORE_EXIT_REQ) != ee);
 289                if ((ee >> 8) == 0)
 290                        kvmhv_interrupt_vcore(vc, ee);
 291        }
 292}
 293
 294struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
 295EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
 296
 297#ifdef CONFIG_KVM_XICS
 298static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
 299                                         u32 xisr)
 300{
 301        int i;
 302
 303        /*
 304         * We access the mapped array here without a lock.  That
 305         * is safe because we never reduce the number of entries
 306         * in the array and we never change the v_hwirq field of
 307         * an entry once it is set.
 308         *
 309         * We have also carefully ordered the stores in the writer
 310         * and the loads here in the reader, so that if we find a matching
 311         * hwirq here, the associated GSI and irq_desc fields are valid.
 312         */
 313        for (i = 0; i < pimap->n_mapped; i++)  {
 314                if (xisr == pimap->mapped[i].r_hwirq) {
 315                        /*
 316                         * Order subsequent reads in the caller to serialize
 317                         * with the writer.
 318                         */
 319                        smp_rmb();
 320                        return &pimap->mapped[i];
 321                }
 322        }
 323        return NULL;
 324}
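
/*
 * A hedged sketch (hypothetical helper; the real update is done in the
 * virtual-mode passthrough setup code) of the writer-side ordering that the
 * smp_rmb() above pairs with: the other fields of a new entry are published
 * before r_hwirq, and n_mapped only grows, so a reader that finds a matching
 * r_hwirq also sees valid v_hwirq and desc values.
 */
static void example_publish_irq_map(struct kvmppc_passthru_irqmap *pimap,
                                    u32 host_hwirq, u32 guest_gsi,
                                    struct irq_desc *desc)
{
        struct kvmppc_irq_map *map = &pimap->mapped[pimap->n_mapped];

        map->v_hwirq = guest_gsi;
        map->desc = desc;
        /* Order the stores above before making the entry visible. */
        smp_wmb();
        map->r_hwirq = host_hwirq;
        pimap->n_mapped++;
}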
 325
 326/*
 327 * If we have an interrupt that's not an IPI, check if we have a
 328 * passthrough adapter and if so, check if this external interrupt
 329 * is for the adapter.
 330 * We will attempt to deliver the IRQ directly to the target VCPU's
 331 * ICP, the virtual ICP (based on affinity - the xive value in ICS).
 332 *
 333 * If the delivery fails or if this is not for a passthrough adapter,
 334 * return to the host to handle this interrupt. We earlier
  335 * saved a copy of the XIRR in the PACA; it will be picked up by
 336 * the host ICP driver.
 337 */
 338static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
 339{
 340        struct kvmppc_passthru_irqmap *pimap;
 341        struct kvmppc_irq_map *irq_map;
 342        struct kvm_vcpu *vcpu;
 343
 344        vcpu = local_paca->kvm_hstate.kvm_vcpu;
 345        if (!vcpu)
 346                return 1;
 347        pimap = kvmppc_get_passthru_irqmap(vcpu->kvm);
 348        if (!pimap)
 349                return 1;
 350        irq_map = get_irqmap(pimap, xisr);
 351        if (!irq_map)
 352                return 1;
 353
 354        /* We're handling this interrupt, generic code doesn't need to */
 355        local_paca->kvm_hstate.saved_xirr = 0;
 356
 357        return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again);
 358}
 359
 360#else
 361static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
 362{
 363        return 1;
 364}
 365#endif
 366
 367/*
 368 * Determine what sort of external interrupt is pending (if any).
 369 * Returns:
 370 *      0 if no interrupt is pending
 371 *      1 if an interrupt is pending that needs to be handled by the host
  372 *      2 if a PCI passthrough interrupt needs completion in the host
 373 *      -1 if there was a guest wakeup IPI (which has now been cleared)
 374 *      -2 if there is PCI passthrough external interrupt that was handled
 375 */
 376static long kvmppc_read_one_intr(bool *again);
 377
 378long kvmppc_read_intr(void)
 379{
 380        long ret = 0;
 381        long rc;
 382        bool again;
 383
 384        if (xive_enabled())
 385                return 1;
 386
 387        do {
 388                again = false;
 389                rc = kvmppc_read_one_intr(&again);
 390                if (rc && (ret == 0 || rc > ret))
 391                        ret = rc;
 392        } while (again);
 393        return ret;
 394}
 395
 396static long kvmppc_read_one_intr(bool *again)
 397{
 398        void __iomem *xics_phys;
 399        u32 h_xirr;
 400        __be32 xirr;
 401        u32 xisr;
 402        u8 host_ipi;
 403        int64_t rc;
 404
 405        if (xive_enabled())
 406                return 1;
 407
 408        /* see if a host IPI is pending */
 409        host_ipi = local_paca->kvm_hstate.host_ipi;
 410        if (host_ipi)
 411                return 1;
 412
 413        /* Now read the interrupt from the ICP */
 414        xics_phys = local_paca->kvm_hstate.xics_phys;
 415        rc = 0;
 416        if (!xics_phys)
 417                rc = opal_int_get_xirr(&xirr, false);
 418        else
 419                xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
 420        if (rc < 0)
 421                return 1;
 422
 423        /*
 424         * Save XIRR for later. Since we get control in reverse endian
 425         * on LE systems, save it byte reversed and fetch it back in
 426         * host endian. Note that xirr is the value read from the
 427         * XIRR register, while h_xirr is the host endian version.
 428         */
 429        h_xirr = be32_to_cpu(xirr);
 430        local_paca->kvm_hstate.saved_xirr = h_xirr;
 431        xisr = h_xirr & 0xffffff;
 432        /*
 433         * Ensure that the store/load complete to guarantee all side
  434         * effects of loading from XIRR have completed
 435         */
 436        smp_mb();
 437
 438        /* if nothing pending in the ICP */
 439        if (!xisr)
 440                return 0;
 441
 442        /* We found something in the ICP...
 443         *
 444         * If it is an IPI, clear the MFRR and EOI it.
 445         */
 446        if (xisr == XICS_IPI) {
 447                rc = 0;
 448                if (xics_phys) {
 449                        __raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
 450                        __raw_rm_writel(xirr, xics_phys + XICS_XIRR);
 451                } else {
 452                        opal_int_set_mfrr(hard_smp_processor_id(), 0xff);
 453                        rc = opal_int_eoi(h_xirr);
 454                }
 455                /* If rc > 0, there is another interrupt pending */
 456                *again = rc > 0;
 457
 458                /*
 459                 * Need to ensure side effects of above stores
 460                 * complete before proceeding.
 461                 */
 462                smp_mb();
 463
 464                /*
 465                 * We need to re-check host IPI now in case it got set in the
 466                 * meantime. If it's clear, we bounce the interrupt to the
  467                 * guest.
 468                 */
 469                host_ipi = local_paca->kvm_hstate.host_ipi;
 470                if (unlikely(host_ipi != 0)) {
 471                        /* We raced with the host,
 472                         * we need to resend that IPI, bummer
 473                         */
 474                        if (xics_phys)
 475                                __raw_rm_writeb(IPI_PRIORITY,
 476                                                xics_phys + XICS_MFRR);
 477                        else
 478                                opal_int_set_mfrr(hard_smp_processor_id(),
 479                                                  IPI_PRIORITY);
 480                        /* Let side effects complete */
 481                        smp_mb();
 482                        return 1;
 483                }
 484
 485                /* OK, it's an IPI for us */
 486                local_paca->kvm_hstate.saved_xirr = 0;
 487                return -1;
 488        }
 489
 490        return kvmppc_check_passthru(xisr, xirr, again);
 491}
 492
 493#ifdef CONFIG_KVM_XICS
 494unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
 495{
 496        if (!kvmppc_xics_enabled(vcpu))
 497                return H_TOO_HARD;
 498        if (xics_on_xive())
 499                return xive_rm_h_xirr(vcpu);
 500        else
 501                return xics_rm_h_xirr(vcpu);
 502}
 503
 504unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
 505{
 506        if (!kvmppc_xics_enabled(vcpu))
 507                return H_TOO_HARD;
 508        vcpu->arch.regs.gpr[5] = get_tb();
 509        if (xics_on_xive())
 510                return xive_rm_h_xirr(vcpu);
 511        else
 512                return xics_rm_h_xirr(vcpu);
 513}
 514
 515unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
 516{
 517        if (!kvmppc_xics_enabled(vcpu))
 518                return H_TOO_HARD;
 519        if (xics_on_xive())
 520                return xive_rm_h_ipoll(vcpu, server);
 521        else
 522                return H_TOO_HARD;
 523}
 524
 525int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 526                    unsigned long mfrr)
 527{
 528        if (!kvmppc_xics_enabled(vcpu))
 529                return H_TOO_HARD;
 530        if (xics_on_xive())
 531                return xive_rm_h_ipi(vcpu, server, mfrr);
 532        else
 533                return xics_rm_h_ipi(vcpu, server, mfrr);
 534}
 535
 536int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 537{
 538        if (!kvmppc_xics_enabled(vcpu))
 539                return H_TOO_HARD;
 540        if (xics_on_xive())
 541                return xive_rm_h_cppr(vcpu, cppr);
 542        else
 543                return xics_rm_h_cppr(vcpu, cppr);
 544}
 545
 546int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 547{
 548        if (!kvmppc_xics_enabled(vcpu))
 549                return H_TOO_HARD;
 550        if (xics_on_xive())
 551                return xive_rm_h_eoi(vcpu, xirr);
 552        else
 553                return xics_rm_h_eoi(vcpu, xirr);
 554}
 555#endif /* CONFIG_KVM_XICS */
 556
 557void kvmppc_bad_interrupt(struct pt_regs *regs)
 558{
 559        /*
  560         * 0x100 (system reset) could happen at any time; 0x200 (machine check) can
  561         * happen due to an invalid real address access, or due to a hardware problem.
 562         */
 563        if (TRAP(regs) == 0x100) {
 564                get_paca()->in_nmi++;
 565                system_reset_exception(regs);
 566                get_paca()->in_nmi--;
 567        } else if (TRAP(regs) == 0x200) {
 568                machine_check_exception(regs);
 569        } else {
 570                die("Bad interrupt in KVM entry/exit code", regs, SIGABRT);
 571        }
 572        panic("Bad KVM trap");
 573}
 574
 575static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 576{
 577        vcpu->arch.ceded = 0;
 578        if (vcpu->arch.timer_running) {
 579                hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
 580                vcpu->arch.timer_running = 0;
 581        }
 582}
 583
 584void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 585{
 586        /* Guest must always run with ME enabled, HV disabled. */
 587        msr = (msr | MSR_ME) & ~MSR_HV;
 588
 589        /*
 590         * Check for illegal transactional state bit combination
 591         * and if we find it, force the TS field to a safe state.
 592         */
 593        if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
 594                msr &= ~MSR_TS_MASK;
 595        vcpu->arch.shregs.msr = msr;
 596        kvmppc_end_cede(vcpu);
 597}
 598EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);
 599
 600static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
 601{
 602        unsigned long msr, pc, new_msr, new_pc;
 603
 604        msr = kvmppc_get_msr(vcpu);
 605        pc = kvmppc_get_pc(vcpu);
 606        new_msr = vcpu->arch.intr_msr;
 607        new_pc = vec;
 608
 609        /* If transactional, change to suspend mode on IRQ delivery */
 610        if (MSR_TM_TRANSACTIONAL(msr))
 611                new_msr |= MSR_TS_S;
 612        else
 613                new_msr |= msr & MSR_TS_MASK;
 614
 615        /*
 616         * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and
 617         * applicable. AIL=2 is not supported.
 618         *
 619         * AIL does not apply to SRESET, MCE, or HMI (which is never
 620         * delivered to the guest), and does not apply if IR=0 or DR=0.
 621         */
 622        if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
 623            vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
 624            (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
 625            (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) {
 626                new_msr |= MSR_IR | MSR_DR;
 627                new_pc += 0xC000000000004000ULL;
 628        }
 629
 630        kvmppc_set_srr0(vcpu, pc);
 631        kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
 632        kvmppc_set_pc(vcpu, new_pc);
 633        vcpu->arch.shregs.msr = new_msr;
 634}
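
/*
 * Worked example of the AIL=3 adjustment above (illustrative): an external
 * interrupt (vec = 0x500) taken while the guest has MSR[IR] = MSR[DR] = 1
 * and LPCR[AIL] = 3 is delivered at 0xc000000000004500 with relocation on,
 * whereas system reset and machine check are always delivered at their
 * architected vectors without the offset.
 */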
 635
 636void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
 637{
 638        inject_interrupt(vcpu, vec, srr1_flags);
 639        kvmppc_end_cede(vcpu);
 640}
 641EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);
 642
 643/*
 644 * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
  645 * Can we inject a Decrementer or an External interrupt?
 646 */
 647void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
 648{
 649        int ext;
 650        unsigned long lpcr;
 651
 652        /* Insert EXTERNAL bit into LPCR at the MER bit position */
 653        ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
 654        lpcr = mfspr(SPRN_LPCR);
 655        lpcr |= ext << LPCR_MER_SH;
 656        mtspr(SPRN_LPCR, lpcr);
 657        isync();
 658
 659        if (vcpu->arch.shregs.msr & MSR_EE) {
 660                if (ext) {
 661                        inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
 662                } else {
 663                        long int dec = mfspr(SPRN_DEC);
 664                        if (!(lpcr & LPCR_LD))
 665                                dec = (int) dec;
 666                        if (dec < 0)
 667                                inject_interrupt(vcpu,
 668                                        BOOK3S_INTERRUPT_DECREMENTER, 0);
 669                }
 670        }
 671
 672        if (vcpu->arch.doorbell_request) {
 673                mtspr(SPRN_DPDES, 1);
 674                vcpu->arch.vcore->dpdes = 1;
 675                smp_wmb();
 676                vcpu->arch.doorbell_request = 0;
 677        }
 678}
 679
 680static void flush_guest_tlb(struct kvm *kvm)
 681{
 682        unsigned long rb, set;
 683
 684        rb = PPC_BIT(52);       /* IS = 2 */
 685        if (kvm_is_radix(kvm)) {
 686                /* R=1 PRS=1 RIC=2 */
 687                asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 688                             : : "r" (rb), "i" (1), "i" (1), "i" (2),
 689                               "r" (0) : "memory");
 690                for (set = 1; set < kvm->arch.tlb_sets; ++set) {
 691                        rb += PPC_BIT(51);      /* increment set number */
 692                        /* R=1 PRS=1 RIC=0 */
 693                        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 694                                     : : "r" (rb), "i" (1), "i" (1), "i" (0),
 695                                       "r" (0) : "memory");
 696                }
 697                asm volatile("ptesync": : :"memory");
 698                // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
 699                asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
 700        } else {
 701                for (set = 0; set < kvm->arch.tlb_sets; ++set) {
 702                        /* R=0 PRS=0 RIC=0 */
 703                        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 704                                     : : "r" (rb), "i" (0), "i" (0), "i" (0),
 705                                       "r" (0) : "memory");
 706                        rb += PPC_BIT(51);      /* increment set number */
 707                }
 708                asm volatile("ptesync": : :"memory");
 709                // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
 710                if (cpu_has_feature(CPU_FTR_ARCH_300))
 711                        asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
 712        }
 713}
 714
 715void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
 716                                 struct kvm_nested_guest *nested)
 717{
 718        cpumask_t *need_tlb_flush;
 719
 720        /*
 721         * On POWER9, individual threads can come in here, but the
 722         * TLB is shared between the 4 threads in a core, hence
 723         * invalidating on one thread invalidates for all.
 724         * Thus we make all 4 threads use the same bit.
 725         */
 726        if (cpu_has_feature(CPU_FTR_ARCH_300))
 727                pcpu = cpu_first_tlb_thread_sibling(pcpu);
 728
 729        if (nested)
 730                need_tlb_flush = &nested->need_tlb_flush;
 731        else
 732                need_tlb_flush = &kvm->arch.need_tlb_flush;
 733
 734        if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
 735                flush_guest_tlb(kvm);
 736
 737                /* Clear the bit after the TLB flush */
 738                cpumask_clear_cpu(pcpu, need_tlb_flush);
 739        }
 740}
 741EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);
 742