linux/arch/powerpc/kvm/book3s_hv_rm_mmu.c
   1/*
   2 * This program is free software; you can redistribute it and/or modify
   3 * it under the terms of the GNU General Public License, version 2, as
   4 * published by the Free Software Foundation.
   5 *
   6 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/string.h>
  11#include <linux/kvm.h>
  12#include <linux/kvm_host.h>
  13#include <linux/hugetlb.h>
  14#include <linux/module.h>
  15#include <linux/log2.h>
  16
  17#include <asm/tlbflush.h>
  18#include <asm/kvm_ppc.h>
  19#include <asm/kvm_book3s.h>
  20#include <asm/book3s/64/mmu-hash.h>
  21#include <asm/hvcall.h>
  22#include <asm/synch.h>
  23#include <asm/ppc-opcode.h>
  24
  25/* Translate address of a vmalloc'd thing to a linear map address */
  26static void *real_vmalloc_addr(void *x)
  27{
  28        unsigned long addr = (unsigned long) x;
  29        pte_t *p;
   30        /*
   31         * Assume we don't have huge pages in vmalloc space, so we
   32         * don't need to worry about THP collapse/split.  This is
   33         * only called in real mode, hence no irq_save/restore.
   34         */
  35        p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL);
  36        if (!p || !pte_present(*p))
  37                return NULL;
  38        addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
  39        return __va(addr);
  40}
  41
  42/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
  43static int global_invalidates(struct kvm *kvm, unsigned long flags)
  44{
  45        int global;
  46
  47        /*
  48         * If there is only one vcore, and it's currently running,
  49         * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
  50         * we can use tlbiel as long as we mark all other physical
  51         * cores as potentially having stale TLB entries for this lpid.
  52         * Otherwise, don't use tlbiel.
  53         */
  54        if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
  55                global = 0;
  56        else
  57                global = 1;
  58
  59        if (!global) {
  60                /* any other core might now have stale TLB entries... */
  61                smp_wmb();
  62                cpumask_setall(&kvm->arch.need_tlb_flush);
  63                cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
  64                                  &kvm->arch.need_tlb_flush);
  65        }
  66
  67        return global;
  68}
  69
  70/*
  71 * Add this HPTE into the chain for the real page.
  72 * Must be called with the chain locked; it unlocks the chain.
  73 */
  74void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
  75                             unsigned long *rmap, long pte_index, int realmode)
  76{
  77        struct revmap_entry *head, *tail;
  78        unsigned long i;
  79
  80        if (*rmap & KVMPPC_RMAP_PRESENT) {
  81                i = *rmap & KVMPPC_RMAP_INDEX;
  82                head = &kvm->arch.revmap[i];
  83                if (realmode)
  84                        head = real_vmalloc_addr(head);
  85                tail = &kvm->arch.revmap[head->back];
  86                if (realmode)
  87                        tail = real_vmalloc_addr(tail);
  88                rev->forw = i;
  89                rev->back = head->back;
  90                tail->forw = pte_index;
  91                head->back = pte_index;
  92        } else {
  93                rev->forw = rev->back = pte_index;
  94                *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
  95                        pte_index | KVMPPC_RMAP_PRESENT;
  96        }
  97        unlock_rmap(rmap);
  98}
  99EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 100
 101/* Update the changed page order field of an rmap entry */
 102void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
 103{
 104        unsigned long order;
 105
 106        if (!psize)
 107                return;
 108        order = ilog2(psize);
 109        order <<= KVMPPC_RMAP_CHG_SHIFT;
 110        if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER))
 111                *rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order;
 112}
 113EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
 114
  115/* Returns a pointer to the rmap entry for the page mapped by a HPTE */
 116static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
 117                                      unsigned long hpte_gr)
 118{
 119        struct kvm_memory_slot *memslot;
 120        unsigned long *rmap;
 121        unsigned long gfn;
 122
 123        gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
 124        memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
 125        if (!memslot)
 126                return NULL;
 127
 128        rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
 129        return rmap;
 130}
 131
 132/* Remove this HPTE from the chain for a real page */
 133static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 134                                struct revmap_entry *rev,
 135                                unsigned long hpte_v, unsigned long hpte_r)
 136{
 137        struct revmap_entry *next, *prev;
 138        unsigned long ptel, head;
 139        unsigned long *rmap;
 140        unsigned long rcbits;
 141
 142        rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
 143        ptel = rev->guest_rpte |= rcbits;
 144        rmap = revmap_for_hpte(kvm, hpte_v, ptel);
 145        if (!rmap)
 146                return;
 147        lock_rmap(rmap);
 148
 149        head = *rmap & KVMPPC_RMAP_INDEX;
 150        next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
 151        prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
 152        next->back = rev->back;
 153        prev->forw = rev->forw;
 154        if (head == pte_index) {
 155                head = rev->forw;
 156                if (head == pte_index)
 157                        *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
 158                else
 159                        *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
 160        }
 161        *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
 162        if (rcbits & HPTE_R_C)
 163                kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r));
 164        unlock_rmap(rmap);
 165}
 166
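     /*
      * Common implementation of the H_ENTER hypercall, used from both
      * real and virtual mode (hence the realmode parameter): validate
      * the proposed HPTE, translate the guest real address via the
      * memslot and host page tables, find (or, with H_EXACT, verify)
      * a free slot in the HPT group, link valid entries into the
      * reverse-map chain, then write the HPTE and unlock it.  On
      * success the chosen HPT index is returned in *pte_idx_ret.
      */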
 167long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 168                       long pte_index, unsigned long pteh, unsigned long ptel,
 169                       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
 170{
 171        unsigned long i, pa, gpa, gfn, psize;
 172        unsigned long slot_fn, hva;
 173        __be64 *hpte;
 174        struct revmap_entry *rev;
 175        unsigned long g_ptel;
 176        struct kvm_memory_slot *memslot;
 177        unsigned hpage_shift;
 178        bool is_ci;
 179        unsigned long *rmap;
 180        pte_t *ptep;
 181        unsigned int writing;
 182        unsigned long mmu_seq;
 183        unsigned long rcbits, irq_flags = 0;
 184
 185        psize = hpte_page_size(pteh, ptel);
 186        if (!psize)
 187                return H_PARAMETER;
 188        writing = hpte_is_writable(ptel);
 189        pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
 190        ptel &= ~HPTE_GR_RESERVED;
 191        g_ptel = ptel;
 192
 193        /* used later to detect if we might have been invalidated */
 194        mmu_seq = kvm->mmu_notifier_seq;
 195        smp_rmb();
 196
 197        /* Find the memslot (if any) for this address */
 198        gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
 199        gfn = gpa >> PAGE_SHIFT;
 200        memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
 201        pa = 0;
 202        is_ci = false;
 203        rmap = NULL;
 204        if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
 205                /* Emulated MMIO - mark this with key=31 */
 206                pteh |= HPTE_V_ABSENT;
 207                ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
 208                goto do_insert;
 209        }
 210
 211        /* Check if the requested page fits entirely in the memslot. */
 212        if (!slot_is_aligned(memslot, psize))
 213                return H_PARAMETER;
 214        slot_fn = gfn - memslot->base_gfn;
 215        rmap = &memslot->arch.rmap[slot_fn];
 216
 217        /* Translate to host virtual address */
 218        hva = __gfn_to_hva_memslot(memslot, gfn);
  219        /*
  220         * If we had a page table change after the lookup, we would
  221         * retry via mmu_notifier_retry.
  222         */
 223        if (realmode)
 224                ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL,
 225                                                   &hpage_shift);
 226        else {
 227                local_irq_save(irq_flags);
 228                ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL,
 229                                                 &hpage_shift);
 230        }
 231        if (ptep) {
 232                pte_t pte;
 233                unsigned int host_pte_size;
 234
 235                if (hpage_shift)
 236                        host_pte_size = 1ul << hpage_shift;
 237                else
 238                        host_pte_size = PAGE_SIZE;
  239                /*
  240                 * We should always find a guest page size that is
  241                 * <= the host page size, if the host is using hugepages.
  242                 */
 243                if (host_pte_size < psize) {
 244                        if (!realmode)
  245                                local_irq_restore(irq_flags);
 246                        return H_PARAMETER;
 247                }
 248                pte = kvmppc_read_update_linux_pte(ptep, writing);
 249                if (pte_present(pte) && !pte_protnone(pte)) {
 250                        if (writing && !pte_write(pte))
 251                                /* make the actual HPTE be read-only */
 252                                ptel = hpte_make_readonly(ptel);
 253                        is_ci = pte_ci(pte);
 254                        pa = pte_pfn(pte) << PAGE_SHIFT;
 255                        pa |= hva & (host_pte_size - 1);
 256                        pa |= gpa & ~PAGE_MASK;
 257                }
 258        }
 259        if (!realmode)
 260                local_irq_restore(irq_flags);
 261
 262        ptel &= ~(HPTE_R_PP0 - psize);
 263        ptel |= pa;
 264
 265        if (pa)
 266                pteh |= HPTE_V_VALID;
 267        else
 268                pteh |= HPTE_V_ABSENT;
 269
  270        /* If we had a host pte mapping then check the WIMG bits */
 271        if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
 272                if (is_ci)
 273                        return H_PARAMETER;
 274                /*
 275                 * Allow guest to map emulated device memory as
 276                 * uncacheable, but actually make it cacheable.
 277                 */
 278                ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
 279                ptel |= HPTE_R_M;
 280        }
 281
 282        /* Find and lock the HPTEG slot to use */
 283 do_insert:
 284        if (pte_index >= kvm->arch.hpt_npte)
 285                return H_PARAMETER;
 286        if (likely((flags & H_EXACT) == 0)) {
 287                pte_index &= ~7UL;
 288                hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 289                for (i = 0; i < 8; ++i) {
 290                        if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
 291                            try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
 292                                          HPTE_V_ABSENT))
 293                                break;
 294                        hpte += 2;
 295                }
 296                if (i == 8) {
 297                        /*
 298                         * Since try_lock_hpte doesn't retry (not even stdcx.
 299                         * failures), it could be that there is a free slot
 300                         * but we transiently failed to lock it.  Try again,
 301                         * actually locking each slot and checking it.
 302                         */
 303                        hpte -= 16;
 304                        for (i = 0; i < 8; ++i) {
 305                                u64 pte;
 306                                while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 307                                        cpu_relax();
 308                                pte = be64_to_cpu(hpte[0]);
 309                                if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
 310                                        break;
 311                                __unlock_hpte(hpte, pte);
 312                                hpte += 2;
 313                        }
 314                        if (i == 8)
 315                                return H_PTEG_FULL;
 316                }
 317                pte_index += i;
 318        } else {
 319                hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 320                if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
 321                                   HPTE_V_ABSENT)) {
 322                        /* Lock the slot and check again */
 323                        u64 pte;
 324
 325                        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 326                                cpu_relax();
 327                        pte = be64_to_cpu(hpte[0]);
 328                        if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
 329                                __unlock_hpte(hpte, pte);
 330                                return H_PTEG_FULL;
 331                        }
 332                }
 333        }
 334
 335        /* Save away the guest's idea of the second HPTE dword */
 336        rev = &kvm->arch.revmap[pte_index];
 337        if (realmode)
 338                rev = real_vmalloc_addr(rev);
 339        if (rev) {
 340                rev->guest_rpte = g_ptel;
 341                note_hpte_modification(kvm, rev);
 342        }
 343
 344        /* Link HPTE into reverse-map chain */
 345        if (pteh & HPTE_V_VALID) {
 346                if (realmode)
 347                        rmap = real_vmalloc_addr(rmap);
 348                lock_rmap(rmap);
 349                /* Check for pending invalidations under the rmap chain lock */
 350                if (mmu_notifier_retry(kvm, mmu_seq)) {
 351                        /* inval in progress, write a non-present HPTE */
 352                        pteh |= HPTE_V_ABSENT;
 353                        pteh &= ~HPTE_V_VALID;
 354                        unlock_rmap(rmap);
 355                } else {
 356                        kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
 357                                                realmode);
 358                        /* Only set R/C in real HPTE if already set in *rmap */
 359                        rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
 360                        ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
 361                }
 362        }
 363
 364        hpte[1] = cpu_to_be64(ptel);
 365
 366        /* Write the first HPTE dword, unlocking the HPTE and making it valid */
 367        eieio();
 368        __unlock_hpte(hpte, pteh);
 369        asm volatile("ptesync" : : : "memory");
 370
 371        *pte_idx_ret = pte_index;
 372        return H_SUCCESS;
 373}
 374EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
 375
 376long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 377                    long pte_index, unsigned long pteh, unsigned long ptel)
 378{
 379        return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
 380                                 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
 381}
 382
 383#ifdef __BIG_ENDIAN__
 384#define LOCK_TOKEN      (*(u32 *)(&get_paca()->lock_token))
 385#else
 386#define LOCK_TOKEN      (*(u32 *)(&get_paca()->paca_index))
 387#endif
 388
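     /*
      * Try once to acquire the global tlbie lock: returns 1 if the
      * lock was taken, 0 if it was already held.  A non-zero per-CPU
      * token from the PACA is stored in the lock word while it is held.
      */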
 389static inline int try_lock_tlbie(unsigned int *lock)
 390{
 391        unsigned int tmp, old;
 392        unsigned int token = LOCK_TOKEN;
 393
 394        asm volatile("1:lwarx   %1,0,%2\n"
 395                     "  cmpwi   cr0,%1,0\n"
 396                     "  bne     2f\n"
 397                     "  stwcx.  %3,0,%2\n"
 398                     "  bne-    1b\n"
 399                     "  isync\n"
 400                     "2:"
 401                     : "=&r" (tmp), "=&r" (old)
 402                     : "r" (lock), "r" (token)
 403                     : "cc", "memory");
 404        return old == 0;
 405}
 406
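     /*
      * Invalidate the TLB entries described by rbvalues[0..npages-1],
      * either globally with tlbie (serialised by the tlbie lock) or
      * locally with tlbiel, optionally preceded by a ptesync to order
      * the preceding HPTE updates.
      */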
 407static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 408                      long npages, int global, bool need_sync)
 409{
 410        long i;
 411
 412        if (global) {
 413                while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
 414                        cpu_relax();
 415                if (need_sync)
 416                        asm volatile("ptesync" : : : "memory");
 417                for (i = 0; i < npages; ++i)
 418                        asm volatile(PPC_TLBIE(%1,%0) : :
 419                                     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
 420                asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 421                kvm->arch.tlbie_lock = 0;
 422        } else {
 423                if (need_sync)
 424                        asm volatile("ptesync" : : : "memory");
 425                for (i = 0; i < npages; ++i)
 426                        asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
 427                asm volatile("ptesync" : : : "memory");
 428        }
 429}
 430
 431long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 432                        unsigned long pte_index, unsigned long avpn,
 433                        unsigned long *hpret)
 434{
 435        __be64 *hpte;
 436        unsigned long v, r, rb;
 437        struct revmap_entry *rev;
 438        u64 pte;
 439
 440        if (pte_index >= kvm->arch.hpt_npte)
 441                return H_PARAMETER;
 442        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 443        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 444                cpu_relax();
 445        pte = be64_to_cpu(hpte[0]);
 446        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
 447            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
 448            ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
 449                __unlock_hpte(hpte, pte);
 450                return H_NOT_FOUND;
 451        }
 452
 453        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 454        v = pte & ~HPTE_V_HVLOCK;
 455        if (v & HPTE_V_VALID) {
 456                hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
 457                rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
 458                do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 459                /*
 460                 * The reference (R) and change (C) bits in a HPT
 461                 * entry can be set by hardware at any time up until
 462                 * the HPTE is invalidated and the TLB invalidation
 463                 * sequence has completed.  This means that when
 464                 * removing a HPTE, we need to re-read the HPTE after
 465                 * the invalidation sequence has completed in order to
 466                 * obtain reliable values of R and C.
 467                 */
 468                remove_revmap_chain(kvm, pte_index, rev, v,
 469                                    be64_to_cpu(hpte[1]));
 470        }
 471        r = rev->guest_rpte & ~HPTE_GR_RESERVED;
 472        note_hpte_modification(kvm, rev);
 473        unlock_hpte(hpte, 0);
 474
 475        if (v & HPTE_V_ABSENT)
 476                v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 477        hpret[0] = v;
 478        hpret[1] = r;
 479        return H_SUCCESS;
 480}
 481EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
 482
 483long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 484                     unsigned long pte_index, unsigned long avpn)
 485{
 486        return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
 487                                  &vcpu->arch.gpr[4]);
 488}
 489
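     /*
      * H_BULK_REMOVE: process up to 4 removal requests passed in the
      * guest's GPRs (two doublewords per request), batching the TLB
      * invalidations, and write a status code plus the final R/C bits
      * back into the first doubleword of each request.
      */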
 490long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 491{
 492        struct kvm *kvm = vcpu->kvm;
 493        unsigned long *args = &vcpu->arch.gpr[4];
 494        __be64 *hp, *hptes[4];
 495        unsigned long tlbrb[4];
 496        long int i, j, k, n, found, indexes[4];
 497        unsigned long flags, req, pte_index, rcbits;
 498        int global;
 499        long int ret = H_SUCCESS;
 500        struct revmap_entry *rev, *revs[4];
 501        u64 hp0;
 502
 503        global = global_invalidates(kvm, 0);
 504        for (i = 0; i < 4 && ret == H_SUCCESS; ) {
 505                n = 0;
 506                for (; i < 4; ++i) {
 507                        j = i * 2;
 508                        pte_index = args[j];
 509                        flags = pte_index >> 56;
 510                        pte_index &= ((1ul << 56) - 1);
 511                        req = flags >> 6;
 512                        flags &= 3;
 513                        if (req == 3) {         /* no more requests */
 514                                i = 4;
 515                                break;
 516                        }
 517                        if (req != 1 || flags == 3 ||
 518                            pte_index >= kvm->arch.hpt_npte) {
 519                                /* parameter error */
 520                                args[j] = ((0xa0 | flags) << 56) + pte_index;
 521                                ret = H_PARAMETER;
 522                                break;
 523                        }
 524                        hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
 525                        /* to avoid deadlock, don't spin except for first */
 526                        if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
 527                                if (n)
 528                                        break;
 529                                while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
 530                                        cpu_relax();
 531                        }
 532                        found = 0;
 533                        hp0 = be64_to_cpu(hp[0]);
 534                        if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
 535                                switch (flags & 3) {
 536                                case 0:         /* absolute */
 537                                        found = 1;
 538                                        break;
 539                                case 1:         /* andcond */
 540                                        if (!(hp0 & args[j + 1]))
 541                                                found = 1;
 542                                        break;
 543                                case 2:         /* AVPN */
 544                                        if ((hp0 & ~0x7fUL) == args[j + 1])
 545                                                found = 1;
 546                                        break;
 547                                }
 548                        }
 549                        if (!found) {
 550                                hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 551                                args[j] = ((0x90 | flags) << 56) + pte_index;
 552                                continue;
 553                        }
 554
 555                        args[j] = ((0x80 | flags) << 56) + pte_index;
 556                        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 557                        note_hpte_modification(kvm, rev);
 558
 559                        if (!(hp0 & HPTE_V_VALID)) {
 560                                /* insert R and C bits from PTE */
 561                                rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
 562                                args[j] |= rcbits << (56 - 5);
 563                                hp[0] = 0;
 564                                continue;
 565                        }
 566
 567                        /* leave it locked */
 568                        hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
 569                        tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
 570                                be64_to_cpu(hp[1]), pte_index);
 571                        indexes[n] = j;
 572                        hptes[n] = hp;
 573                        revs[n] = rev;
 574                        ++n;
 575                }
 576
 577                if (!n)
 578                        break;
 579
 580                /* Now that we've collected a batch, do the tlbies */
 581                do_tlbies(kvm, tlbrb, n, global, true);
 582
 583                /* Read PTE low words after tlbie to get final R/C values */
 584                for (k = 0; k < n; ++k) {
 585                        j = indexes[k];
 586                        pte_index = args[j] & ((1ul << 56) - 1);
 587                        hp = hptes[k];
 588                        rev = revs[k];
 589                        remove_revmap_chain(kvm, pte_index, rev,
 590                                be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
 591                        rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
 592                        args[j] |= rcbits << (56 - 5);
 593                        __unlock_hpte(hp, 0);
 594                }
 595        }
 596
 597        return ret;
 598}
 599
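     /*
      * H_PROTECT: update the protection (pp), no-execute (N) and key
      * bits of an existing HPTE, both in the guest's view and, for a
      * valid entry, in the real HPTE, invalidating the old translation
      * first if it changes.
      */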
 600long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 601                      unsigned long pte_index, unsigned long avpn,
 602                      unsigned long va)
 603{
 604        struct kvm *kvm = vcpu->kvm;
 605        __be64 *hpte;
 606        struct revmap_entry *rev;
 607        unsigned long v, r, rb, mask, bits;
 608        u64 pte;
 609
 610        if (pte_index >= kvm->arch.hpt_npte)
 611                return H_PARAMETER;
 612
 613        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 614        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 615                cpu_relax();
 616        pte = be64_to_cpu(hpte[0]);
 617        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
 618            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
 619                __unlock_hpte(hpte, pte);
 620                return H_NOT_FOUND;
 621        }
 622
 623        v = pte;
 624        bits = (flags << 55) & HPTE_R_PP0;
 625        bits |= (flags << 48) & HPTE_R_KEY_HI;
 626        bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
 627
 628        /* Update guest view of 2nd HPTE dword */
 629        mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
 630                HPTE_R_KEY_HI | HPTE_R_KEY_LO;
 631        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 632        if (rev) {
 633                r = (rev->guest_rpte & ~mask) | bits;
 634                rev->guest_rpte = r;
 635                note_hpte_modification(kvm, rev);
 636        }
 637
 638        /* Update HPTE */
 639        if (v & HPTE_V_VALID) {
 640                /*
 641                 * If the page is valid, don't let it transition from
 642                 * readonly to writable.  If it should be writable, we'll
 643                 * take a trap and let the page fault code sort it out.
 644                 */
 645                pte = be64_to_cpu(hpte[1]);
 646                r = (pte & ~mask) | bits;
 647                if (hpte_is_writable(r) && !hpte_is_writable(pte))
 648                        r = hpte_make_readonly(r);
 649                /* If the PTE is changing, invalidate it first */
 650                if (r != pte) {
 651                        rb = compute_tlbie_rb(v, r, pte_index);
 652                        hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
 653                                              HPTE_V_ABSENT);
 654                        do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
 655                                  true);
 656                        hpte[1] = cpu_to_be64(r);
 657                }
 658        }
 659        unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 660        asm volatile("ptesync" : : : "memory");
 661        return H_SUCCESS;
 662}
 663
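     /*
      * H_READ: return the contents of one HPTE (or a group of 4 with
      * H_READ_4) in GPRs 4 and up, reporting software-absent entries
      * as valid and substituting the guest's view of the second
      * doubleword for valid entries.
      */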
 664long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 665                   unsigned long pte_index)
 666{
 667        struct kvm *kvm = vcpu->kvm;
 668        __be64 *hpte;
 669        unsigned long v, r;
 670        int i, n = 1;
 671        struct revmap_entry *rev = NULL;
 672
 673        if (pte_index >= kvm->arch.hpt_npte)
 674                return H_PARAMETER;
 675        if (flags & H_READ_4) {
 676                pte_index &= ~3;
 677                n = 4;
 678        }
 679        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 680        for (i = 0; i < n; ++i, ++pte_index) {
 681                hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 682                v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
 683                r = be64_to_cpu(hpte[1]);
 684                if (v & HPTE_V_ABSENT) {
 685                        v &= ~HPTE_V_ABSENT;
 686                        v |= HPTE_V_VALID;
 687                }
 688                if (v & HPTE_V_VALID) {
 689                        r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
 690                        r &= ~HPTE_GR_RESERVED;
 691                }
 692                vcpu->arch.gpr[4 + i * 2] = v;
 693                vcpu->arch.gpr[5 + i * 2] = r;
 694        }
 695        return H_SUCCESS;
 696}
 697
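     /*
      * H_CLEAR_REF: clear the reference (R) bit of an HPTE, returning
      * the guest view of the second doubleword (including the old R/C
      * bits) in GPR 4 and recording the reference in the rmap entry.
      */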
 698long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
 699                        unsigned long pte_index)
 700{
 701        struct kvm *kvm = vcpu->kvm;
 702        __be64 *hpte;
 703        unsigned long v, r, gr;
 704        struct revmap_entry *rev;
 705        unsigned long *rmap;
 706        long ret = H_NOT_FOUND;
 707
 708        if (pte_index >= kvm->arch.hpt_npte)
 709                return H_PARAMETER;
 710
 711        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 712        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 713        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 714                cpu_relax();
 715        v = be64_to_cpu(hpte[0]);
 716        r = be64_to_cpu(hpte[1]);
 717        if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
 718                goto out;
 719
 720        gr = rev->guest_rpte;
 721        if (rev->guest_rpte & HPTE_R_R) {
 722                rev->guest_rpte &= ~HPTE_R_R;
 723                note_hpte_modification(kvm, rev);
 724        }
 725        if (v & HPTE_V_VALID) {
 726                gr |= r & (HPTE_R_R | HPTE_R_C);
 727                if (r & HPTE_R_R) {
 728                        kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
 729                        rmap = revmap_for_hpte(kvm, v, gr);
 730                        if (rmap) {
 731                                lock_rmap(rmap);
 732                                *rmap |= KVMPPC_RMAP_REFERENCED;
 733                                unlock_rmap(rmap);
 734                        }
 735                }
 736        }
 737        vcpu->arch.gpr[4] = gr;
 738        ret = H_SUCCESS;
 739 out:
 740        unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 741        return ret;
 742}
 743
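     /*
      * H_CLEAR_MOD: clear the change (C) bit of an HPTE, returning the
      * guest view of the second doubleword (including the old R/C bits)
      * in GPR 4 and recording the change in the rmap entry.
      */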
 744long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
 745                        unsigned long pte_index)
 746{
 747        struct kvm *kvm = vcpu->kvm;
 748        __be64 *hpte;
 749        unsigned long v, r, gr;
 750        struct revmap_entry *rev;
 751        unsigned long *rmap;
 752        long ret = H_NOT_FOUND;
 753
 754        if (pte_index >= kvm->arch.hpt_npte)
 755                return H_PARAMETER;
 756
 757        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 758        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 759        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 760                cpu_relax();
 761        v = be64_to_cpu(hpte[0]);
 762        r = be64_to_cpu(hpte[1]);
 763        if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
 764                goto out;
 765
 766        gr = rev->guest_rpte;
 767        if (gr & HPTE_R_C) {
 768                rev->guest_rpte &= ~HPTE_R_C;
 769                note_hpte_modification(kvm, rev);
 770        }
 771        if (v & HPTE_V_VALID) {
 772                /* need to make it temporarily absent so C is stable */
 773                hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
 774                kvmppc_invalidate_hpte(kvm, hpte, pte_index);
 775                r = be64_to_cpu(hpte[1]);
 776                gr |= r & (HPTE_R_R | HPTE_R_C);
 777                if (r & HPTE_R_C) {
 778                        unsigned long psize = hpte_page_size(v, r);
 779                        hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
 780                        eieio();
 781                        rmap = revmap_for_hpte(kvm, v, gr);
 782                        if (rmap) {
 783                                lock_rmap(rmap);
 784                                *rmap |= KVMPPC_RMAP_CHANGED;
 785                                kvmppc_update_rmap_change(rmap, psize);
 786                                unlock_rmap(rmap);
 787                        }
 788                }
 789        }
 790        vcpu->arch.gpr[4] = gr;
 791        ret = H_SUCCESS;
 792 out:
 793        unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 794        return ret;
 795}
 796
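     /*
      * Invalidate an HPTE and flush the stale translation from the
      * TLBs of all cores.
      */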
 797void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 798                        unsigned long pte_index)
 799{
 800        unsigned long rb;
 801
 802        hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
 803        rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
 804                              pte_index);
 805        do_tlbies(kvm, &rb, 1, 1, true);
 806}
 807EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
 808
 809void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
 810                           unsigned long pte_index)
 811{
 812        unsigned long rb;
 813        unsigned char rbyte;
 814
 815        rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
 816                              pte_index);
 817        rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
 818        /* modify only the second-last byte, which contains the ref bit */
 819        *((char *)hptep + 14) = rbyte;
 820        do_tlbies(kvm, &rb, 1, 1, false);
 821}
 822EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
 823
 824static int slb_base_page_shift[4] = {
 825        24,     /* 16M */
 826        16,     /* 64k */
 827        34,     /* 16G */
 828        20,     /* 1M, unsupported */
 829};
 830
  831/* When called from virtual mode, this function must be protected by
  832 * preempt_disable(); otherwise, holding HPTE_V_HVLOCK
  833 * can lead to a deadlock.
  834 */
 835long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 836                              unsigned long valid)
 837{
 838        unsigned int i;
 839        unsigned int pshift;
 840        unsigned long somask;
 841        unsigned long vsid, hash;
 842        unsigned long avpn;
 843        __be64 *hpte;
 844        unsigned long mask, val;
 845        unsigned long v, r;
 846
 847        /* Get page shift, work out hash and AVPN etc. */
 848        mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
 849        val = 0;
 850        pshift = 12;
 851        if (slb_v & SLB_VSID_L) {
 852                mask |= HPTE_V_LARGE;
 853                val |= HPTE_V_LARGE;
 854                pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
 855        }
 856        if (slb_v & SLB_VSID_B_1T) {
 857                somask = (1UL << 40) - 1;
 858                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
 859                vsid ^= vsid << 25;
 860        } else {
 861                somask = (1UL << 28) - 1;
 862                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
 863        }
 864        hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
 865        avpn = slb_v & ~(somask >> 16); /* also includes B */
 866        avpn |= (eaddr & somask) >> 16;
 867
 868        if (pshift >= 24)
 869                avpn &= ~((1UL << (pshift - 16)) - 1);
 870        else
 871                avpn &= ~0x7fUL;
 872        val |= avpn;
 873
 874        for (;;) {
 875                hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7));
 876
 877                for (i = 0; i < 16; i += 2) {
 878                        /* Read the PTE racily */
 879                        v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
 880
 881                        /* Check valid/absent, hash, segment size and AVPN */
 882                        if (!(v & valid) || (v & mask) != val)
 883                                continue;
 884
 885                        /* Lock the PTE and read it under the lock */
 886                        while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
 887                                cpu_relax();
 888                        v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
 889                        r = be64_to_cpu(hpte[i+1]);
 890
 891                        /*
 892                         * Check the HPTE again, including base page size
 893                         */
 894                        if ((v & valid) && (v & mask) == val &&
 895                            hpte_base_page_size(v, r) == (1ul << pshift))
 896                                /* Return with the HPTE still locked */
 897                                return (hash << 3) + (i >> 1);
 898
 899                        __unlock_hpte(&hpte[i], v);
 900                }
 901
 902                if (val & HPTE_V_SECONDARY)
 903                        break;
 904                val |= HPTE_V_SECONDARY;
 905                hash = hash ^ kvm->arch.hpt_mask;
 906        }
 907        return -1;
 908}
 909EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
 910
 911/*
 912 * Called in real mode to check whether an HPTE not found fault
 913 * is due to accessing a paged-out page or an emulated MMIO page,
 914 * or if a protection fault is due to accessing a page that the
 915 * guest wanted read/write access to but which we made read-only.
  916 * Returns a possibly modified status (DSISR) value if none of these
  917 * cases apply (i.e. pass the interrupt to the guest),
 918 * -1 to pass the fault up to host kernel mode code, -2 to do that
 919 * and also load the instruction word (for MMIO emulation),
 920 * or 0 if we should make the guest retry the access.
 921 */
 922long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 923                          unsigned long slb_v, unsigned int status, bool data)
 924{
 925        struct kvm *kvm = vcpu->kvm;
 926        long int index;
 927        unsigned long v, r, gr;
 928        __be64 *hpte;
 929        unsigned long valid;
 930        struct revmap_entry *rev;
 931        unsigned long pp, key;
 932
 933        /* For protection fault, expect to find a valid HPTE */
 934        valid = HPTE_V_VALID;
 935        if (status & DSISR_NOHPTE)
 936                valid |= HPTE_V_ABSENT;
 937
 938        index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
 939        if (index < 0) {
 940                if (status & DSISR_NOHPTE)
 941                        return status;  /* there really was no HPTE */
 942                return 0;               /* for prot fault, HPTE disappeared */
 943        }
 944        hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
 945        v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
 946        r = be64_to_cpu(hpte[1]);
 947        rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
 948        gr = rev->guest_rpte;
 949
 950        unlock_hpte(hpte, v);
 951
 952        /* For not found, if the HPTE is valid by now, retry the instruction */
 953        if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
 954                return 0;
 955
 956        /* Check access permissions to the page */
 957        pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
 958        key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
 959        status &= ~DSISR_NOHPTE;        /* DSISR_NOHPTE == SRR1_ISI_NOPT */
 960        if (!data) {
 961                if (gr & (HPTE_R_N | HPTE_R_G))
 962                        return status | SRR1_ISI_N_OR_G;
 963                if (!hpte_read_permission(pp, slb_v & key))
 964                        return status | SRR1_ISI_PROT;
 965        } else if (status & DSISR_ISSTORE) {
 966                /* check write permission */
 967                if (!hpte_write_permission(pp, slb_v & key))
 968                        return status | DSISR_PROTFAULT;
 969        } else {
 970                if (!hpte_read_permission(pp, slb_v & key))
 971                        return status | DSISR_PROTFAULT;
 972        }
 973
 974        /* Check storage key, if applicable */
 975        if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
 976                unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
 977                if (status & DSISR_ISSTORE)
 978                        perm >>= 1;
 979                if (perm & 1)
 980                        return status | DSISR_KEYFAULT;
 981        }
 982
 983        /* Save HPTE info for virtual-mode handler */
 984        vcpu->arch.pgfault_addr = addr;
 985        vcpu->arch.pgfault_index = index;
 986        vcpu->arch.pgfault_hpte[0] = v;
 987        vcpu->arch.pgfault_hpte[1] = r;
 988
 989        /* Check the storage key to see if it is possibly emulated MMIO */
 990        if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
 991            (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
 992            (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
 993                return -2;      /* MMIO emulation - load instr word */
 994
 995        return -1;              /* send fault up to host kernel mode */
 996}
 997