linux/arch/powerpc/kvm/book3s_hv_rm_mmu.c
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>

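/*
 * Note: much of this file runs in hypervisor real mode (MSR[DR]=0), where
 * only linear-map addresses can be dereferenced directly.  Structures that
 * live in vmalloc space (such as the reverse-map array used below) are
 * therefore translated to linear-map addresses with real_vmalloc_addr().
 */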
/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *x)
{
        unsigned long addr = (unsigned long) x;
        pte_t *p;

        p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
        if (!p || !pte_present(*p))
                return NULL;
        /* assume we don't have huge pages in vmalloc space... */
        addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
        return __va(addr);
}

/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm, unsigned long flags)
{
        int global;

        /*
         * If there is only one vcore, and it's currently running,
         * we can use tlbiel as long as we mark all other physical
         * cores as potentially having stale TLB entries for this lpid.
         * If we're not using MMU notifiers, we never take pages away
         * from the guest, so we can use tlbiel if requested.
         * Otherwise, don't use tlbiel.
         */
        if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
                global = 0;
        else if (kvm->arch.using_mmu_notifiers)
                global = 1;
        else
                global = !(flags & H_LOCAL);

        if (!global) {
                /* any other core might now have stale TLB entries... */
                smp_wmb();
                cpumask_setall(&kvm->arch.need_tlb_flush);
                cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
                                  &kvm->arch.need_tlb_flush);
        }

        return global;
}

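/*
 * Reverse-map bookkeeping: each guest real page has an rmap word in its
 * memslot.  When KVMPPC_RMAP_PRESENT is set, the KVMPPC_RMAP_INDEX bits
 * hold the index of one revmap_entry, and all the HPTEs that map the page
 * form a circular doubly-linked list through their forw/back index fields.
 */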
/*
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
 */
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
                             unsigned long *rmap, long pte_index, int realmode)
{
        struct revmap_entry *head, *tail;
        unsigned long i;

        if (*rmap & KVMPPC_RMAP_PRESENT) {
                i = *rmap & KVMPPC_RMAP_INDEX;
                head = &kvm->arch.revmap[i];
                if (realmode)
                        head = real_vmalloc_addr(head);
                tail = &kvm->arch.revmap[head->back];
                if (realmode)
                        tail = real_vmalloc_addr(tail);
                rev->forw = i;
                rev->back = head->back;
                tail->forw = pte_index;
                head->back = pte_index;
        } else {
                rev->forw = rev->back = pte_index;
                *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
                        pte_index | KVMPPC_RMAP_PRESENT;
        }
        unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
                                struct revmap_entry *rev,
                                unsigned long hpte_v, unsigned long hpte_r)
{
        struct revmap_entry *next, *prev;
        unsigned long gfn, ptel, head;
        struct kvm_memory_slot *memslot;
        unsigned long *rmap;
        unsigned long rcbits;

        rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
        ptel = rev->guest_rpte |= rcbits;
        gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
        memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
        if (!memslot)
                return;

        rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
        lock_rmap(rmap);

        head = *rmap & KVMPPC_RMAP_INDEX;
        next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
        prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
        next->back = rev->back;
        prev->forw = rev->forw;
        if (head == pte_index) {
                head = rev->forw;
                if (head == pte_index)
                        *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
                else
                        *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
        }
        *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
        unlock_rmap(rmap);
}

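/*
 * Look up the Linux PTE for a host virtual address and return it, along
 * with the size of the backing page in *pte_sizep.  Returns an empty PTE
 * if there is no mapping or if the backing page is smaller than the size
 * the caller asked for.  kvmppc_read_update_linux_pte() also updates the
 * accessed (and, for writes, dirty) bits in the Linux PTE as appropriate.
 */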
static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
                              int writing, unsigned long *pte_sizep)
{
        pte_t *ptep;
        unsigned long ps = *pte_sizep;
        unsigned int hugepage_shift;

        ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
        if (!ptep)
                return __pte(0);
        if (hugepage_shift)
                *pte_sizep = 1ul << hugepage_shift;
        else
                *pte_sizep = PAGE_SIZE;
        if (ps > *pte_sizep)
                return __pte(0);
        return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
}

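/*
 * HPTE_V_HVLOCK is a software-use bit in the first HPTE doubleword that
 * the hypervisor uses as a per-entry lock.  unlock_hpte() stores a new
 * first doubleword with release semantics, which also clears the lock bit.
 */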
static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
{
        asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
        hpte[0] = hpte_v;
}

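/*
 * Handler for the H_ENTER hcall: validate the proposed HPTE, translate the
 * guest real address to a host real address (via the slot_phys array when
 * MMU notifiers are not in use, otherwise via the Linux page tables), find
 * and lock a free slot in the HPTEG, link the entry into the reverse-map
 * chain, and finally write the HPTE.  Each HPTE is 16 bytes, hence the
 * (pte_index << 4) indexing into the hashed page table.
 */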
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                       long pte_index, unsigned long pteh, unsigned long ptel,
                       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
        unsigned long i, pa, gpa, gfn, psize;
        unsigned long slot_fn, hva;
        unsigned long *hpte;
        struct revmap_entry *rev;
        unsigned long g_ptel;
        struct kvm_memory_slot *memslot;
        unsigned long *physp, pte_size;
        unsigned long is_io;
        unsigned long *rmap;
        pte_t pte;
        unsigned int writing;
        unsigned long mmu_seq;
        unsigned long rcbits;

        psize = hpte_page_size(pteh, ptel);
        if (!psize)
                return H_PARAMETER;
        writing = hpte_is_writable(ptel);
        pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
        ptel &= ~HPTE_GR_RESERVED;
        g_ptel = ptel;

        /* used later to detect if we might have been invalidated */
        mmu_seq = kvm->mmu_notifier_seq;
        smp_rmb();

        /* Find the memslot (if any) for this address */
        gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
        gfn = gpa >> PAGE_SHIFT;
        memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
        pa = 0;
        is_io = ~0ul;
        rmap = NULL;
        if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
                /* PPC970 can't do emulated MMIO */
                if (!cpu_has_feature(CPU_FTR_ARCH_206))
                        return H_PARAMETER;
                /* Emulated MMIO - mark this with key=31 */
                pteh |= HPTE_V_ABSENT;
                ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
                goto do_insert;
        }

        /* Check if the requested page fits entirely in the memslot. */
        if (!slot_is_aligned(memslot, psize))
                return H_PARAMETER;
        slot_fn = gfn - memslot->base_gfn;
        rmap = &memslot->arch.rmap[slot_fn];

        if (!kvm->arch.using_mmu_notifiers) {
                physp = memslot->arch.slot_phys;
                if (!physp)
                        return H_PARAMETER;
                physp += slot_fn;
                if (realmode)
                        physp = real_vmalloc_addr(physp);
                pa = *physp;
                if (!pa)
                        return H_TOO_HARD;
                is_io = pa & (HPTE_R_I | HPTE_R_W);
                pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
                pa &= PAGE_MASK;
        } else {
                /* Translate to host virtual address */
                hva = __gfn_to_hva_memslot(memslot, gfn);

                /* Look up the Linux PTE for the backing page */
                pte_size = psize;
                pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
                if (pte_present(pte)) {
                        if (writing && !pte_write(pte))
                                /* make the actual HPTE be read-only */
                                ptel = hpte_make_readonly(ptel);
                        is_io = hpte_cache_bits(pte_val(pte));
                        pa = pte_pfn(pte) << PAGE_SHIFT;
                }
        }

        if (pte_size < psize)
                return H_PARAMETER;
        if (pa && pte_size > psize)
                pa |= gpa & (pte_size - 1);

        ptel &= ~(HPTE_R_PP0 - psize);
        ptel |= pa;

        if (pa)
                pteh |= HPTE_V_VALID;
        else
                pteh |= HPTE_V_ABSENT;

        /* Check WIMG */
        if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
                if (is_io)
                        return H_PARAMETER;
                /*
                 * Allow guest to map emulated device memory as
                 * uncacheable, but actually make it cacheable.
                 */
                ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
                ptel |= HPTE_R_M;
        }

        /* Find and lock the HPTEG slot to use */
 do_insert:
        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;
        if (likely((flags & H_EXACT) == 0)) {
                pte_index &= ~7UL;
                hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
                for (i = 0; i < 8; ++i) {
                        if ((*hpte & HPTE_V_VALID) == 0 &&
                            try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
                                          HPTE_V_ABSENT))
                                break;
                        hpte += 2;
                }
                if (i == 8) {
                        /*
                         * Since try_lock_hpte doesn't retry (not even stdcx.
                         * failures), it could be that there is a free slot
                         * but we transiently failed to lock it.  Try again,
                         * actually locking each slot and checking it.
                         */
                        hpte -= 16;
                        for (i = 0; i < 8; ++i) {
                                while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                        cpu_relax();
                                if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)))
                                        break;
                                *hpte &= ~HPTE_V_HVLOCK;
                                hpte += 2;
                        }
                        if (i == 8)
                                return H_PTEG_FULL;
                }
                pte_index += i;
        } else {
                hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
                if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
                                   HPTE_V_ABSENT)) {
                        /* Lock the slot and check again */
                        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                cpu_relax();
                        if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
                                *hpte &= ~HPTE_V_HVLOCK;
                                return H_PTEG_FULL;
                        }
                }
        }

        /* Save away the guest's idea of the second HPTE dword */
        rev = &kvm->arch.revmap[pte_index];
        if (realmode)
                rev = real_vmalloc_addr(rev);
        if (rev) {
                rev->guest_rpte = g_ptel;
                note_hpte_modification(kvm, rev);
        }

        /* Link HPTE into reverse-map chain */
        if (pteh & HPTE_V_VALID) {
                if (realmode)
                        rmap = real_vmalloc_addr(rmap);
                lock_rmap(rmap);
                /* Check for pending invalidations under the rmap chain lock */
                if (kvm->arch.using_mmu_notifiers &&
                    mmu_notifier_retry(kvm, mmu_seq)) {
                        /* inval in progress, write a non-present HPTE */
                        pteh |= HPTE_V_ABSENT;
                        pteh &= ~HPTE_V_VALID;
                        unlock_rmap(rmap);
                } else {
                        kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
                                                realmode);
                        /* Only set R/C in real HPTE if already set in *rmap */
                        rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
                        ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
                }
        }

        hpte[1] = ptel;

        /* Write the first HPTE dword, unlocking the HPTE and making it valid */
        eieio();
        hpte[0] = pteh;
        asm volatile("ptesync" : : : "memory");

        *pte_idx_ret = pte_index;
        return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);

long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                    long pte_index, unsigned long pteh, unsigned long ptel)
{
        return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
                                 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
}

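/*
 * Global TLB invalidations (tlbie ... eieio; tlbsync; ptesync) are
 * serialized across CPUs with kvm->arch.tlbie_lock.  try_lock_tlbie() is
 * a real-mode lwarx/stwcx. trylock on that word; the lock is released by
 * a plain store of zero once the invalidation sequence has completed.
 */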
#define LOCK_TOKEN      (*(u32 *)(&get_paca()->lock_token))

static inline int try_lock_tlbie(unsigned int *lock)
{
        unsigned int tmp, old;
        unsigned int token = LOCK_TOKEN;

        asm volatile("1:lwarx   %1,0,%2\n"
                     "  cmpwi   cr0,%1,0\n"
                     "  bne     2f\n"
                     "  stwcx.  %3,0,%2\n"
                     "  bne-    1b\n"
                     "  isync\n"
                     "2:"
                     : "=&r" (tmp), "=&r" (old)
                     : "r" (lock), "r" (token)
                     : "cc", "memory");
        return old == 0;
}

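/*
 * Handler for the H_REMOVE hcall: lock the HPTE, check the AVPN/ANDCOND
 * conditions, invalidate the entry and its TLB translation (globally or
 * locally, as decided by global_invalidates()), unlink it from the
 * reverse-map chain, and return the old first and second doublewords
 * through hpret[].
 */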
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
                        unsigned long pte_index, unsigned long avpn,
                        unsigned long *hpret)
{
        unsigned long *hpte;
        unsigned long v, r, rb;
        struct revmap_entry *rev;

        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;
        hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                cpu_relax();
        if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
            ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
                hpte[0] &= ~HPTE_V_HVLOCK;
                return H_NOT_FOUND;
        }

        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        v = hpte[0] & ~HPTE_V_HVLOCK;
        if (v & HPTE_V_VALID) {
                hpte[0] &= ~HPTE_V_VALID;
                rb = compute_tlbie_rb(v, hpte[1], pte_index);
                if (global_invalidates(kvm, flags)) {
                        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                                cpu_relax();
                        asm volatile("ptesync" : : : "memory");
                        asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
                                     : : "r" (rb), "r" (kvm->arch.lpid));
                        asm volatile("ptesync" : : : "memory");
                        kvm->arch.tlbie_lock = 0;
                } else {
                        asm volatile("ptesync" : : : "memory");
                        asm volatile("tlbiel %0" : : "r" (rb));
                        asm volatile("ptesync" : : : "memory");
                }
                /* Read PTE low word after tlbie to get final R/C values */
                remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
        }
        r = rev->guest_rpte & ~HPTE_GR_RESERVED;
        note_hpte_modification(kvm, rev);
        unlock_hpte(hpte, 0);

        hpret[0] = v;
        hpret[1] = r;
        return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);

long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                     unsigned long pte_index, unsigned long avpn)
{
        return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
                                  &vcpu->arch.gpr[4]);
}

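/*
 * H_BULK_REMOVE: up to four remove requests are packed into the guest's
 * parameter registers as (control/index, AVPN) pairs.  The top byte of
 * each first word encodes the request type and flags, and the return
 * status for each entry is written back into that byte.  Entries are
 * processed in batches so that the TLB invalidations can be issued
 * together after the HPTEs have been invalidated.
 */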
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long *args = &vcpu->arch.gpr[4];
        unsigned long *hp, *hptes[4], tlbrb[4];
        long int i, j, k, n, found, indexes[4];
        unsigned long flags, req, pte_index, rcbits;
        long int local = 0;
        long int ret = H_SUCCESS;
        struct revmap_entry *rev, *revs[4];

        if (atomic_read(&kvm->online_vcpus) == 1)
                local = 1;
        for (i = 0; i < 4 && ret == H_SUCCESS; ) {
                n = 0;
                for (; i < 4; ++i) {
                        j = i * 2;
                        pte_index = args[j];
                        flags = pte_index >> 56;
                        pte_index &= ((1ul << 56) - 1);
                        req = flags >> 6;
                        flags &= 3;
                        if (req == 3) {         /* no more requests */
                                i = 4;
                                break;
                        }
                        if (req != 1 || flags == 3 ||
                            pte_index >= kvm->arch.hpt_npte) {
                                /* parameter error */
                                args[j] = ((0xa0 | flags) << 56) + pte_index;
                                ret = H_PARAMETER;
                                break;
                        }
                        hp = (unsigned long *)
                                (kvm->arch.hpt_virt + (pte_index << 4));
                        /* to avoid deadlock, don't spin except for first */
                        if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
                                if (n)
                                        break;
                                while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
                                        cpu_relax();
                        }
                        found = 0;
                        if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
                                switch (flags & 3) {
                                case 0:         /* absolute */
                                        found = 1;
                                        break;
                                case 1:         /* andcond */
                                        if (!(hp[0] & args[j + 1]))
                                                found = 1;
                                        break;
                                case 2:         /* AVPN */
                                        if ((hp[0] & ~0x7fUL) == args[j + 1])
                                                found = 1;
                                        break;
                                }
                        }
                        if (!found) {
                                hp[0] &= ~HPTE_V_HVLOCK;
                                args[j] = ((0x90 | flags) << 56) + pte_index;
                                continue;
                        }

                        args[j] = ((0x80 | flags) << 56) + pte_index;
                        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
                        note_hpte_modification(kvm, rev);

                        if (!(hp[0] & HPTE_V_VALID)) {
                                /* insert R and C bits from PTE */
                                rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
                                args[j] |= rcbits << (56 - 5);
                                hp[0] = 0;
                                continue;
                        }

                        hp[0] &= ~HPTE_V_VALID;         /* leave it locked */
                        tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
                        indexes[n] = j;
                        hptes[n] = hp;
                        revs[n] = rev;
                        ++n;
                }

                if (!n)
                        break;

                /* Now that we've collected a batch, do the tlbies */
                if (!local) {
                        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                                cpu_relax();
                        asm volatile("ptesync" : : : "memory");
                        for (k = 0; k < n; ++k)
                                asm volatile(PPC_TLBIE(%1,%0) : :
                                             "r" (tlbrb[k]),
                                             "r" (kvm->arch.lpid));
                        asm volatile("eieio; tlbsync; ptesync" : : : "memory");
                        kvm->arch.tlbie_lock = 0;
                } else {
                        asm volatile("ptesync" : : : "memory");
                        for (k = 0; k < n; ++k)
                                asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
                        asm volatile("ptesync" : : : "memory");
                }

                /* Read PTE low words after tlbie to get final R/C values */
                for (k = 0; k < n; ++k) {
                        j = indexes[k];
                        pte_index = args[j] & ((1ul << 56) - 1);
                        hp = hptes[k];
                        rev = revs[k];
                        remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
                        rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
                        args[j] |= rcbits << (56 - 5);
                        hp[0] = 0;
                }
        }

        return ret;
}

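/*
 * H_PROTECT: update the pp, key and N bits of an existing HPTE.  If the
 * entry was valid, the old translation is flushed from the TLB, and if
 * the guest asks for write access to a page the host has mapped
 * read-only, the new HPTE is downgraded to read-only as well.
 */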
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
                      unsigned long pte_index, unsigned long avpn,
                      unsigned long va)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long *hpte;
        struct revmap_entry *rev;
        unsigned long v, r, rb, mask, bits;

        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;

        hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                cpu_relax();
        if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
                hpte[0] &= ~HPTE_V_HVLOCK;
                return H_NOT_FOUND;
        }

        v = hpte[0];
        bits = (flags << 55) & HPTE_R_PP0;
        bits |= (flags << 48) & HPTE_R_KEY_HI;
        bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);

        /* Update guest view of 2nd HPTE dword */
        mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
                HPTE_R_KEY_HI | HPTE_R_KEY_LO;
        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        if (rev) {
                r = (rev->guest_rpte & ~mask) | bits;
                rev->guest_rpte = r;
                note_hpte_modification(kvm, rev);
        }
        r = (hpte[1] & ~mask) | bits;

        /* Update HPTE */
        if (v & HPTE_V_VALID) {
                rb = compute_tlbie_rb(v, r, pte_index);
                hpte[0] = v & ~HPTE_V_VALID;
                if (global_invalidates(kvm, flags)) {
                        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                                cpu_relax();
                        asm volatile("ptesync" : : : "memory");
                        asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
                                     : : "r" (rb), "r" (kvm->arch.lpid));
                        asm volatile("ptesync" : : : "memory");
                        kvm->arch.tlbie_lock = 0;
                } else {
                        asm volatile("ptesync" : : : "memory");
                        asm volatile("tlbiel %0" : : "r" (rb));
                        asm volatile("ptesync" : : : "memory");
                }
                /*
                 * If the host has this page as readonly but the guest
                 * wants to make it read/write, reduce the permissions.
                 * Checking the host permissions involves finding the
                 * memslot and then the Linux PTE for the page.
                 */
                if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
                        unsigned long psize, gfn, hva;
                        struct kvm_memory_slot *memslot;
                        pgd_t *pgdir = vcpu->arch.pgdir;
                        pte_t pte;

                        psize = hpte_page_size(v, r);
                        gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
                        memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
                        if (memslot) {
                                hva = __gfn_to_hva_memslot(memslot, gfn);
                                pte = lookup_linux_pte(pgdir, hva, 1, &psize);
                                if (pte_present(pte) && !pte_write(pte))
                                        r = hpte_make_readonly(r);
                        }
                }
        }
        hpte[1] = r;
        eieio();
        hpte[0] = v & ~HPTE_V_HVLOCK;
        asm volatile("ptesync" : : : "memory");
        return H_SUCCESS;
}

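/*
 * H_READ: return the first and second doublewords of one HPTE (or of four
 * consecutive HPTEs with H_READ_4) in the guest's return registers.  Valid
 * or absent entries report the guest's view of the second doubleword from
 * the revmap array, with the current R and C bits merged in; absent
 * (paged-out) entries are presented to the guest as valid.
 */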
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
                   unsigned long pte_index)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long *hpte, v, r;
        int i, n = 1;
        struct revmap_entry *rev = NULL;

        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;
        if (flags & H_READ_4) {
                pte_index &= ~3;
                n = 4;
        }
        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        for (i = 0; i < n; ++i, ++pte_index) {
                hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
                v = hpte[0] & ~HPTE_V_HVLOCK;
                r = hpte[1];
                if (v & HPTE_V_ABSENT) {
                        v &= ~HPTE_V_ABSENT;
                        v |= HPTE_V_VALID;
                }
                if (v & HPTE_V_VALID) {
                        r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
                        r &= ~HPTE_GR_RESERVED;
                }
                vcpu->arch.gpr[4 + i * 2] = v;
                vcpu->arch.gpr[5 + i * 2] = r;
        }
        return H_SUCCESS;
}

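/*
 * Clear the valid bit of an HPTE and flush the corresponding translation
 * with a global tlbie.  Used by the virtual-mode MMU code, e.g. when a
 * page is taken away from the guest.
 */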
void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
                        unsigned long pte_index)
{
        unsigned long rb;

        hptep[0] &= ~HPTE_V_VALID;
        rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                cpu_relax();
        asm volatile("ptesync" : : : "memory");
        asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
                     : : "r" (rb), "r" (kvm->arch.lpid));
        asm volatile("ptesync" : : : "memory");
        kvm->arch.tlbie_lock = 0;
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);

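/*
 * Clear the reference (R) bit of an HPTE by rewriting only the byte of
 * the second doubleword that contains it, then flush the translation so
 * that the bit gets set again on the next access to the page.
 */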
void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
                           unsigned long pte_index)
{
        unsigned long rb;
        unsigned char rbyte;

        rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
        rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
        /* modify only the second-last byte, which contains the ref bit */
        *((char *)hptep + 14) = rbyte;
        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                cpu_relax();
        asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
                     : : "r" (rb), "r" (kvm->arch.lpid));
        asm volatile("ptesync" : : : "memory");
        kvm->arch.tlbie_lock = 0;
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);

static int slb_base_page_shift[4] = {
        24,     /* 16M */
        16,     /* 64k */
        34,     /* 16G */
        20,     /* 1M, unsupported */
};

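/*
 * Search the hashed page table for an entry matching the given effective
 * address under the given SLB entry.  Both the primary and secondary hash
 * buckets are scanned (each HPTEG is 8 entries of 16 bytes, hence the
 * (hash << 7) offset).  On success the global HPTE index is returned with
 * the entry still locked (HPTE_V_HVLOCK set); -1 means not found.
 */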
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
                              unsigned long valid)
{
        unsigned int i;
        unsigned int pshift;
        unsigned long somask;
        unsigned long vsid, hash;
        unsigned long avpn;
        unsigned long *hpte;
        unsigned long mask, val;
        unsigned long v, r;

        /* Get page shift, work out hash and AVPN etc. */
        mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
        val = 0;
        pshift = 12;
        if (slb_v & SLB_VSID_L) {
                mask |= HPTE_V_LARGE;
                val |= HPTE_V_LARGE;
                pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
        }
        if (slb_v & SLB_VSID_B_1T) {
                somask = (1UL << 40) - 1;
                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
                vsid ^= vsid << 25;
        } else {
                somask = (1UL << 28) - 1;
                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
        }
        hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
        avpn = slb_v & ~(somask >> 16); /* also includes B */
        avpn |= (eaddr & somask) >> 16;

        if (pshift >= 24)
                avpn &= ~((1UL << (pshift - 16)) - 1);
        else
                avpn &= ~0x7fUL;
        val |= avpn;

        for (;;) {
                hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7));

                for (i = 0; i < 16; i += 2) {
                        /* Read the PTE racily */
                        v = hpte[i] & ~HPTE_V_HVLOCK;

                        /* Check valid/absent, hash, segment size and AVPN */
                        if (!(v & valid) || (v & mask) != val)
                                continue;

                        /* Lock the PTE and read it under the lock */
                        while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
                                cpu_relax();
                        v = hpte[i] & ~HPTE_V_HVLOCK;
                        r = hpte[i+1];

                        /*
                         * Check the HPTE again, including large page size
                         * Since we don't currently allow any MPSS (mixed
                         * page-size segment) page sizes, it is sufficient
                         * to check against the actual page size.
                         */
                        if ((v & valid) && (v & mask) == val &&
                            hpte_page_size(v, r) == (1ul << pshift))
                                /* Return with the HPTE still locked */
                                return (hash << 3) + (i >> 1);

                        /* Unlock and move on */
                        hpte[i] = v;
                }

                if (val & HPTE_V_SECONDARY)
                        break;
                val |= HPTE_V_SECONDARY;
                hash = hash ^ kvm->arch.hpt_mask;
        }
        return -1;
}
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);

/*
 * Called in real mode to check whether an HPTE not found fault
 * is due to accessing a paged-out page or an emulated MMIO page,
 * or if a protection fault is due to accessing a page that the
 * guest wanted read/write access to but which we made read-only.
 * Returns a possibly modified status (DSISR) value if the fault
 * does not fall into one of those cases (i.e. pass the interrupt
 * to the guest), -1 to pass the fault up to host kernel mode code,
 * -2 to do that and also load the instruction word (for MMIO
 * emulation), or 0 if we should make the guest retry the access.
 */
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                          unsigned long slb_v, unsigned int status, bool data)
{
        struct kvm *kvm = vcpu->kvm;
        long int index;
        unsigned long v, r, gr;
        unsigned long *hpte;
        unsigned long valid;
        struct revmap_entry *rev;
        unsigned long pp, key;

        /* For protection fault, expect to find a valid HPTE */
        valid = HPTE_V_VALID;
        if (status & DSISR_NOHPTE)
                valid |= HPTE_V_ABSENT;

        index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
        if (index < 0) {
                if (status & DSISR_NOHPTE)
                        return status;  /* there really was no HPTE */
                return 0;               /* for prot fault, HPTE disappeared */
        }
        hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
        v = hpte[0] & ~HPTE_V_HVLOCK;
        r = hpte[1];
        rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
        gr = rev->guest_rpte;

        unlock_hpte(hpte, v);

        /* For not found, if the HPTE is valid by now, retry the instruction */
        if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
                return 0;

        /* Check access permissions to the page */
        pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
        key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
        status &= ~DSISR_NOHPTE;        /* DSISR_NOHPTE == SRR1_ISI_NOPT */
        if (!data) {
                if (gr & (HPTE_R_N | HPTE_R_G))
                        return status | SRR1_ISI_N_OR_G;
                if (!hpte_read_permission(pp, slb_v & key))
                        return status | SRR1_ISI_PROT;
        } else if (status & DSISR_ISSTORE) {
                /* check write permission */
                if (!hpte_write_permission(pp, slb_v & key))
                        return status | DSISR_PROTFAULT;
        } else {
                if (!hpte_read_permission(pp, slb_v & key))
                        return status | DSISR_PROTFAULT;
        }

        /* Check storage key, if applicable */
        if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
                unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
                if (status & DSISR_ISSTORE)
                        perm >>= 1;
                if (perm & 1)
                        return status | DSISR_KEYFAULT;
        }

        /* Save HPTE info for virtual-mode handler */
        vcpu->arch.pgfault_addr = addr;
        vcpu->arch.pgfault_index = index;
        vcpu->arch.pgfault_hpte[0] = v;
        vcpu->arch.pgfault_hpte[1] = r;

        /* Check the storage key to see if it is possibly emulated MMIO */
        if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
            (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
            (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
                return -2;      /* MMIO emulation - load instr word */

        return -1;              /* send fault up to host kernel mode */
}