linux/arch/powerpc/kvm/book3s_hv_rm_mmu.c
   1/*
   2 * This program is free software; you can redistribute it and/or modify
   3 * it under the terms of the GNU General Public License, version 2, as
   4 * published by the Free Software Foundation.
   5 *
   6 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/string.h>
  11#include <linux/kvm.h>
  12#include <linux/kvm_host.h>
  13#include <linux/hugetlb.h>
  14#include <linux/module.h>
  15
  16#include <asm/tlbflush.h>
  17#include <asm/kvm_ppc.h>
  18#include <asm/kvm_book3s.h>
  19#include <asm/mmu-hash64.h>
  20#include <asm/hvcall.h>
  21#include <asm/synch.h>
  22#include <asm/ppc-opcode.h>
  23
  24/* Translate address of a vmalloc'd thing to a linear map address */
  25static void *real_vmalloc_addr(void *x)
  26{
  27        unsigned long addr = (unsigned long) x;
  28        pte_t *p;
  29        /*
  30         * Assume we don't have huge pages in vmalloc space,
  31         * so don't worry about THP collapse/split. This is called
  32         * only in real mode, hence we don't need local_irq_save/restore.
  33         */
  34        p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
  35        if (!p || !pte_present(*p))
  36                return NULL;
  37        addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
  38        return __va(addr);
  39}
  40
  41/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
  42static int global_invalidates(struct kvm *kvm, unsigned long flags)
  43{
  44        int global;
  45
  46        /*
  47         * If there is only one vcore, and it's currently running,
  48         * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
  49         * we can use tlbiel as long as we mark all other physical
  50         * cores as potentially having stale TLB entries for this lpid.
  51         * Otherwise, don't use tlbiel.
  52         */
  53        if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
  54                global = 0;
  55        else
  56                global = 1;
  57
  58        if (!global) {
  59                /* any other core might now have stale TLB entries... */
  60                smp_wmb();
  61                cpumask_setall(&kvm->arch.need_tlb_flush);
  62                cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
  63                                  &kvm->arch.need_tlb_flush);
  64        }
  65
  66        return global;
  67}
  68
  69/*
  70 * Add this HPTE into the chain for the real page.
  71 * Must be called with the chain locked; it unlocks the chain.
  72 */
  73void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
  74                             unsigned long *rmap, long pte_index, int realmode)
  75{
  76        struct revmap_entry *head, *tail;
  77        unsigned long i;
  78
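            /*
             * The rmap word for the real page holds the index of one HPTE
             * that maps it; rev->forw and rev->back link the revmap entries
             * for all such HPTEs into a circular doubly-linked list.
             */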
  79        if (*rmap & KVMPPC_RMAP_PRESENT) {
  80                i = *rmap & KVMPPC_RMAP_INDEX;
  81                head = &kvm->arch.revmap[i];
  82                if (realmode)
  83                        head = real_vmalloc_addr(head);
  84                tail = &kvm->arch.revmap[head->back];
  85                if (realmode)
  86                        tail = real_vmalloc_addr(tail);
  87                rev->forw = i;
  88                rev->back = head->back;
  89                tail->forw = pte_index;
  90                head->back = pte_index;
  91        } else {
  92                rev->forw = rev->back = pte_index;
  93                *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
  94                        pte_index | KVMPPC_RMAP_PRESENT;
  95        }
  96        unlock_rmap(rmap);
  97}
  98EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
  99
 100/* Remove this HPTE from the chain for a real page */
 101static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 102                                struct revmap_entry *rev,
 103                                unsigned long hpte_v, unsigned long hpte_r)
 104{
 105        struct revmap_entry *next, *prev;
 106        unsigned long gfn, ptel, head;
 107        struct kvm_memory_slot *memslot;
 108        unsigned long *rmap;
 109        unsigned long rcbits;
 110
 111        rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
 112        ptel = rev->guest_rpte |= rcbits;
 113        gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
 114        memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
 115        if (!memslot)
 116                return;
 117
 118        rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
 119        lock_rmap(rmap);
 120
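            /* Unlink this entry from the chain and update the head in *rmap */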
 121        head = *rmap & KVMPPC_RMAP_INDEX;
 122        next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
 123        prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
 124        next->back = rev->back;
 125        prev->forw = rev->forw;
 126        if (head == pte_index) {
 127                head = rev->forw;
 128                if (head == pte_index)
 129                        *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
 130                else
 131                        *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
 132        }
 133        *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
 134        unlock_rmap(rmap);
 135}
 136
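    /*
     * Core of the H_ENTER hypercall: validate the proposed HPTE, translate
     * the guest physical address through the memslot and the host Linux
     * page tables, find and lock a free slot in the hashed page table, and
     * link the new HPTE into the reverse-map chain for its page.  'realmode'
     * indicates whether we are executing in hypervisor real mode.
     */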
 137long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 138                       long pte_index, unsigned long pteh, unsigned long ptel,
 139                       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
 140{
 141        unsigned long i, pa, gpa, gfn, psize;
 142        unsigned long slot_fn, hva;
 143        __be64 *hpte;
 144        struct revmap_entry *rev;
 145        unsigned long g_ptel;
 146        struct kvm_memory_slot *memslot;
 147        unsigned hpage_shift;
 148        unsigned long is_io;
 149        unsigned long *rmap;
 150        pte_t *ptep;
 151        unsigned int writing;
 152        unsigned long mmu_seq;
 153        unsigned long rcbits, irq_flags = 0;
 154
 155        psize = hpte_page_size(pteh, ptel);
 156        if (!psize)
 157                return H_PARAMETER;
 158        writing = hpte_is_writable(ptel);
 159        pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
 160        ptel &= ~HPTE_GR_RESERVED;
 161        g_ptel = ptel;
 162
 163        /* used later to detect if we might have been invalidated */
 164        mmu_seq = kvm->mmu_notifier_seq;
 165        smp_rmb();
 166
 167        /* Find the memslot (if any) for this address */
 168        gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
 169        gfn = gpa >> PAGE_SHIFT;
 170        memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
 171        pa = 0;
 172        is_io = ~0ul;
 173        rmap = NULL;
 174        if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
 175                /* Emulated MMIO - mark this with key=31 */
 176                pteh |= HPTE_V_ABSENT;
 177                ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
 178                goto do_insert;
 179        }
 180
 181        /* Check if the requested page fits entirely in the memslot. */
 182        if (!slot_is_aligned(memslot, psize))
 183                return H_PARAMETER;
 184        slot_fn = gfn - memslot->base_gfn;
 185        rmap = &memslot->arch.rmap[slot_fn];
 186
 187        /* Translate to host virtual address */
 188        hva = __gfn_to_hva_memslot(memslot, gfn);
 189        /*
 190         * If we had a page table change after lookup, we would
 191         * retry via mmu_notifier_retry.
 192         */
 193        if (realmode)
 194                ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
 195        else {
 196                local_irq_save(irq_flags);
 197                ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
 198        }
 199        if (ptep) {
 200                pte_t pte;
 201                unsigned int host_pte_size;
 202
 203                if (hpage_shift)
 204                        host_pte_size = 1ul << hpage_shift;
 205                else
 206                        host_pte_size = PAGE_SIZE;
 207                /*
 208                 * The guest page size should always be <= the host
 209                 * page size if the host is using hugepages.
 210                 */
 211                if (host_pte_size < psize) {
 212                        if (!realmode)
 213                                local_irq_restore(irq_flags);
 214                        return H_PARAMETER;
 215                }
 216                pte = kvmppc_read_update_linux_pte(ptep, writing);
 217                if (pte_present(pte) && !pte_protnone(pte)) {
 218                        if (writing && !pte_write(pte))
 219                                /* make the actual HPTE be read-only */
 220                                ptel = hpte_make_readonly(ptel);
 221                        is_io = hpte_cache_bits(pte_val(pte));
 222                        pa = pte_pfn(pte) << PAGE_SHIFT;
 223                        pa |= hva & (host_pte_size - 1);
 224                        pa |= gpa & ~PAGE_MASK;
 225                }
 226        }
 227        if (!realmode)
 228                local_irq_restore(irq_flags);
 229
 230        ptel &= ~(HPTE_R_PP0 - psize);
 231        ptel |= pa;
 232
 233        if (pa)
 234                pteh |= HPTE_V_VALID;
 235        else
 236                pteh |= HPTE_V_ABSENT;
 237
 238        /* Check WIMG */
 239        if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
 240                if (is_io)
 241                        return H_PARAMETER;
 242                /*
 243                 * Allow guest to map emulated device memory as
 244                 * uncacheable, but actually make it cacheable.
 245                 */
 246                ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
 247                ptel |= HPTE_R_M;
 248        }
 249
 250        /* Find and lock the HPTEG slot to use */
 251 do_insert:
 252        if (pte_index >= kvm->arch.hpt_npte)
 253                return H_PARAMETER;
 254        if (likely((flags & H_EXACT) == 0)) {
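                    /*
                     * Without H_EXACT the low 3 bits of pte_index are
                     * ignored: each HPTE is 16 bytes and a hash bucket
                     * (HPTEG) holds 8 of them, so scan the whole group
                     * for a free slot.
                     */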
 255                pte_index &= ~7UL;
 256                hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 257                for (i = 0; i < 8; ++i) {
 258                        if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
 259                            try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
 260                                          HPTE_V_ABSENT))
 261                                break;
 262                        hpte += 2;
 263                }
 264                if (i == 8) {
 265                        /*
 266                         * Since try_lock_hpte doesn't retry (not even stdcx.
 267                         * failures), it could be that there is a free slot
 268                         * but we transiently failed to lock it.  Try again,
 269                         * actually locking each slot and checking it.
 270                         */
 271                        hpte -= 16;
 272                        for (i = 0; i < 8; ++i) {
 273                                u64 pte;
 274                                while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 275                                        cpu_relax();
 276                                pte = be64_to_cpu(hpte[0]);
 277                                if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
 278                                        break;
 279                                __unlock_hpte(hpte, pte);
 280                                hpte += 2;
 281                        }
 282                        if (i == 8)
 283                                return H_PTEG_FULL;
 284                }
 285                pte_index += i;
 286        } else {
 287                hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 288                if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
 289                                   HPTE_V_ABSENT)) {
 290                        /* Lock the slot and check again */
 291                        u64 pte;
 292
 293                        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 294                                cpu_relax();
 295                        pte = be64_to_cpu(hpte[0]);
 296                        if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
 297                                __unlock_hpte(hpte, pte);
 298                                return H_PTEG_FULL;
 299                        }
 300                }
 301        }
 302
 303        /* Save away the guest's idea of the second HPTE dword */
 304        rev = &kvm->arch.revmap[pte_index];
 305        if (realmode)
 306                rev = real_vmalloc_addr(rev);
 307        if (rev) {
 308                rev->guest_rpte = g_ptel;
 309                note_hpte_modification(kvm, rev);
 310        }
 311
 312        /* Link HPTE into reverse-map chain */
 313        if (pteh & HPTE_V_VALID) {
 314                if (realmode)
 315                        rmap = real_vmalloc_addr(rmap);
 316                lock_rmap(rmap);
 317                /* Check for pending invalidations under the rmap chain lock */
 318                if (mmu_notifier_retry(kvm, mmu_seq)) {
 319                        /* inval in progress, write a non-present HPTE */
 320                        pteh |= HPTE_V_ABSENT;
 321                        pteh &= ~HPTE_V_VALID;
 322                        unlock_rmap(rmap);
 323                } else {
 324                        kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
 325                                                realmode);
 326                        /* Only set R/C in real HPTE if already set in *rmap */
 327                        rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
 328                        ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
 329                }
 330        }
 331
 332        hpte[1] = cpu_to_be64(ptel);
 333
 334        /* Write the first HPTE dword, unlocking the HPTE and making it valid */
 335        eieio();
 336        __unlock_hpte(hpte, pteh);
 337        asm volatile("ptesync" : : : "memory");
 338
 339        *pte_idx_ret = pte_index;
 340        return H_SUCCESS;
 341}
 342EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
 343
 344long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 345                    long pte_index, unsigned long pteh, unsigned long ptel)
 346{
 347        return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
 348                                 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
 349}
 350
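    /*
     * A non-zero per-CPU token read from the PACA, stored into the tlbie
     * lock word below to mark it as held.
     */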
 351#ifdef __BIG_ENDIAN__
 352#define LOCK_TOKEN      (*(u32 *)(&get_paca()->lock_token))
 353#else
 354#define LOCK_TOKEN      (*(u32 *)(&get_paca()->paca_index))
 355#endif
 356
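    /*
     * Try to take the tlbie lock: returns 1 on success, 0 if the lock is
     * already held.  Retries only if the stwcx. fails, not if the lock is
     * seen as taken.
     */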
 357static inline int try_lock_tlbie(unsigned int *lock)
 358{
 359        unsigned int tmp, old;
 360        unsigned int token = LOCK_TOKEN;
 361
 362        asm volatile("1:lwarx   %1,0,%2\n"
 363                     "  cmpwi   cr0,%1,0\n"
 364                     "  bne     2f\n"
 365                     "  stwcx.  %3,0,%2\n"
 366                     "  bne-    1b\n"
 367                     "  isync\n"
 368                     "2:"
 369                     : "=&r" (tmp), "=&r" (old)
 370                     : "r" (lock), "r" (token)
 371                     : "cc", "memory");
 372        return old == 0;
 373}
 374
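    /*
     * Flush the TLB entries described by rbvalues[0..npages-1].  A global
     * flush broadcasts tlbie while holding the per-VM tlbie_lock; a local
     * flush uses tlbiel, which only affects the current CPU.
     */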
 375static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 376                      long npages, int global, bool need_sync)
 377{
 378        long i;
 379
 380        if (global) {
 381                while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
 382                        cpu_relax();
 383                if (need_sync)
 384                        asm volatile("ptesync" : : : "memory");
 385                for (i = 0; i < npages; ++i)
 386                        asm volatile(PPC_TLBIE(%1,%0) : :
 387                                     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
 388                asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 389                kvm->arch.tlbie_lock = 0;
 390        } else {
 391                if (need_sync)
 392                        asm volatile("ptesync" : : : "memory");
 393                for (i = 0; i < npages; ++i)
 394                        asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
 395                asm volatile("ptesync" : : : "memory");
 396        }
 397}
 398
 399long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 400                        unsigned long pte_index, unsigned long avpn,
 401                        unsigned long *hpret)
 402{
 403        __be64 *hpte;
 404        unsigned long v, r, rb;
 405        struct revmap_entry *rev;
 406        u64 pte;
 407
 408        if (pte_index >= kvm->arch.hpt_npte)
 409                return H_PARAMETER;
 410        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 411        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 412                cpu_relax();
 413        pte = be64_to_cpu(hpte[0]);
 414        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
 415            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
 416            ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
 417                __unlock_hpte(hpte, pte);
 418                return H_NOT_FOUND;
 419        }
 420
 421        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 422        v = pte & ~HPTE_V_HVLOCK;
 423        if (v & HPTE_V_VALID) {
 424                u64 pte1;
 425
 426                pte1 = be64_to_cpu(hpte[1]);
 427                hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
 428                rb = compute_tlbie_rb(v, pte1, pte_index);
 429                do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 430                /* Read PTE low word after tlbie to get final R/C values */
 431                remove_revmap_chain(kvm, pte_index, rev, v, pte1);
 432        }
 433        r = rev->guest_rpte & ~HPTE_GR_RESERVED;
 434        note_hpte_modification(kvm, rev);
 435        unlock_hpte(hpte, 0);
 436
 437        hpret[0] = v;
 438        hpret[1] = r;
 439        return H_SUCCESS;
 440}
 441EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
 442
 443long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 444                     unsigned long pte_index, unsigned long avpn)
 445{
 446        return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
 447                                  &vcpu->arch.gpr[4]);
 448}
 449
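    /*
     * H_BULK_REMOVE: up to 4 removal requests are passed in gpr[4..11] as
     * (control, avpn) pairs.  In each control dword, bits 63:62 hold the
     * request type (3 ends the list), bits 57:56 the absolute/andcond/AVPN
     * flags and bits 55:0 the PTE index; for example, (1ul << 62) |
     * (2ul << 56) | idx asks to remove HPTE 'idx' if its AVPN matches the
     * paired dword.  On return the top byte of each control dword is
     * rewritten: 0x80 removed, 0x90 not found, 0xa0 parameter error, with
     * the HPTE's R and C bits merged in just below the completion code.
     */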
 450long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 451{
 452        struct kvm *kvm = vcpu->kvm;
 453        unsigned long *args = &vcpu->arch.gpr[4];
 454        __be64 *hp, *hptes[4];
 455        unsigned long tlbrb[4];
 456        long int i, j, k, n, found, indexes[4];
 457        unsigned long flags, req, pte_index, rcbits;
 458        int global;
 459        long int ret = H_SUCCESS;
 460        struct revmap_entry *rev, *revs[4];
 461        u64 hp0;
 462
 463        global = global_invalidates(kvm, 0);
 464        for (i = 0; i < 4 && ret == H_SUCCESS; ) {
 465                n = 0;
 466                for (; i < 4; ++i) {
 467                        j = i * 2;
 468                        pte_index = args[j];
 469                        flags = pte_index >> 56;
 470                        pte_index &= ((1ul << 56) - 1);
 471                        req = flags >> 6;
 472                        flags &= 3;
 473                        if (req == 3) {         /* no more requests */
 474                                i = 4;
 475                                break;
 476                        }
 477                        if (req != 1 || flags == 3 ||
 478                            pte_index >= kvm->arch.hpt_npte) {
 479                                /* parameter error */
 480                                args[j] = ((0xa0 | flags) << 56) + pte_index;
 481                                ret = H_PARAMETER;
 482                                break;
 483                        }
 484                        hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
 485                        /* to avoid deadlock, don't spin except for first */
 486                        if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
 487                                if (n)
 488                                        break;
 489                                while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
 490                                        cpu_relax();
 491                        }
 492                        found = 0;
 493                        hp0 = be64_to_cpu(hp[0]);
 494                        if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
 495                                switch (flags & 3) {
 496                                case 0:         /* absolute */
 497                                        found = 1;
 498                                        break;
 499                                case 1:         /* andcond */
 500                                        if (!(hp0 & args[j + 1]))
 501                                                found = 1;
 502                                        break;
 503                                case 2:         /* AVPN */
 504                                        if ((hp0 & ~0x7fUL) == args[j + 1])
 505                                                found = 1;
 506                                        break;
 507                                }
 508                        }
 509                        if (!found) {
 510                                hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 511                                args[j] = ((0x90 | flags) << 56) + pte_index;
 512                                continue;
 513                        }
 514
 515                        args[j] = ((0x80 | flags) << 56) + pte_index;
 516                        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 517                        note_hpte_modification(kvm, rev);
 518
 519                        if (!(hp0 & HPTE_V_VALID)) {
 520                                /* insert R and C bits from PTE */
 521                                rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
 522                                args[j] |= rcbits << (56 - 5);
 523                                hp[0] = 0;
 524                                continue;
 525                        }
 526
 527                        /* leave it locked */
 528                        hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
 529                        tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
 530                                be64_to_cpu(hp[1]), pte_index);
 531                        indexes[n] = j;
 532                        hptes[n] = hp;
 533                        revs[n] = rev;
 534                        ++n;
 535                }
 536
 537                if (!n)
 538                        break;
 539
 540                /* Now that we've collected a batch, do the tlbies */
 541                do_tlbies(kvm, tlbrb, n, global, true);
 542
 543                /* Read PTE low words after tlbie to get final R/C values */
 544                for (k = 0; k < n; ++k) {
 545                        j = indexes[k];
 546                        pte_index = args[j] & ((1ul << 56) - 1);
 547                        hp = hptes[k];
 548                        rev = revs[k];
 549                        remove_revmap_chain(kvm, pte_index, rev,
 550                                be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
 551                        rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
 552                        args[j] |= rcbits << (56 - 5);
 553                        __unlock_hpte(hp, 0);
 554                }
 555        }
 556
 557        return ret;
 558}
 559
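    /*
     * H_PROTECT: update the pp, key and N bits of an existing HPTE.  The
     * guest's view of the second dword is always updated; the real HPTE is
     * never upgraded from read-only to writable here (a page fault will do
     * that), and if it changes it is invalidated before being rewritten.
     */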
 560long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 561                      unsigned long pte_index, unsigned long avpn,
 562                      unsigned long va)
 563{
 564        struct kvm *kvm = vcpu->kvm;
 565        __be64 *hpte;
 566        struct revmap_entry *rev;
 567        unsigned long v, r, rb, mask, bits;
 568        u64 pte;
 569
 570        if (pte_index >= kvm->arch.hpt_npte)
 571                return H_PARAMETER;
 572
 573        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 574        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 575                cpu_relax();
 576        pte = be64_to_cpu(hpte[0]);
 577        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
 578            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
 579                __unlock_hpte(hpte, pte);
 580                return H_NOT_FOUND;
 581        }
 582
 583        v = pte;
 584        bits = (flags << 55) & HPTE_R_PP0;
 585        bits |= (flags << 48) & HPTE_R_KEY_HI;
 586        bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
 587
 588        /* Update guest view of 2nd HPTE dword */
 589        mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
 590                HPTE_R_KEY_HI | HPTE_R_KEY_LO;
 591        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 592        if (rev) {
 593                r = (rev->guest_rpte & ~mask) | bits;
 594                rev->guest_rpte = r;
 595                note_hpte_modification(kvm, rev);
 596        }
 597
 598        /* Update HPTE */
 599        if (v & HPTE_V_VALID) {
 600                /*
 601                 * If the page is valid, don't let it transition from
 602                 * readonly to writable.  If it should be writable, we'll
 603                 * take a trap and let the page fault code sort it out.
 604                 */
 605                pte = be64_to_cpu(hpte[1]);
 606                r = (pte & ~mask) | bits;
 607                if (hpte_is_writable(r) && !hpte_is_writable(pte))
 608                        r = hpte_make_readonly(r);
 609                /* If the PTE is changing, invalidate it first */
 610                if (r != pte) {
 611                        rb = compute_tlbie_rb(v, r, pte_index);
 612                        hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
 613                                              HPTE_V_ABSENT);
 614                        do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
 615                                  true);
 616                        hpte[1] = cpu_to_be64(r);
 617                }
 618        }
 619        unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 620        asm volatile("ptesync" : : : "memory");
 621        return H_SUCCESS;
 622}
 623
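    /*
     * H_READ: return one HPTE (or four consecutive HPTEs with H_READ_4) in
     * gpr[4] onward, presenting absent (paged-out) entries as valid and
     * substituting the guest's view of the second dword.
     */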
 624long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 625                   unsigned long pte_index)
 626{
 627        struct kvm *kvm = vcpu->kvm;
 628        __be64 *hpte;
 629        unsigned long v, r;
 630        int i, n = 1;
 631        struct revmap_entry *rev = NULL;
 632
 633        if (pte_index >= kvm->arch.hpt_npte)
 634                return H_PARAMETER;
 635        if (flags & H_READ_4) {
 636                pte_index &= ~3;
 637                n = 4;
 638        }
 639        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 640        for (i = 0; i < n; ++i, ++pte_index) {
 641                hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
 642                v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
 643                r = be64_to_cpu(hpte[1]);
 644                if (v & HPTE_V_ABSENT) {
 645                        v &= ~HPTE_V_ABSENT;
 646                        v |= HPTE_V_VALID;
 647                }
 648                if (v & HPTE_V_VALID) {
 649                        r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
 650                        r &= ~HPTE_GR_RESERVED;
 651                }
 652                vcpu->arch.gpr[4 + i * 2] = v;
 653                vcpu->arch.gpr[5 + i * 2] = r;
 654        }
 655        return H_SUCCESS;
 656}
 657
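    /* Clear the valid bit of an HPTE and flush its TLB entry on all CPUs */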
 658void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
 659                        unsigned long pte_index)
 660{
 661        unsigned long rb;
 662
 663        hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
 664        rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
 665                              pte_index);
 666        do_tlbies(kvm, &rb, 1, 1, true);
 667}
 668EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
 669
 670void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
 671                           unsigned long pte_index)
 672{
 673        unsigned long rb;
 674        unsigned char rbyte;
 675
 676        rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
 677                              pte_index);
 678        rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
 679        /* modify only the second-last byte, which contains the ref bit */
 680        *((char *)hptep + 14) = rbyte;
 681        do_tlbies(kvm, &rb, 1, 1, false);
 682}
 683EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
 684
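    /* Base page shift for each value of the SLB_VSID_LP field */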
 685static int slb_base_page_shift[4] = {
 686        24,     /* 16M */
 687        16,     /* 64k */
 688        34,     /* 16G */
 689        20,     /* 1M, unsupported */
 690};
 691
 692/* When called from virtual mode, this function should be protected by
 693 * preempt_disable(); otherwise, holding HPTE_V_HVLOCK can lead to a
 694 * deadlock.
 695 */
 696long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 697                              unsigned long valid)
 698{
 699        unsigned int i;
 700        unsigned int pshift;
 701        unsigned long somask;
 702        unsigned long vsid, hash;
 703        unsigned long avpn;
 704        __be64 *hpte;
 705        unsigned long mask, val;
 706        unsigned long v, r;
 707
 708        /* Get page shift, work out hash and AVPN etc. */
 709        mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
 710        val = 0;
 711        pshift = 12;
 712        if (slb_v & SLB_VSID_L) {
 713                mask |= HPTE_V_LARGE;
 714                val |= HPTE_V_LARGE;
 715                pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
 716        }
 717        if (slb_v & SLB_VSID_B_1T) {
 718                somask = (1UL << 40) - 1;
 719                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
 720                vsid ^= vsid << 25;
 721        } else {
 722                somask = (1UL << 28) - 1;
 723                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
 724        }
 725        hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
 726        avpn = slb_v & ~(somask >> 16); /* also includes B */
 727        avpn |= (eaddr & somask) >> 16;
 728
 729        if (pshift >= 24)
 730                avpn &= ~((1UL << (pshift - 16)) - 1);
 731        else
 732                avpn &= ~0x7fUL;
 733        val |= avpn;
 734
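            /* Search the primary hash bucket first, then the secondary */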
 735        for (;;) {
 736                hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7));
 737
 738                for (i = 0; i < 16; i += 2) {
 739                        /* Read the PTE racily */
 740                        v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
 741
 742                        /* Check valid/absent, hash, segment size and AVPN */
 743                        if (!(v & valid) || (v & mask) != val)
 744                                continue;
 745
 746                        /* Lock the PTE and read it under the lock */
 747                        while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
 748                                cpu_relax();
 749                        v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
 750                        r = be64_to_cpu(hpte[i+1]);
 751
 752                        /*
 753                         * Check the HPTE again, including base page size
 754                         */
 755                        if ((v & valid) && (v & mask) == val &&
 756                            hpte_base_page_size(v, r) == (1ul << pshift))
 757                                /* Return with the HPTE still locked */
 758                                return (hash << 3) + (i >> 1);
 759
 760                        __unlock_hpte(&hpte[i], v);
 761                }
 762
 763                if (val & HPTE_V_SECONDARY)
 764                        break;
 765                val |= HPTE_V_SECONDARY;
 766                hash = hash ^ kvm->arch.hpt_mask;
 767        }
 768        return -1;
 769}
 770EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
 771
 772/*
 773 * Called in real mode to check whether an HPTE not found fault
 774 * is due to accessing a paged-out page or an emulated MMIO page,
 775 * or if a protection fault is due to accessing a page that the
 776 * guest wanted read/write access to but which we made read-only.
 777 * Returns a possibly modified status (DSISR) value if none of these
 778 * apply (i.e. the interrupt should be passed to the guest),
 779 * -1 to pass the fault up to host kernel mode code, -2 to do that
 780 * and also load the instruction word (for MMIO emulation),
 781 * or 0 if we should make the guest retry the access.
 782 */
 783long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 784                          unsigned long slb_v, unsigned int status, bool data)
 785{
 786        struct kvm *kvm = vcpu->kvm;
 787        long int index;
 788        unsigned long v, r, gr;
 789        __be64 *hpte;
 790        unsigned long valid;
 791        struct revmap_entry *rev;
 792        unsigned long pp, key;
 793
 794        /* For protection fault, expect to find a valid HPTE */
 795        valid = HPTE_V_VALID;
 796        if (status & DSISR_NOHPTE)
 797                valid |= HPTE_V_ABSENT;
 798
 799        index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
 800        if (index < 0) {
 801                if (status & DSISR_NOHPTE)
 802                        return status;  /* there really was no HPTE */
 803                return 0;               /* for prot fault, HPTE disappeared */
 804        }
 805        hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
 806        v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
 807        r = be64_to_cpu(hpte[1]);
 808        rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
 809        gr = rev->guest_rpte;
 810
 811        unlock_hpte(hpte, v);
 812
 813        /* For not found, if the HPTE is valid by now, retry the instruction */
 814        if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
 815                return 0;
 816
 817        /* Check access permissions to the page */
 818        pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
 819        key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
 820        status &= ~DSISR_NOHPTE;        /* DSISR_NOHPTE == SRR1_ISI_NOPT */
 821        if (!data) {
 822                if (gr & (HPTE_R_N | HPTE_R_G))
 823                        return status | SRR1_ISI_N_OR_G;
 824                if (!hpte_read_permission(pp, slb_v & key))
 825                        return status | SRR1_ISI_PROT;
 826        } else if (status & DSISR_ISSTORE) {
 827                /* check write permission */
 828                if (!hpte_write_permission(pp, slb_v & key))
 829                        return status | DSISR_PROTFAULT;
 830        } else {
 831                if (!hpte_read_permission(pp, slb_v & key))
 832                        return status | DSISR_PROTFAULT;
 833        }
 834
 835        /* Check storage key, if applicable */
 836        if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
 837                unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
 838                if (status & DSISR_ISSTORE)
 839                        perm >>= 1;
 840                if (perm & 1)
 841                        return status | DSISR_KEYFAULT;
 842        }
 843
 844        /* Save HPTE info for virtual-mode handler */
 845        vcpu->arch.pgfault_addr = addr;
 846        vcpu->arch.pgfault_index = index;
 847        vcpu->arch.pgfault_hpte[0] = v;
 848        vcpu->arch.pgfault_hpte[1] = r;
 849
 850        /* Check the storage key to see if it is possibly emulated MMIO */
 851        if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
 852            (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
 853            (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
 854                return -2;      /* MMIO emulation - load instr word */
 855
 856        return -1;              /* send fault up to host kernel mode */
 857}
 858