linux/arch/powerpc/mm/book3s64/hash_native.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/processor.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/trace.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
#include <asm/feature-fixups.h>

#include <misc/cxl-base.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

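/*
 * The HPTE lock bit is one of the software-use bits in the first doubleword
 * of the HPTE, which is stored big-endian in the hash table. Little-endian
 * kernels load that doubleword byte-swapped, so the same physical bit is
 * reached 56 bit positions higher.
 */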
#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif

static DEFINE_RAW_SPINLOCK(native_tlbie_lock);

static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
{
        unsigned long rb;

        rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));

        asm volatile("tlbiel %0" : : "r" (rb));
}

/*
 * tlbiel instruction for hash, set invalidation
 * i.e., r=0 (hash format) and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
                                        unsigned int pid,
                                        unsigned int ric, unsigned int prs)
{
        unsigned long rb;
        unsigned long rs;
        unsigned int r = 0; /* hash format */

        rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
        rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

        asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
                     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
                     : "memory");
}
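
/*
 * Operand summary for the ISA 3.0 tlbiel above: rb carries the set number
 * and the IS field, rs carries the PID, prs selects process- vs
 * partition-scoped entries, ric selects what is invalidated (0 = TLB only,
 * 2 = all cached translation information), and r = 0 selects the HPT
 * (hash) format.
 */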


static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
{
        unsigned int set;

        asm volatile("ptesync": : :"memory");

        for (set = 0; set < num_sets; set++)
                tlbiel_hash_set_isa206(set, is);

        asm volatile("ptesync": : :"memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
        unsigned int set;

        asm volatile("ptesync": : :"memory");

        /*
         * Flush the first set of the TLB, and any caching of partition table
         * entries. Then flush the remaining sets of the TLB. Hash mode uses
         * partition scoped TLB translations.
         */
        tlbiel_hash_set_isa300(0, is, 0, 2, 0);
        for (set = 1; set < num_sets; set++)
                tlbiel_hash_set_isa300(set, is, 0, 0, 0);

        /*
         * Now invalidate the process table cache.
         *
         * From ISA v3.0B p. 1078:
         *     The following forms are invalid.
         *      * PRS=1, R=0, and RIC!=2 (The only process-scoped
         *        HPT caching is of the Process Table.)
         */
        tlbiel_hash_set_isa300(0, is, 0, 2, 1);

        asm volatile("ptesync": : :"memory");

        asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

void hash__tlbiel_all(unsigned int action)
{
        unsigned int is;

        switch (action) {
        case TLB_INVAL_SCOPE_GLOBAL:
                is = 3;
                break;
        case TLB_INVAL_SCOPE_LPID:
                is = 2;
                break;
        default:
                BUG();
        }

        if (early_cpu_has_feature(CPU_FTR_ARCH_300))
                tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
        else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
                tlbiel_all_isa206(POWER8_TLB_SETS, is);
        else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
                tlbiel_all_isa206(POWER7_TLB_SETS, is);
        else
                WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
}

static inline unsigned long ___tlbie(unsigned long vpn, int psize,
                                                int apsize, int ssize)
{
        unsigned long va;
        unsigned int penc;
        unsigned long sllp;

        /*
         * We need 14 to 65 bits of va for a tlbie of a 4K page.
         * With vpn we ignore the lower VPN_SHIFT bits already.
         * And top two bits are already ignored because we can
         * only accommodate 76 bits in a 64-bit vpn with a VPN_SHIFT
         * of 12.
         */
        va = vpn << VPN_SHIFT;
        /*
         * clear top 16 bits of 64-bit va, non SLS segment
         * Older versions of the architecture (2.02 and earlier) require the
         * masking of the top 16 bits.
         */
        if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
                va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                /* clear out bits after (52) [0....52.....63] */
                va &= ~((1ul << (64 - 52)) - 1);
                va |= ssize << 8;
                sllp = get_sllp_encoding(apsize);
                va |= sllp << 5;
                asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
                             : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        default:
                /* We need 14 to 14 + i bits of va */
                penc = mmu_psize_defs[psize].penc[apsize];
                va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                /*
                 * AVAL bits:
                 * We don't need all the bits, but the rest of the bits
                 * must be ignored by the processor.
                 * vpn covers up to 65 bits of va (0...65) and we need
                 * 58..64 bits of va.
                 */
                va |= (vpn & 0xfe); /* AVAL */
                va |= 1; /* L */
                asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
                             : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        }
        return va;
}

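/*
 * Workarounds for POWER9 tlbie errata, applied after the real tlbie sequence
 * and before the tlbsync: the ERAT erratum wants an additional
 * partition-scoped, radix-format flush, and the store-queue (STQ) erratum
 * wants the original tlbie issued a second time.
 */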
static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
                                   int apsize, int ssize)
{
        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                /* Radix flush for a hash guest */

                unsigned long rb, rs, prs, r, ric;

                rb = PPC_BIT(52); /* IS = 2 */
                rs = 0;  /* lpid = 0 */
                prs = 0; /* partition scoped */
                r = 1;   /* radix format */
                ric = 0; /* RIC_FLUSH_TLB */

                /*
                 * Need the extra ptesync to make sure we don't
                 * re-order the tlbie
                 */
                asm volatile("ptesync": : :"memory");
                asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                             : : "r"(rb), "i"(r), "i"(prs),
                               "i"(ric), "r"(rs) : "memory");
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                /* Need the extra ptesync to ensure we don't reorder the tlbie */
                asm volatile("ptesync": : :"memory");
                ___tlbie(vpn, psize, apsize, ssize);
        }
}

static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
        unsigned long rb;

        rb = ___tlbie(vpn, psize, apsize, ssize);
        trace_tlbie(0, 0, rb, 0, 0, 0, 0);
}

static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
        unsigned long va;
        unsigned int penc;
        unsigned long sllp;

        /* VPN_SHIFT can be at most 12 */
        va = vpn << VPN_SHIFT;
        /*
         * clear top 16 bits of 64-bit va, non SLS segment
         * Older versions of the architecture (2.02 and earlier) require the
         * masking of the top 16 bits.
         */
        if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
                va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                /* clear out bits after (52) [0....52.....63] */
                va &= ~((1ul << (64 - 52)) - 1);
                va |= ssize << 8;
                sllp = get_sllp_encoding(apsize);
                va |= sllp << 5;
                asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1)
                             : : "r" (va), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        default:
                /* We need 14 to 14 + i bits of va */
                penc = mmu_psize_defs[psize].penc[apsize];
                va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                /*
                 * AVAL bits:
                 * We don't need all the bits, but the rest of the bits
                 * must be ignored by the processor.
                 * vpn covers up to 65 bits of va (0...65) and we need
                 * 58..64 bits of va.
                 */
                va |= (vpn & 0xfe);
                va |= 1; /* L */
                asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1)
                             : : "r" (va), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        }
        trace_tlbie(0, 1, va, 0, 0, 0, 0);
}

static inline void tlbie(unsigned long vpn, int psize, int apsize,
                         int ssize, int local)
{
        unsigned int use_local;
        int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

        use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();

        if (use_local)
                use_local = mmu_psize_defs[psize].tlbiel;
        if (lock_tlbie && !use_local)
                raw_spin_lock(&native_tlbie_lock);
        asm volatile("ptesync": : :"memory");
        if (use_local) {
                __tlbiel(vpn, psize, apsize, ssize);
                asm volatile("ptesync": : :"memory");
        } else {
                __tlbie(vpn, psize, apsize, ssize);
                fixup_tlbie_vpn(vpn, psize, apsize, ssize);
                asm volatile("eieio; tlbsync; ptesync": : :"memory");
        }
        if (lock_tlbie && !use_local)
                raw_spin_unlock(&native_tlbie_lock);
}

static inline void native_lock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = (unsigned long *)&hptep->v;

        while (1) {
                if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
                        break;
                spin_begin();
                while (test_bit(HPTE_LOCK_BIT, word))
                        spin_cpu_relax();
                spin_end();
        }
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = (unsigned long *)&hptep->v;

        clear_bit_unlock(HPTE_LOCK_BIT, word);
}

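/*
 * Insert an HPTE into the given group. Returns -1 if the group is full,
 * otherwise the slot index within the group, with bit 3 set when the entry
 * was inserted via the secondary hash (HPTE_V_SECONDARY in vflags).
 */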
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
                        unsigned long pa, unsigned long rflags,
                        unsigned long vflags, int psize, int apsize, int ssize)
{
        struct hash_pte *hptep = htab_address + hpte_group;
        unsigned long hpte_v, hpte_r;
        int i;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
                        " rflags=%lx, vflags=%lx, psize=%d)\n",
                        hpte_group, vpn, pa, rflags, vflags, psize);
        }

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                if (!(be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        if (!(be64_to_cpu(hptep->v) & HPTE_V_VALID))
                                break;
                        native_unlock_hpte(hptep);
                }

                hptep++;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
        hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
                        i, hpte_v, hpte_r);
        }

        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
                hpte_v = hpte_old_to_new_v(hpte_v);
        }

        hptep->r = cpu_to_be64(hpte_r);
        /* Guarantee the second dword is visible before the valid bit */
        eieio();
        /*
         * Now set the first dword including the valid bit
         * NOTE: this also unlocks the hpte
         */
        hptep->v = cpu_to_be64(hpte_v);

        __asm__ __volatile__ ("ptesync" : : : "memory");

        return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

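/*
 * Evict one valid, non-bolted entry from a full group, starting the search
 * at a pseudo-random slot. The entry is zapped in the HPT but no TLB flush
 * is done here; the update/invalidate paths always issue a tlbie to cover
 * stale translations left behind by such evictions.
 */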
static long native_hpte_remove(unsigned long hpte_group)
{
        struct hash_pte *hptep;
        int i;
        int slot_offset;
        unsigned long hpte_v;

        DBG_LOW("    remove(group=%lx)\n", hpte_group);

        /* pick a random entry to start at */
        slot_offset = mftb() & 0x7;

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                hptep = htab_address + hpte_group + slot_offset;
                hpte_v = be64_to_cpu(hptep->v);

                if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        hpte_v = be64_to_cpu(hptep->v);
                        if ((hpte_v & HPTE_V_VALID)
                            && !(hpte_v & HPTE_V_BOLTED))
                                break;
                        native_unlock_hpte(hptep);
                }

                slot_offset++;
                slot_offset &= 0x7;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        /* Invalidate the hpte. NOTE: this also unlocks it */
        hptep->v = 0;

        return i;
}

static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
                                 unsigned long vpn, int bpsize,
                                 int apsize, int ssize, unsigned long flags)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v, want_v;
        int ret = 0, local = 0;

        want_v = hpte_encode_avpn(vpn, bpsize, ssize);

        DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
                vpn, want_v & HPTE_V_AVPN, slot, newpp);

        hpte_v = hpte_get_old_v(hptep);
        /*
         * We need to invalidate the TLB always because hpte_remove doesn't do
         * a tlb invalidate. If a hash bucket gets full, we "evict" a more or
         * less random entry from it. When we do that we don't invalidate the
         * TLB (hpte_remove) because we assume the old translation is still
         * technically "valid".
         */
        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
                DBG_LOW(" -> miss\n");
                ret = -1;
        } else {
                native_lock_hpte(hptep);
                /* recheck with locks held */
                hpte_v = hpte_get_old_v(hptep);
                if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
                             !(hpte_v & HPTE_V_VALID))) {
                        ret = -1;
                } else {
                        DBG_LOW(" -> hit\n");
                        /* Update the HPTE */
                        hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
                                                ~(HPTE_R_PPP | HPTE_R_N)) |
                                               (newpp & (HPTE_R_PPP | HPTE_R_N |
                                                         HPTE_R_C)));
                }
                native_unlock_hpte(hptep);
        }

        if (flags & HPTE_LOCAL_UPDATE)
                local = 1;
        /*
         * Ensure it is out of the TLB too if it is not a nohpte fault
         */
        if (!(flags & HPTE_NOHPTE_UPDATE))
                tlbie(vpn, bpsize, apsize, ssize, local);

        return ret;
}

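/*
 * Look up the HPT slot for a kernel (bolted) mapping. Only the primary hash
 * group is searched, since bolted entries are never placed in the secondary
 * group. Returns -1 if no matching valid entry is found.
 */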
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
        struct hash_pte *hptep;
        unsigned long hash;
        unsigned long i;
        long slot;
        unsigned long want_v, hpte_v;

        hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
        want_v = hpte_encode_avpn(vpn, psize, ssize);

        /* Bolted mappings are only ever in the primary group */
        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        for (i = 0; i < HPTES_PER_GROUP; i++) {

                hptep = htab_address + slot;
                hpte_v = hpte_get_old_v(hptep);
                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
                        /* HPTE matches */
                        return slot;
                ++slot;
        }

        return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
                                       int psize, int ssize)
{
        unsigned long vpn;
        unsigned long vsid;
        long slot;
        struct hash_pte *hptep;

        vsid = get_kernel_vsid(ea, ssize);
        vpn = hpt_vpn(ea, vsid, ssize);

        slot = native_hpte_find(vpn, psize, ssize);
        if (slot == -1)
                panic("could not find page to bolt\n");
        hptep = htab_address + slot;

        /* Update the HPTE */
        hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
                                ~(HPTE_R_PPP | HPTE_R_N)) |
                               (newpp & (HPTE_R_PPP | HPTE_R_N)));
        /*
         * Ensure it is out of the TLB too. A bolted entry's base and
         * actual page sizes will be the same.
         */
        tlbie(vpn, psize, psize, ssize, 0);
}

/*
 * Remove a bolted kernel entry. Memory hotplug uses this.
 *
 * No need to lock here because we should be the only user.
 */
static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
{
        unsigned long vpn;
        unsigned long vsid;
        long slot;
        struct hash_pte *hptep;

        vsid = get_kernel_vsid(ea, ssize);
        vpn = hpt_vpn(ea, vsid, ssize);

        slot = native_hpte_find(vpn, psize, ssize);
        if (slot == -1)
                return -ENOENT;

        hptep = htab_address + slot;

        VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED));

        /* Invalidate the hpte */
        hptep->v = 0;

        /* Invalidate the TLB */
        tlbie(vpn, psize, psize, ssize, 0);
        return 0;
}


static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
                                   int bpsize, int apsize, int ssize, int local)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;

        local_irq_save(flags);

        DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

        want_v = hpte_encode_avpn(vpn, bpsize, ssize);
        hpte_v = hpte_get_old_v(hptep);

        if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
                native_lock_hpte(hptep);
                /* recheck with locks held */
                hpte_v = hpte_get_old_v(hptep);

                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
                        /* Invalidate the hpte. NOTE: this also unlocks it */
                        hptep->v = 0;
                else
                        native_unlock_hpte(hptep);
        }
        /*
         * We need to invalidate the TLB always because hpte_remove doesn't do
         * a tlb invalidate. If a hash bucket gets full, we "evict" a more or
         * less random entry from it. When we do that we don't invalidate the
         * TLB (hpte_remove) because we assume the old translation is still
         * technically "valid".
         */
        tlbie(vpn, bpsize, apsize, ssize, local);

        local_irq_restore(flags);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void native_hugepage_invalidate(unsigned long vsid,
                                       unsigned long addr,
                                       unsigned char *hpte_slot_array,
                                       int psize, int ssize, int local)
{
        int i;
        struct hash_pte *hptep;
        int actual_psize = MMU_PAGE_16M;
        unsigned int max_hpte_count, valid;
        unsigned long flags, s_addr = addr;
        unsigned long hpte_v, want_v, shift;
        unsigned long hidx, vpn = 0, hash, slot;

        shift = mmu_psize_defs[psize].shift;
        max_hpte_count = 1U << (PMD_SHIFT - shift);

        local_irq_save(flags);
        for (i = 0; i < max_hpte_count; i++) {
                valid = hpte_valid(hpte_slot_array, i);
                if (!valid)
                        continue;
                hidx = hpte_hash_index(hpte_slot_array, i);

                /* get the vpn */
                addr = s_addr + (i * (1ul << shift));
                vpn = hpt_vpn(addr, vsid, ssize);
                hash = hpt_hash(vpn, shift, ssize);
                if (hidx & _PTEIDX_SECONDARY)
                        hash = ~hash;

                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += hidx & _PTEIDX_GROUP_IX;

                hptep = htab_address + slot;
                want_v = hpte_encode_avpn(vpn, psize, ssize);
                hpte_v = hpte_get_old_v(hptep);

                /* Even if we miss, we need to invalidate the TLB */
                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
                        /* recheck with locks held */
                        native_lock_hpte(hptep);
                        hpte_v = hpte_get_old_v(hptep);

                        if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
                                /*
                                 * Invalidate the hpte. NOTE: this also unlocks it
                                 */

                                hptep->v = 0;
                        } else
                                native_unlock_hpte(hptep);
                }
                /*
                 * We need to do a TLB invalidate for each address; the tlbie
                 * instruction compares the entry's VA in the TLB with the VA
                 * specified here.
                 */
                tlbie(vpn, psize, actual_psize, ssize, local);
        }
        local_irq_restore(flags);
}
#else
static void native_hugepage_invalidate(unsigned long vsid,
                                       unsigned long addr,
                                       unsigned char *hpte_slot_array,
                                       int psize, int ssize, int local)
{
        WARN(1, "%s called without THP support\n", __func__);
}
#endif

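/*
 * Decode a raw HPTE back into its base and actual page size, segment size
 * and VPN. Used by native_hpte_clear() below, where no Linux PTE is
 * available to say what the mapping was.
 */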
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
                        int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
        unsigned long avpn, pteg, vpi;
        unsigned long hpte_v = be64_to_cpu(hpte->v);
        unsigned long hpte_r = be64_to_cpu(hpte->r);
        unsigned long vsid, seg_off;
        int size, a_size, shift;
        /* Look at the 8-bit LP value */
        unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
                hpte_r = hpte_new_to_old_r(hpte_r);
        }
        if (!(hpte_v & HPTE_V_LARGE)) {
                size   = MMU_PAGE_4K;
                a_size = MMU_PAGE_4K;
        } else {
                size = hpte_page_sizes[lp] & 0xf;
                a_size = hpte_page_sizes[lp] >> 4;
        }
        /* This works for all page sizes, and for 256M and 1T segments */
        *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
        shift = mmu_psize_defs[size].shift;

        avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
        pteg = slot / HPTES_PER_GROUP;
        if (hpte_v & HPTE_V_SECONDARY)
                pteg = ~pteg;

        switch (*ssize) {
        case MMU_SEGSIZE_256M:
                /* We only have 28 - 23 bits of seg_off in avpn */
                seg_off = (avpn & 0x1f) << 23;
                vsid    = avpn >> 5;
                /* We can find more bits from the pteg value */
                if (shift < 23) {
                        vpi = (vsid ^ pteg) & htab_hash_mask;
                        seg_off |= vpi << shift;
                }
                *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
                break;
        case MMU_SEGSIZE_1T:
                /* We only have 40 - 23 bits of seg_off in avpn */
                seg_off = (avpn & 0x1ffff) << 23;
                vsid    = avpn >> 17;
                if (shift < 23) {
                        vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
                        seg_off |= vpi << shift;
                }
                *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
                break;
        default:
                *vpn = size = 0;
        }
        *psize  = size;
        *apsize = a_size;
}

/*
 * Clear all mappings on kexec. All CPUs are in real mode (or they will
 * be when they isi), and we are the only one left. We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * This must be called with interrupts disabled.
 *
 * Taking the native_tlbie_lock is unsafe here due to the possibility of
 * lockdep being on. On pre-POWER5 hardware, not taking the lock could
 * cause deadlock; on POWER5 and newer, not taking the lock is fine. This
 * only gets called during boot before secondary CPUs have come up and
 * during crashdump, and all bets are off anyway.
 *
 * TODO: add batching support when enabled. Remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
        unsigned long vpn = 0;
        unsigned long slot, slots;
        struct hash_pte *hptep = htab_address;
        unsigned long hpte_v;
        unsigned long pteg_count;
        int psize, apsize, ssize;

        pteg_count = htab_hash_mask + 1;

        slots = pteg_count * HPTES_PER_GROUP;

        for (slot = 0; slot < slots; slot++, hptep++) {
                /*
                 * We could lock the pte here, but we are the only cpu
                 * running, right? And for crash dump, we probably
                 * don't want to wait for a maybe bad cpu.
                 */
                hpte_v = be64_to_cpu(hptep->v);

                /*
                 * Call ___tlbie() here rather than tlbie() since we can't
                 * take the native_tlbie_lock.
                 */
                if (hpte_v & HPTE_V_VALID) {
                        hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
                        hptep->v = 0;
                        ___tlbie(vpn, psize, apsize, ssize);
                }
        }

        asm volatile("eieio; tlbsync; ptesync":::"memory");
}

/*
 * Batched hash table flush; we batch the tlbies to avoid taking/releasing
 * the lock all the time.
 */
static void native_flush_hash_range(unsigned long number, int local)
{
        unsigned long vpn = 0;
        unsigned long hash, index, hidx, shift, slot;
        struct hash_pte *hptep;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;
        real_pte_t pte;
        struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
        unsigned long psize = batch->psize;
        int ssize = batch->ssize;
        int i;
        unsigned int use_local;

        use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) &&
                mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use();

        local_irq_save(flags);

        for (i = 0; i < number; i++) {
                vpn = batch->vpn[i];
                pte = batch->pte[i];

                pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
                        hash = hpt_hash(vpn, shift, ssize);
                        hidx = __rpte_to_hidx(pte, index);
                        if (hidx & _PTEIDX_SECONDARY)
                                hash = ~hash;
                        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                        slot += hidx & _PTEIDX_GROUP_IX;
                        hptep = htab_address + slot;
                        want_v = hpte_encode_avpn(vpn, psize, ssize);
                        hpte_v = hpte_get_old_v(hptep);

                        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
                                continue;
                        /* lock and try again */
                        native_lock_hpte(hptep);
                        hpte_v = hpte_get_old_v(hptep);

                        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
                                native_unlock_hpte(hptep);
                        else
                                hptep->v = 0;

                } pte_iterate_hashed_end();
        }

        if (use_local) {
                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        vpn = batch->vpn[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize,
                                                    vpn, index, shift) {
                                __tlbiel(vpn, psize, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                asm volatile("ptesync":::"memory");
        } else {
                int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

                if (lock_tlbie)
                        raw_spin_lock(&native_tlbie_lock);

                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        vpn = batch->vpn[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize,
                                                    vpn, index, shift) {
                                __tlbie(vpn, psize, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                /*
                 * Just do one more with the last used values.
                 */
                fixup_tlbie_vpn(vpn, psize, psize, ssize);
                asm volatile("eieio; tlbsync; ptesync":::"memory");

                if (lock_tlbie)
                        raw_spin_unlock(&native_tlbie_lock);
        }

        local_irq_restore(flags);
}

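/*
 * Wire the native (bare-metal) HPT operations into mmu_hash_ops. This is
 * expected to be called from early hash MMU setup on platforms that manage
 * the hash table directly rather than through hypervisor calls.
 */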
void __init hpte_init_native(void)
{
        mmu_hash_ops.hpte_invalidate     = native_hpte_invalidate;
        mmu_hash_ops.hpte_updatepp       = native_hpte_updatepp;
        mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
        mmu_hash_ops.hpte_removebolted   = native_hpte_removebolted;
        mmu_hash_ops.hpte_insert         = native_hpte_insert;
        mmu_hash_ops.hpte_remove         = native_hpte_remove;
        mmu_hash_ops.hpte_clear_all      = native_hpte_clear;
        mmu_hash_ops.flush_hash_range    = native_flush_hash_range;
        mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
}