linux/arch/powerpc/mm/hash_native_64.c
/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/abs_addr.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define HPTE_LOCK_BIT 3

static DEFINE_RAW_SPINLOCK(native_tlbie_lock);

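/*
 * Issue a global tlbie for one virtual address.  Callers on CPUs that
 * lack CPU_FTR_LOCKLESS_TLBIE are expected to hold native_tlbie_lock.
 * The VA, segment size and (for large pages) the page-size encoding
 * are packed into the single operand the instruction expects: for 4K
 * pages the low 12 bits are masked off and ssize lands at bits 8-9;
 * for large pages the page offset is masked off, penc is placed at
 * bit 12, ssize at bits 8-9, and bit 0 is set to flag a large page.
 * ASM_MMU_FTR_IFCLR selects the plain "tlbie rb,l" encoding when
 * MMU_FTR_TLBIE_206 is clear and the PPC_TLBIE macro form otherwise.
 */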
static inline void __tlbie(unsigned long va, int psize, int ssize)
{
        unsigned int penc;

        /* clear top 16 bits, non SLS segment */
        va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                va &= ~0xffful;
                va |= ssize << 8;
                asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0),
                                               %2)
                             : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
                             : "memory");
                break;
        default:
                penc = mmu_psize_defs[psize].penc;
                va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                va |= 1; /* L */
                asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0),
                                               %2)
                             : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
                             : "memory");
                break;
        }
}

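/*
 * Local form: same operand encoding as __tlbie(), but uses tlbiel,
 * which only invalidates the TLB of the executing CPU and therefore
 * needs no global lock.  The instruction is hand-encoded (.long
 * 0x7c000224 with the register field and L bit or'd in), presumably
 * so the file assembles on toolchains that do not know the tlbiel
 * mnemonic.
 */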
static inline void __tlbiel(unsigned long va, int psize, int ssize)
{
        unsigned int penc;

        /* clear top 16 bits, non SLS segment */
        va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                va &= ~0xffful;
                va |= ssize << 8;
                asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
                             : : "r"(va) : "memory");
                break;
        default:
                penc = mmu_psize_defs[psize].penc;
                va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                va |= 1; /* L */
                asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
                             : : "r"(va) : "memory");
                break;
        }
}

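/*
 * Flush one translation, choosing between the local and global forms.
 * tlbiel is used only when the caller asked for a local flush, the CPU
 * supports it (CPU_FTR_TLBIEL) and the page size permits it; otherwise
 * a global tlbie is issued, serialized by native_tlbie_lock on CPUs
 * without CPU_FTR_LOCKLESS_TLBIE.
 */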
static inline void tlbie(unsigned long va, int psize, int ssize, int local)
{
        unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
        int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);

        if (use_local)
                use_local = mmu_psize_defs[psize].tlbiel;
        if (lock_tlbie && !use_local)
                raw_spin_lock(&native_tlbie_lock);
        asm volatile("ptesync": : :"memory");
        if (use_local) {
                __tlbiel(va, psize, ssize);
                asm volatile("ptesync": : :"memory");
        } else {
                __tlbie(va, psize, ssize);
                asm volatile("eieio; tlbsync; ptesync": : :"memory");
        }
        if (lock_tlbie && !use_local)
                raw_spin_unlock(&native_tlbie_lock);
}

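/*
 * Each HPTE is locked individually by setting HPTE_LOCK_BIT, a
 * software-defined bit in the first doubleword of the entry, with a
 * test-and-set loop.  Rewriting that doubleword with the lock bit
 * clear doubles as the unlock, which is why several paths below
 * simply store a new value to hptep->v instead of calling
 * native_unlock_hpte().
 */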
static inline void native_lock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = &hptep->v;

        while (1) {
                if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
                        break;
                while (test_bit(HPTE_LOCK_BIT, word))
                        cpu_relax();
        }
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = &hptep->v;

        clear_bit_unlock(HPTE_LOCK_BIT, word);
}

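/*
 * Insert a new HPTE into the given group of 8 slots.  The group is
 * scanned unlocked for an invalid slot first; a candidate is then
 * re-checked with the per-HPTE lock held to close the race with a
 * concurrent insert.  The second doubleword is written before the
 * first, with an eieio in between, so a hardware walker never sees a
 * valid entry paired with a stale RPN.  Returns the slot number
 * within the group (bit 3 set if this is the secondary hash), or -1
 * if the group is full.
 */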
static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
                        unsigned long pa, unsigned long rflags,
                        unsigned long vflags, int psize, int ssize)
{
        struct hash_pte *hptep = htab_address + hpte_group;
        unsigned long hpte_v, hpte_r;
        int i;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW("    insert(group=%lx, va=%016lx, pa=%016lx,"
                        " rflags=%lx, vflags=%lx, psize=%d)\n",
                        hpte_group, va, pa, rflags, vflags, psize);
        }

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                if (!(hptep->v & HPTE_V_VALID)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        if (!(hptep->v & HPTE_V_VALID))
                                break;
                        native_unlock_hpte(hptep);
                }

                hptep++;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
        hpte_r = hpte_encode_r(pa, psize) | rflags;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
                        i, hpte_v, hpte_r);
        }

        hptep->r = hpte_r;
        /* Guarantee the second dword is visible before the valid bit */
        eieio();
        /*
         * Now set the first dword including the valid bit
         * NOTE: this also unlocks the hpte
         */
        hptep->v = hpte_v;

        __asm__ __volatile__ ("ptesync" : : : "memory");

        return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

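/*
 * Evict one non-bolted entry from a full group so the caller can
 * retry an insert.  The search starts at a pseudo-random offset taken
 * from the timebase so the same slot is not always sacrificed.
 * Returns the offset of the slot that was freed, or -1 if every entry
 * in the group is bolted.
 */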
static long native_hpte_remove(unsigned long hpte_group)
{
        struct hash_pte *hptep;
        int i;
        int slot_offset;
        unsigned long hpte_v;

        DBG_LOW("    remove(group=%lx)\n", hpte_group);

        /* pick a random entry to start at */
        slot_offset = mftb() & 0x7;

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                hptep = htab_address + hpte_group + slot_offset;
                hpte_v = hptep->v;

                if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        hpte_v = hptep->v;
                        if ((hpte_v & HPTE_V_VALID)
                            && !(hpte_v & HPTE_V_BOLTED))
                                break;
                        native_unlock_hpte(hptep);
                }

                slot_offset++;
                slot_offset &= 0x7;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        /* Invalidate the hpte. NOTE: this also unlocks it */
        hptep->v = 0;

        return i;
}

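/*
 * Update the protection bits of a live HPTE in place.  Returns -1
 * without touching the entry if the slot no longer holds the expected
 * VA; either way the old translation is flushed from the TLB, since
 * the caller can no longer rely on it.
 */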
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
                                 unsigned long va, int psize, int ssize,
                                 int local)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v, want_v;
        int ret = 0;

        want_v = hpte_encode_v(va, psize, ssize);

        DBG_LOW("    update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
                va, want_v & HPTE_V_AVPN, slot, newpp);

        native_lock_hpte(hptep);

        hpte_v = hptep->v;

        /* Even if we miss, we need to invalidate the TLB */
        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
                DBG_LOW(" -> miss\n");
                ret = -1;
        } else {
                DBG_LOW(" -> hit\n");
                /* Update the HPTE */
                hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
                        (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
        }
        native_unlock_hpte(hptep);

        /* Ensure it is out of the tlb too. */
        tlbie(va, psize, ssize, local);

        return ret;
}

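/*
 * Look up the slot of a bolted translation by recomputing its hash
 * and scanning the primary group for a matching, valid entry.
 * Returns the global slot number, or -1 if the entry is not present.
 */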
static long native_hpte_find(unsigned long va, int psize, int ssize)
{
        struct hash_pte *hptep;
        unsigned long hash;
        unsigned long i;
        long slot;
        unsigned long want_v, hpte_v;

        hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
        want_v = hpte_encode_v(va, psize, ssize);

        /* Bolted mappings are only ever in the primary group */
        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        for (i = 0; i < HPTES_PER_GROUP; i++) {
                hptep = htab_address + slot;
                hpte_v = hptep->v;

                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
                        /* HPTE matches */
                        return slot;
                ++slot;
        }

        return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
                                       int psize, int ssize)
{
        unsigned long vsid, va;
        long slot;
        struct hash_pte *hptep;

        vsid = get_kernel_vsid(ea, ssize);
        va = hpt_va(ea, vsid, ssize);

        slot = native_hpte_find(va, psize, ssize);
        if (slot == -1)
                panic("could not find page to bolt\n");
        hptep = htab_address + slot;

        /* Update the HPTE */
        hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
                (newpp & (HPTE_R_PP | HPTE_R_N));

        /* Ensure it is out of the tlb too. */
        tlbie(va, psize, ssize, 0);
}

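/*
 * Invalidate one HPTE and flush the translation from the TLB.  Runs
 * with interrupts off so the sequence cannot be interrupted between
 * taking the per-HPTE lock and issuing the tlbie.  If the slot no
 * longer matches the expected VA the entry is left alone, but the
 * TLB is flushed anyway.
 */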
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
                                   int psize, int ssize, int local)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;

        local_irq_save(flags);

        DBG_LOW("    invalidate(va=%016lx, hash: %x)\n", va, slot);

        want_v = hpte_encode_v(va, psize, ssize);
        native_lock_hpte(hptep);
        hpte_v = hptep->v;

        /* Even if we miss, we need to invalidate the TLB */
        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
                native_unlock_hpte(hptep);
        else
                /* Invalidate the hpte. NOTE: this also unlocks it */
                hptep->v = 0;

        /* Invalidate the TLB */
        tlbie(va, psize, ssize, local);

        local_irq_restore(flags);
}

#define LP_SHIFT        12
#define LP_BITS         8
#define LP_MASK(i)      ((0xFF >> (i)) << LP_SHIFT)

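/*
 * Reverse-engineer the VA, page size and segment size from a raw
 * HPTE.  For large pages the page size is recovered from the LP field
 * of the second doubleword; the low-order VA bits that the AVPN does
 * not cover are reconstructed from the slot number by inverting the
 * hash function.  Used only by native_hpte_clear(), which must flush
 * translations it knows nothing about at kexec time.
 */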
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
                        int *psize, int *ssize, unsigned long *va)
{
        unsigned long hpte_r = hpte->r;
        unsigned long hpte_v = hpte->v;
        unsigned long avpn;
        int i, size, shift, penc;

        if (!(hpte_v & HPTE_V_LARGE))
                size = MMU_PAGE_4K;
        else {
                for (i = 0; i < LP_BITS; i++) {
                        if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
                                break;
                }
                penc = LP_MASK(i+1) >> LP_SHIFT;
                for (size = 0; size < MMU_PAGE_COUNT; size++) {
                        /* 4K pages are not represented by LP */
                        if (size == MMU_PAGE_4K)
                                continue;

                        /* valid entries have a shift value */
                        if (!mmu_psize_defs[size].shift)
                                continue;

                        if (penc == mmu_psize_defs[size].penc)
                                break;
                }
        }

        /* This works for all page sizes, and for 256M and 1T segments */
        shift = mmu_psize_defs[size].shift;
        avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;

        if (shift < 23) {
                unsigned long vpi, vsid, pteg;

                pteg = slot / HPTES_PER_GROUP;
                if (hpte_v & HPTE_V_SECONDARY)
                        pteg = ~pteg;
                switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
                case MMU_SEGSIZE_256M:
                        vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
                        break;
                case MMU_SEGSIZE_1T:
                        vsid = avpn >> 40;
                        vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
                        break;
                default:
                        avpn = vpi = size = 0;
                }
                avpn |= (vpi << mmu_psize_defs[size].shift);
        }

        *va = avpn;
        *psize = size;
        *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
}

/*
 * clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
        unsigned long slot, slots, flags;
        struct hash_pte *hptep = htab_address;
        unsigned long hpte_v, va;
        unsigned long pteg_count;
        int psize, ssize;

        pteg_count = htab_hash_mask + 1;

        local_irq_save(flags);

        /* we take the tlbie lock and hold it.  Some hardware will
         * deadlock if we try to tlbie from two processors at once.
         */
        raw_spin_lock(&native_tlbie_lock);

        slots = pteg_count * HPTES_PER_GROUP;

        for (slot = 0; slot < slots; slot++, hptep++) {
                /*
                 * we could lock the pte here, but we are the only cpu
                 * running, right?  and for crash dump, we probably
                 * don't want to wait for a maybe bad cpu.
                 */
                hpte_v = hptep->v;

                /*
                 * Call __tlbie() here rather than tlbie() since we
                 * already hold the native_tlbie_lock.
                 */
                if (hpte_v & HPTE_V_VALID) {
                        hpte_decode(hptep, slot, &psize, &ssize, &va);
                        hptep->v = 0;
                        __tlbie(va, psize, ssize);
                }
        }

        asm volatile("eieio; tlbsync; ptesync":::"memory");
        raw_spin_unlock(&native_tlbie_lock);
        local_irq_restore(flags);
}

/*
 * Batched hash table flush: we batch the tlbies to avoid taking and
 * releasing the lock for every translation.
 */
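/*
 * Two passes: first every HPTE in the batch is invalidated under its
 * per-entry lock, then the corresponding TLB entries are flushed in
 * one burst, either locally with tlbiel or globally with tlbie under
 * a single acquisition of native_tlbie_lock.
 */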
static void native_flush_hash_range(unsigned long number, int local)
{
        unsigned long va, hash, index, hidx, shift, slot;
        struct hash_pte *hptep;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;
        real_pte_t pte;
        struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
        unsigned long psize = batch->psize;
        int ssize = batch->ssize;
        int i;

        local_irq_save(flags);

        for (i = 0; i < number; i++) {
                va = batch->vaddr[i];
                pte = batch->pte[i];

                pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
                        hash = hpt_hash(va, shift, ssize);
                        hidx = __rpte_to_hidx(pte, index);
                        if (hidx & _PTEIDX_SECONDARY)
                                hash = ~hash;
                        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                        slot += hidx & _PTEIDX_GROUP_IX;
                        hptep = htab_address + slot;
                        want_v = hpte_encode_v(va, psize, ssize);
                        native_lock_hpte(hptep);
                        hpte_v = hptep->v;
                        if (!HPTE_V_COMPARE(hpte_v, want_v) ||
                            !(hpte_v & HPTE_V_VALID))
                                native_unlock_hpte(hptep);
                        else
                                hptep->v = 0;
                } pte_iterate_hashed_end();
        }

        if (cpu_has_feature(CPU_FTR_TLBIEL) &&
            mmu_psize_defs[psize].tlbiel && local) {
                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        va = batch->vaddr[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize, va, index,
                                                    shift) {
                                __tlbiel(va, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                asm volatile("ptesync":::"memory");
        } else {
                int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);

                if (lock_tlbie)
                        raw_spin_lock(&native_tlbie_lock);

                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        va = batch->vaddr[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize, va, index,
                                                    shift) {
                                __tlbie(va, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                asm volatile("eieio; tlbsync; ptesync":::"memory");

                if (lock_tlbie)
                        raw_spin_unlock(&native_tlbie_lock);
        }

        local_irq_restore(flags);
}

#ifdef CONFIG_PPC_PSERIES
/* Disable TLB batching on nighthawk */
static inline int tlb_batching_enabled(void)
{
        struct device_node *root = of_find_node_by_path("/");
        int enabled = 1;

        if (root) {
                const char *model = of_get_property(root, "model", NULL);
                if (model && !strcmp(model, "IBM,9076-N81"))
                        enabled = 0;
                of_node_put(root);
        }

        return enabled;
}
#else
static inline int tlb_batching_enabled(void)
{
        return 1;
}
#endif

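/*
 * Register the native (bare-metal) HPTE management callbacks in
 * ppc_md.  The batched flush path is only wired up when
 * tlb_batching_enabled() does not report the known-problematic
 * machine model.
 */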
void __init hpte_init_native(void)
{
        ppc_md.hpte_invalidate  = native_hpte_invalidate;
        ppc_md.hpte_updatepp    = native_hpte_updatepp;
        ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
        ppc_md.hpte_insert      = native_hpte_insert;
        ppc_md.hpte_remove      = native_hpte_remove;
        ppc_md.hpte_clear_all   = native_hpte_clear;
        if (tlb_batching_enabled())
                ppc_md.flush_hash_range = native_flush_hash_range;
}