linux/arch/powerpc/mm/hash_native_64.c
/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/abs_addr.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

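/*
 * Per-HPTE software lock.  Bit 3 of the first doubleword (counting from
 * the least-significant bit, i.e. generic Linux bitops numbering) is one
 * of the bits the architecture leaves for software use, so we can spin on
 * it to serialize updates to an individual hash PTE.
 */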
#define HPTE_LOCK_BIT 3

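/*
 * Serializes broadcast tlbie's.  Some hardware will deadlock if two CPUs
 * issue tlbie at the same time, so unless the CPU advertises
 * CPU_FTR_LOCKLESS_TLBIE we take this lock around every global flush.
 */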
static DEFINE_SPINLOCK(native_tlbie_lock);

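/*
 * Issue a broadcast tlbie for one page.  The operand register carries the
 * virtual address with its low bits replaced by the segment size (B field,
 * bit 8) and, for large pages, the page size encoding (penc, bit 12) plus
 * the L bit.  The feature fixup emits the old "tlbie rb,l" form when
 * MMU_FTR_TLBIE_206 is clear and the newer PPC_TLBIE encoding otherwise.
 */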
static inline void __tlbie(unsigned long va, int psize, int ssize)
{
        unsigned int penc;

        /* clear top 16 bits, non SLS segment */
        va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                va &= ~0xffful;
                va |= ssize << 8;
                asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0),
                                               %2)
                             : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
                             : "memory");
                break;
        default:
                penc = mmu_psize_defs[psize].penc;
                va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                va |= 1; /* L */
                asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0),
                                               %2)
                             : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
                             : "memory");
                break;
        }
}

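/*
 * Local form: tlbiel only invalidates the TLB of the executing CPU.  The
 * instruction is hand-encoded as a .long (opcode 0x7c000224 with the
 * address register in the RB field at bit 11 and the large-page L bit at
 * bit 21), presumably so that toolchains without tlbiel support can still
 * assemble this file.
 */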
static inline void __tlbiel(unsigned long va, int psize, int ssize)
{
        unsigned int penc;

        /* clear top 16 bits, non SLS segment */
        va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                va &= ~0xffful;
                va |= ssize << 8;
                asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
                             : : "r"(va) : "memory");
                break;
        default:
                penc = mmu_psize_defs[psize].penc;
                va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                va |= 1; /* L */
                asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
                             : : "r"(va) : "memory");
                break;
        }

}

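/*
 * Flush one page translation: use tlbiel when the flush is CPU-local, the
 * CPU supports it and the page size has a tlbiel encoding; otherwise
 * broadcast with tlbie under native_tlbie_lock where required.  The
 * leading ptesync orders the preceding HPTE update; the eieio/tlbsync/
 * ptesync tail waits for the broadcast invalidation to complete.
 */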
static inline void tlbie(unsigned long va, int psize, int ssize, int local)
{
        unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
        int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);

        if (use_local)
                use_local = mmu_psize_defs[psize].tlbiel;
        if (lock_tlbie && !use_local)
                spin_lock(&native_tlbie_lock);
        asm volatile("ptesync": : :"memory");
        if (use_local) {
                __tlbiel(va, psize, ssize);
                asm volatile("ptesync": : :"memory");
        } else {
                __tlbie(va, psize, ssize);
                asm volatile("eieio; tlbsync; ptesync": : :"memory");
        }
        if (lock_tlbie && !use_local)
                spin_unlock(&native_tlbie_lock);
}

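/*
 * Lock an HPTE by spinning on its software lock bit: test_and_set_bit()
 * acquires it, cpu_relax() backs off while another CPU holds it.  The
 * unlock path issues an lwsync first so that HPTE updates made under the
 * lock are visible before the bit is cleared.
 */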
static inline void native_lock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = &hptep->v;

        while (1) {
                if (!test_and_set_bit(HPTE_LOCK_BIT, word))
                        break;
                while(test_bit(HPTE_LOCK_BIT, word))
                        cpu_relax();
        }
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = &hptep->v;

        asm volatile("lwsync":::"memory");
        clear_bit(HPTE_LOCK_BIT, word);
}

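/*
 * Insert a new HPTE into the given group.  Returns the slot index within
 * the group (0-7), with bit 3 set when the caller asked for the secondary
 * hash (HPTE_V_SECONDARY), or -1 if every slot in the group is already
 * valid and an entry must be evicted first.
 */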
static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
                        unsigned long pa, unsigned long rflags,
                        unsigned long vflags, int psize, int ssize)
{
        struct hash_pte *hptep = htab_address + hpte_group;
        unsigned long hpte_v, hpte_r;
        int i;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW("    insert(group=%lx, va=%016lx, pa=%016lx,"
                        " rflags=%lx, vflags=%lx, psize=%d)\n",
                        hpte_group, va, pa, rflags, vflags, psize);
        }

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                if (! (hptep->v & HPTE_V_VALID)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        if (! (hptep->v & HPTE_V_VALID))
                                break;
                        native_unlock_hpte(hptep);
                }

                hptep++;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
        hpte_r = hpte_encode_r(pa, psize) | rflags;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
                        i, hpte_v, hpte_r);
        }

        hptep->r = hpte_r;
        /* Guarantee the second dword is visible before the valid bit */
        eieio();
        /*
         * Now set the first dword including the valid bit
         * NOTE: this also unlocks the hpte
         */
        hptep->v = hpte_v;

        __asm__ __volatile__ ("ptesync" : : : "memory");

        return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

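/*
 * Evict one valid, non-bolted entry from a full group so that an insert
 * can be retried.  The search starts at a pseudo-random slot (low bits of
 * the timebase) to spread evictions across the group.  Returns the slot
 * that was cleared, or -1 if every entry in the group is bolted.
 */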
static long native_hpte_remove(unsigned long hpte_group)
{
        struct hash_pte *hptep;
        int i;
        int slot_offset;
        unsigned long hpte_v;

        DBG_LOW("    remove(group=%lx)\n", hpte_group);

        /* pick a random entry to start at */
        slot_offset = mftb() & 0x7;

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                hptep = htab_address + hpte_group + slot_offset;
                hpte_v = hptep->v;

                if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        hpte_v = hptep->v;
                        if ((hpte_v & HPTE_V_VALID)
                            && !(hpte_v & HPTE_V_BOLTED))
                                break;
                        native_unlock_hpte(hptep);
                }

                slot_offset++;
                slot_offset &= 0x7;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        /* Invalidate the hpte. NOTE: this also unlocks it */
        hptep->v = 0;

        return i;
}

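/*
 * Update the protection bits of an existing HPTE, provided it still maps
 * the expected virtual address.  Returns -1 on a mismatch so the caller
 * can re-fault the translation; the TLB entry is flushed either way since
 * a stale translation may still be cached.
 */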
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
                                 unsigned long va, int psize, int ssize,
                                 int local)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v, want_v;
        int ret = 0;

        want_v = hpte_encode_v(va, psize, ssize);

        DBG_LOW("    update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
                va, want_v & HPTE_V_AVPN, slot, newpp);

        native_lock_hpte(hptep);

        hpte_v = hptep->v;

        /* Even if we miss, we need to invalidate the TLB */
        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
                DBG_LOW(" -> miss\n");
                ret = -1;
        } else {
                DBG_LOW(" -> hit\n");
                /* Update the HPTE */
                hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
                        (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
        }
        native_unlock_hpte(hptep);

        /* Ensure it is out of the tlb too. */
        tlbie(va, psize, ssize, local);

        return ret;
}

static long native_hpte_find(unsigned long va, int psize, int ssize)
{
        struct hash_pte *hptep;
        unsigned long hash;
        unsigned long i;
        long slot;
        unsigned long want_v, hpte_v;

        hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
        want_v = hpte_encode_v(va, psize, ssize);

        /* Bolted mappings are only ever in the primary group */
        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        for (i = 0; i < HPTES_PER_GROUP; i++) {
                hptep = htab_address + slot;
                hpte_v = hptep->v;

                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
                        /* HPTE matches */
                        return slot;
                ++slot;
        }

        return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
                                       int psize, int ssize)
{
        unsigned long vsid, va;
        long slot;
        struct hash_pte *hptep;

        vsid = get_kernel_vsid(ea, ssize);
        va = hpt_va(ea, vsid, ssize);

        slot = native_hpte_find(va, psize, ssize);
        if (slot == -1)
                panic("could not find page to bolt\n");
        hptep = htab_address + slot;

        /* Update the HPTE */
        hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
                (newpp & (HPTE_R_PP | HPTE_R_N));

        /* Ensure it is out of the tlb too. */
        tlbie(va, psize, ssize, 0);
}

static void native_hpte_invalidate(unsigned long slot, unsigned long va,
                                   int psize, int ssize, int local)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;

        local_irq_save(flags);

        DBG_LOW("    invalidate(va=%016lx, hash: %x)\n", va, slot);

        want_v = hpte_encode_v(va, psize, ssize);
        native_lock_hpte(hptep);
        hpte_v = hptep->v;

        /* Even if we miss, we need to invalidate the TLB */
        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
                native_unlock_hpte(hptep);
        else
                /* Invalidate the hpte. NOTE: this also unlocks it */
                hptep->v = 0;

        /* Invalidate the TLB */
        tlbie(va, psize, ssize, local);

        local_irq_restore(flags);
}

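/*
 * hpte_decode() works backwards from a raw HPTE to the (va, psize, ssize)
 * it maps, for use by native_hpte_clear() at kexec time.  For large pages
 * the page size encoding (penc) is folded into the LP bits of the second
 * doubleword; we recover it and match it against mmu_psize_defs[], then
 * rebuild the VA from the AVPN, using the slot's hash group to recover
 * the low bits when the page shift is below 23.
 */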
#define LP_SHIFT        12
#define LP_BITS         8
#define LP_MASK(i)      ((0xFF >> (i)) << LP_SHIFT)

static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
                        int *psize, int *ssize, unsigned long *va)
{
        unsigned long hpte_r = hpte->r;
        unsigned long hpte_v = hpte->v;
        unsigned long avpn;
        int i, size, shift, penc;

        if (!(hpte_v & HPTE_V_LARGE))
                size = MMU_PAGE_4K;
        else {
                for (i = 0; i < LP_BITS; i++) {
                        if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
                                break;
                }
                penc = LP_MASK(i+1) >> LP_SHIFT;
                for (size = 0; size < MMU_PAGE_COUNT; size++) {

                        /* 4K pages are not represented by LP */
                        if (size == MMU_PAGE_4K)
                                continue;

                        /* valid entries have a shift value */
                        if (!mmu_psize_defs[size].shift)
                                continue;

                        if (penc == mmu_psize_defs[size].penc)
                                break;
                }
        }

        /* This works for all page sizes, and for 256M and 1T segments */
        shift = mmu_psize_defs[size].shift;
        avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;

        if (shift < 23) {
                unsigned long vpi, vsid, pteg;

                pteg = slot / HPTES_PER_GROUP;
                if (hpte_v & HPTE_V_SECONDARY)
                        pteg = ~pteg;
                switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
                case MMU_SEGSIZE_256M:
                        vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
                        break;
                case MMU_SEGSIZE_1T:
                        vsid = avpn >> 40;
                        vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
                        break;
                default:
                        avpn = vpi = size = 0;
                }
                avpn |= (vpi << mmu_psize_defs[size].shift);
        }

        *va = avpn;
        *psize = size;
        *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
}

/*
 * clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
        unsigned long slot, slots, flags;
        struct hash_pte *hptep = htab_address;
        unsigned long hpte_v, va;
        unsigned long pteg_count;
        int psize, ssize;

        pteg_count = htab_hash_mask + 1;

        local_irq_save(flags);

        /* we take the tlbie lock and hold it.  Some hardware will
         * deadlock if we try to tlbie from two processors at once.
         */
        spin_lock(&native_tlbie_lock);

        slots = pteg_count * HPTES_PER_GROUP;

        for (slot = 0; slot < slots; slot++, hptep++) {
                /*
                 * we could lock the pte here, but we are the only cpu
                 * running,  right?  and for crash dump, we probably
                 * don't want to wait for a maybe bad cpu.
                 */
                hpte_v = hptep->v;

                /*
                 * Call __tlbie() here rather than tlbie() since we
                 * already hold the native_tlbie_lock.
                 */
                if (hpte_v & HPTE_V_VALID) {
                        hpte_decode(hptep, slot, &psize, &ssize, &va);
                        hptep->v = 0;
                        __tlbie(va, psize, ssize);
                }
        }

        asm volatile("eieio; tlbsync; ptesync":::"memory");
        spin_unlock(&native_tlbie_lock);
        local_irq_restore(flags);
}

/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 */
static void native_flush_hash_range(unsigned long number, int local)
{
        unsigned long va, hash, index, hidx, shift, slot;
        struct hash_pte *hptep;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;
        real_pte_t pte;
        struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
        unsigned long psize = batch->psize;
        int ssize = batch->ssize;
        int i;

        local_irq_save(flags);

        for (i = 0; i < number; i++) {
                va = batch->vaddr[i];
                pte = batch->pte[i];

                pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
                        hash = hpt_hash(va, shift, ssize);
                        hidx = __rpte_to_hidx(pte, index);
                        if (hidx & _PTEIDX_SECONDARY)
                                hash = ~hash;
                        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                        slot += hidx & _PTEIDX_GROUP_IX;
                        hptep = htab_address + slot;
                        want_v = hpte_encode_v(va, psize, ssize);
                        native_lock_hpte(hptep);
                        hpte_v = hptep->v;
                        if (!HPTE_V_COMPARE(hpte_v, want_v) ||
                            !(hpte_v & HPTE_V_VALID))
                                native_unlock_hpte(hptep);
                        else
                                hptep->v = 0;
                } pte_iterate_hashed_end();
        }

        if (cpu_has_feature(CPU_FTR_TLBIEL) &&
            mmu_psize_defs[psize].tlbiel && local) {
                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        va = batch->vaddr[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize, va, index,
                                                    shift) {
                                __tlbiel(va, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                asm volatile("ptesync":::"memory");
        } else {
                int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);

                if (lock_tlbie)
                        spin_lock(&native_tlbie_lock);

                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        va = batch->vaddr[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize, va, index,
                                                    shift) {
                                __tlbie(va, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                asm volatile("eieio; tlbsync; ptesync":::"memory");

                if (lock_tlbie)
                        spin_unlock(&native_tlbie_lock);
        }

        local_irq_restore(flags);
}

#ifdef CONFIG_PPC_PSERIES
/* Disable TLB batching on nighthawk */
static inline int tlb_batching_enabled(void)
{
        struct device_node *root = of_find_node_by_path("/");
        int enabled = 1;

        if (root) {
                const char *model = of_get_property(root, "model", NULL);
                if (model && !strcmp(model, "IBM,9076-N81"))
                        enabled = 0;
                of_node_put(root);
        }

        return enabled;
}
#else
static inline int tlb_batching_enabled(void)
{
        return 1;
}
#endif

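/*
 * Install the native hash-table management routines in the machdep ops.
 * Used when the kernel owns the hash table directly (bare metal), as
 * opposed to going through a hypervisor interface.
 */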
void __init hpte_init_native(void)
{
        ppc_md.hpte_invalidate  = native_hpte_invalidate;
        ppc_md.hpte_updatepp    = native_hpte_updatepp;
        ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
        ppc_md.hpte_insert      = native_hpte_insert;
        ppc_md.hpte_remove      = native_hpte_remove;
        ppc_md.hpte_clear_all   = native_hpte_clear;
        if (tlb_batching_enabled())
                ppc_md.flush_hash_range = native_flush_hash_range;
}