linux/arch/sparc/mm/tsb.c
/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/system.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tsb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

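/* The TSB is a simple direct-mapped hash table: the entry index is the
 * virtual address shifted down by the page shift and masked by the
 * table size (always a power of two), and each entry's tag holds
 * vaddr >> 22 so a single comparison decides whether the entry covers
 * a given address.
 */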
static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
        vaddr >>= hash_shift;
        return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
        return (tag == (vaddr >> 22));
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

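/* Invalidate any swapper_tsb entries covering [start, end) by writing
 * a tag with the invalid bit set; the TLB itself is flushed separately.
 */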
void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
        unsigned long v;

        for (v = start; v < end; v += PAGE_SIZE) {
                unsigned long hash = tsb_hash(v, PAGE_SHIFT,
                                              KERNEL_TSB_NENTRIES);
                struct tsb *ent = &swapper_tsb[hash];

                if (tag_compare(ent->tag, v))
                        ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
}

static void __flush_tsb_one(struct mmu_gather *mp, unsigned long hash_shift, unsigned long tsb, unsigned long nentries)
{
        unsigned long i;

        for (i = 0; i < mp->tlb_nr; i++) {
                unsigned long v = mp->vaddrs[i];
                unsigned long tag, ent, hash;

                v &= ~0x1UL;

                hash = tsb_hash(v, hash_shift, nentries);
                ent = tsb + (hash * sizeof(struct tsb));
                tag = (v >> 22UL);

                tsb_flush(ent, tag);
        }
}

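/* Flush the user TSB entries for every address gathered in *mp.  On
 * cheetah_plus and sun4v hypervisor chips the TSB is referenced by
 * physical address, so the base is converted with __pa() before it is
 * handed to __flush_tsb_one().
 */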
void flush_tsb_user(struct mmu_gather *mp)
{
        struct mm_struct *mm = mp->mm;
        unsigned long nentries, base, flags;

        spin_lock_irqsave(&mm->context.lock, flags);

        base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
        nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
        if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                base = __pa(base);
        __flush_tsb_one(mp, PAGE_SHIFT, base, nentries);

#ifdef CONFIG_HUGETLB_PAGE
        if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
                __flush_tsb_one(mp, HPAGE_SHIFT, base, nentries);
        }
#endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
}

#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define HV_PGSZ_IDX_BASE        HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE       HV_PGSZ_MASK_8K
#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
#define HV_PGSZ_IDX_BASE        HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_BASE       HV_PGSZ_MASK_64K
#else
#error Broken base page size setting...
#endif

#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
#define HV_PGSZ_IDX_HUGE        HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_HUGE       HV_PGSZ_MASK_64K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
#define HV_PGSZ_IDX_HUGE        HV_PGSZ_IDX_512K
#define HV_PGSZ_MASK_HUGE       HV_PGSZ_MASK_512K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define HV_PGSZ_IDX_HUGE        HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE       HV_PGSZ_MASK_4MB
#else
#error Broken huge page size setting...
#endif
#endif

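/* Program the software state that describes one TSB: tsb_reg_val packs
 * the size field into its low three bits (0 == 8KB ... 7 == 1MB) and
 * the table's base address into the upper bits (a physical address on
 * cheetah_plus/hypervisor, a locked virtual mapping otherwise).  On
 * sun4v a hv_tsb_descr is also filled in so the hypervisor can walk
 * the table.
 */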
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
        unsigned long tsb_reg, base, tsb_paddr;
        unsigned long page_sz, tte;

        mm->context.tsb_block[tsb_idx].tsb_nentries =
                tsb_bytes / sizeof(struct tsb);

        base = TSBMAP_BASE;
        tte = pgprot_val(PAGE_KERNEL_LOCKED);
        tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
        BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

        /* Use the smallest page size that can map the whole TSB
         * in one TLB entry.
         */
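        /* For example: a 16KB-64KB TSB maps with one 64KB page, a
         * 128KB-512KB TSB with one 512KB page, and the maximum 1MB
         * TSB with one 4MB page.
         */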
        switch (tsb_bytes) {
        case 8192 << 0:
                tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
                base += (tsb_paddr & 8192);
#endif
                page_sz = 8192;
                break;

        case 8192 << 1:
                tsb_reg = 0x1UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 2:
                tsb_reg = 0x2UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 3:
                tsb_reg = 0x3UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 4:
                tsb_reg = 0x4UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 5:
                tsb_reg = 0x5UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 6:
                tsb_reg = 0x6UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 7:
                tsb_reg = 0x7UL;
                page_sz = 4 * 1024 * 1024;
                break;

        default:
                printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
                       current->comm, current->pid, tsb_bytes);
                do_exit(SIGSEGV);
        }
        tte |= pte_sz_bits(page_sz);

        if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                /* Physical mapping, no locked TLB entry for TSB.  */
                tsb_reg |= tsb_paddr;

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
        } else {
                tsb_reg |= base;
                tsb_reg |= (tsb_paddr & (page_sz - 1UL));
                tte |= (tsb_paddr & ~(page_sz - 1UL));

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
        }

        /* Setup the Hypervisor TSB descriptor.  */
        if (tlb_type == hypervisor) {
                struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_idx = HV_PGSZ_IDX_BASE;
                        break;
#ifdef CONFIG_HUGETLB_PAGE
                case MM_TSB_HUGE:
                        hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->assoc = 1;
                hp->num_ttes = tsb_bytes / 16;
                hp->ctx_idx = 0;
                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_mask = HV_PGSZ_MASK_BASE;
                        break;
#ifdef CONFIG_HUGETLB_PAGE
                case MM_TSB_HUGE:
                        hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->tsb_base = tsb_paddr;
                hp->resv = 0;
        }
}

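/* One kmem cache per possible TSB size.  Each cache aligns its objects
 * to their own size, which is what lets setup_tsb_params() assume a
 * naturally aligned table (see the BUG_ON() on tsb_paddr above).
 */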
static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
        "tsb_8KB",
        "tsb_16KB",
        "tsb_32KB",
        "tsb_64KB",
        "tsb_128KB",
        "tsb_256KB",
        "tsb_512KB",
        "tsb_1MB",
};

void __init pgtable_cache_init(void)
{
        unsigned long i;

        for (i = 0; i < 8; i++) {
                unsigned long size = 8192 << i;
                const char *name = tsb_cache_names[i];

                tsb_caches[i] = kmem_cache_create(name,
                                                  size, size,
                                                  0, NULL);
                if (!tsb_caches[i]) {
                        prom_printf("Could not create %s cache\n", name);
                        prom_halt();
                }
        }
}

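/* tsb_rss_limit is derived from the TSB capacity: a negative
 * sysctl_tsb_ratio of -N sets the limit to capacity minus
 * capacity >> N (so the default of -2 triggers a grow at 3/4
 * occupancy), while a non-negative value of N allows the RSS to
 * exceed capacity by capacity >> N before growing.
 */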
int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
        unsigned long num_ents = (new_size / sizeof(struct tsb));

        if (sysctl_tsb_ratio < 0)
                return num_ents - (num_ents >> -sysctl_tsb_ratio);
        else
                return num_ents + (num_ents >> sysctl_tsb_ratio);
}

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try and grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
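/* As a worked example with the default sysctl_tsb_ratio of -2: the
 * initial 8KB TSB holds 512 of the 16-byte entries, so the first grow
 * happens once the RSS passes 384 pages; each doubling of the TSB
 * doubles that threshold until the 1MB (or MAX_ORDER-limited) ceiling
 * is reached.
 */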
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
        unsigned long max_tsb_size = 1 * 1024 * 1024;
        unsigned long new_size, old_size, flags;
        struct tsb *old_tsb, *new_tsb;
        unsigned long new_cache_index, old_cache_index;
        unsigned long new_rss_limit;
        gfp_t gfp_flags;

        if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
                max_tsb_size = (PAGE_SIZE << MAX_ORDER);

        new_cache_index = 0;
        for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
                new_rss_limit = tsb_size_to_rss_limit(new_size);
                if (new_rss_limit > rss)
                        break;
                new_cache_index++;
        }

        if (new_size == max_tsb_size)
                new_rss_limit = ~0UL;

retry_tsb_alloc:
        gfp_flags = GFP_KERNEL;
        if (new_size > (PAGE_SIZE * 2))
                gfp_flags = __GFP_NOWARN | __GFP_NORETRY;

        new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
                                        gfp_flags, numa_node_id());
        if (unlikely(!new_tsb)) {
                /* Not being able to fork due to a high-order TSB
                 * allocation failure is very bad behavior.  Just back
                 * down to a 0-order allocation and force no TSB
                 * growing for this address space.
                 */
                if (mm->context.tsb_block[tsb_index].tsb == NULL &&
                    new_cache_index > 0) {
                        new_cache_index = 0;
                        new_size = 8192;
                        new_rss_limit = ~0UL;
                        goto retry_tsb_alloc;
                }

                /* If we failed on a TSB grow, we are under serious
                 * memory pressure so don't try to grow any more.
                 */
                if (mm->context.tsb_block[tsb_index].tsb != NULL)
                        mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
                return;
        }

        /* Mark all tags as invalid.  */
        tsb_init(new_tsb, new_size);

        /* Ok, we are about to commit the changes.  If we are
         * growing an existing TSB the locking is very tricky,
         * so WATCH OUT!
         *
         * We have to hold mm->context.lock while committing to the
         * new TSB; this synchronizes us with processors in
         * flush_tsb_user() and switch_mm() for this address space.
         *
         * But even with that lock held, processors run asynchronously
         * accessing the old TSB via TLB miss handling.  This is OK
         * because those actions are just propagating state from the
         * Linux page tables into the TSB; page table mappings are not
         * being changed.  If a real fault occurs, the processor will
         * synchronize with us when it hits flush_tsb_user(); this is
         * also true for the case where vmscan is modifying the page
         * tables.  The only thing we need to be careful with is to
         * skip any locked TSB entries during copy_tsb().
         *
         * When we finish committing to the new TSB, we have to drop
         * the lock and ask all other cpus running this address space
         * to run tsb_context_switch() to see the new TSB table.
         */
        spin_lock_irqsave(&mm->context.lock, flags);

        old_tsb = mm->context.tsb_block[tsb_index].tsb;
        old_cache_index =
                (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
        old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
                    sizeof(struct tsb));

        /* Handle multiple threads trying to grow the TSB at the same time.
         * One will get in here first, and bump the size and the RSS limit.
         * The others will get in here next and hit this check.
         */
        if (unlikely(old_tsb &&
                     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
                spin_unlock_irqrestore(&mm->context.lock, flags);

                kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
                return;
        }

        mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

        if (old_tsb) {
                extern void copy_tsb(unsigned long old_tsb_base,
                                     unsigned long old_tsb_size,
                                     unsigned long new_tsb_base,
                                     unsigned long new_tsb_size);
                unsigned long old_tsb_base = (unsigned long) old_tsb;
                unsigned long new_tsb_base = (unsigned long) new_tsb;

                if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                        old_tsb_base = __pa(old_tsb_base);
                        new_tsb_base = __pa(new_tsb_base);
                }
                copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
        }

        mm->context.tsb_block[tsb_index].tsb = new_tsb;
        setup_tsb_params(mm, tsb_index, new_size);

        spin_unlock_irqrestore(&mm->context.lock, flags);

        /* If old_tsb is NULL, we're being invoked for the first time
         * from init_new_context().
         */
        if (old_tsb) {
                /* Reload it on the local cpu.  */
                tsb_context_switch(mm);

                /* Now force other processors to do the same.  */
                preempt_disable();
                smp_tsb_sync(mm);
                preempt_enable();

                /* Now it is safe to free the old tsb.  */
                kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
        }
}

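/* Called when a new address space is set up.  copy_mm() has already
 * duplicated the parent's mm_struct, so the context value and TSB
 * pointers are reset here and a fresh base TSB is allocated, sized to
 * the current RSS so that a forked child starts with a TSB comparable
 * to its parent's.
 */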
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#ifdef CONFIG_HUGETLB_PAGE
        unsigned long huge_pte_count;
#endif
        unsigned int i;

        spin_lock_init(&mm->context.lock);

        mm->context.sparc64_ctx_val = 0UL;

#ifdef CONFIG_HUGETLB_PAGE
        /* We reset it to zero because the fork() page copying
         * will re-increment the counter as the parent PTEs are
         * copied into the child address space.
         */
        huge_pte_count = mm->context.huge_pte_count;
        mm->context.huge_pte_count = 0;
#endif

        /* copy_mm() copies over the parent's mm_struct before calling
         * us, so we need to zero out the TSB pointer or else tsb_grow()
         * will be confused and think there is an older TSB to free up.
         */
        for (i = 0; i < MM_NUM_TSBS; i++)
                mm->context.tsb_block[i].tsb = NULL;

        /* If this is fork, inherit the parent's TSB size.  We would
         * grow it to that size on the first page fault anyway.
         */
        tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#ifdef CONFIG_HUGETLB_PAGE
        if (unlikely(huge_pte_count))
                tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

        if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
                return -ENOMEM;

        return 0;
}

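/* Teardown: each TSB is returned to the kmem cache it came from (the
 * size index lives in the low three bits of tsb_reg_val), and then the
 * context number, if one was ever allocated, is released back to
 * mmu_context_bmap under ctx_alloc_lock.
 */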
static void tsb_destroy_one(struct tsb_config *tp)
{
        unsigned long cache_index;

        if (!tp->tsb)
                return;
        cache_index = tp->tsb_reg_val & 0x7UL;
        kmem_cache_free(tsb_caches[cache_index], tp->tsb);
        tp->tsb = NULL;
        tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
        unsigned long flags, i;

        for (i = 0; i < MM_NUM_TSBS; i++)
                tsb_destroy_one(&mm->context.tsb_block[i]);

        spin_lock_irqsave(&ctx_alloc_lock, flags);

        if (CTX_VALID(mm->context)) {
                unsigned long nr = CTX_NRBITS(mm->context);
                mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
        }

        spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}