/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/tsb.h>
#include <asm/tlb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
        vaddr >>= hash_shift;
        return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
        return (tag == (vaddr >> 22));
}
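
/* Explanatory note on the two helpers above: the TSB is a direct
 * mapped, power-of-two sized table.  tsb_hash() turns a virtual
 * address into a table index by discarding the page offset
 * (hash_shift) and masking with (nentries - 1), which relies on
 * nentries being a power of two.  tag_compare() then checks the
 * stored tag against bits 63:22 of the address, the same "va >> 22"
 * tag format written by __flush_tsb_one_entry() below.
 *
 * Illustrative example (numbers are not from the original source):
 * with 8K pages (hash_shift = 13) and a 512-entry TSB, vaddr
 * 0x20006000 hashes to index (0x20006000 >> 13) & 511 = 3.
 */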

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
        unsigned long v;

        for (v = start; v < end; v += PAGE_SIZE) {
                unsigned long hash = tsb_hash(v, PAGE_SHIFT,
                                              KERNEL_TSB_NENTRIES);
                struct tsb *ent = &swapper_tsb[hash];

                if (tag_compare(ent->tag, v))
                        ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
}

static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v,
                                  unsigned long hash_shift,
                                  unsigned long nentries)
{
        unsigned long tag, ent, hash;

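        /* Explanatory note: bit 0 of a batched address is used as a
         * flag by the TLB batching code rather than being part of the
         * virtual address, so it is masked off before hashing.
         */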
        v &= ~0x1UL;
        hash = tsb_hash(v, hash_shift, nentries);
        ent = tsb + (hash * sizeof(struct tsb));
        tag = (v >> 22UL);

        tsb_flush(ent, tag);
}

static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
                            unsigned long tsb, unsigned long nentries)
{
        unsigned long i;

        for (i = 0; i < tb->tlb_nr; i++)
                __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
}

void flush_tsb_user(struct tlb_batch *tb)
{
        struct mm_struct *mm = tb->mm;
        unsigned long nentries, base, flags;

        spin_lock_irqsave(&mm->context.lock, flags);

        base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
        nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
        if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                base = __pa(base);
        __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
                __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
        }
#endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
}

void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
{
        unsigned long nentries, base, flags;

        spin_lock_irqsave(&mm->context.lock, flags);

        base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
        nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
        if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                base = __pa(base);
        __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
                __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries);
        }
#endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
}

#define HV_PGSZ_IDX_BASE        HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE       HV_PGSZ_MASK_8K

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE        HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE       HV_PGSZ_MASK_4MB
#endif

static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
        unsigned long tsb_reg, base, tsb_paddr;
        unsigned long page_sz, tte;

        mm->context.tsb_block[tsb_idx].tsb_nentries =
                tsb_bytes / sizeof(struct tsb);

        base = TSBMAP_BASE;
        tte = pgprot_val(PAGE_KERNEL_LOCKED);
        tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
        BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

        /* Use the smallest page size that can map the whole TSB
         * in one TLB entry.
         */
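        /* Explanatory note: the page sizes available on these MMUs are
         * 8K, 64K, 512K and 4MB, which is why 16K/32K/64K TSBs all map
         * with a 64K page, 128K-512K TSBs with a 512K page, and a 1MB
         * TSB with a 4MB page in the switch below.
         */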
        switch (tsb_bytes) {
        case 8192 << 0:
                tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
                base += (tsb_paddr & 8192);
#endif
                page_sz = 8192;
                break;

        case 8192 << 1:
                tsb_reg = 0x1UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 2:
                tsb_reg = 0x2UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 3:
                tsb_reg = 0x3UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 4:
                tsb_reg = 0x4UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 5:
                tsb_reg = 0x5UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 6:
                tsb_reg = 0x6UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 7:
                tsb_reg = 0x7UL;
                page_sz = 4 * 1024 * 1024;
                break;

        default:
                printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
                       current->comm, current->pid, tsb_bytes);
                do_exit(SIGSEGV);
        }
        tte |= pte_sz_bits(page_sz);

        if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                /* Physical mapping, no locked TLB entry for TSB.  */
                tsb_reg |= tsb_paddr;

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
        } else {
                tsb_reg |= base;
                tsb_reg |= (tsb_paddr & (page_sz - 1UL));
                tte |= (tsb_paddr & ~(page_sz - 1UL));

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
        }

        /* Setup the Hypervisor TSB descriptor.  */
        if (tlb_type == hypervisor) {
                struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_idx = HV_PGSZ_IDX_BASE;
                        break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
                case MM_TSB_HUGE:
                        hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->assoc = 1;
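                /* Explanatory note: each struct tsb entry is 16 bytes
                 * (an 8-byte tag plus an 8-byte TTE), hence the
                 * division by 16 to get the entry count below.
                 */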
                hp->num_ttes = tsb_bytes / 16;
                hp->ctx_idx = 0;
                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_mask = HV_PGSZ_MASK_BASE;
                        break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
                case MM_TSB_HUGE:
                        hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->tsb_base = tsb_paddr;
                hp->resv = 0;
        }
}

struct kmem_cache *pgtable_cache __read_mostly;

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
        "tsb_8KB",
        "tsb_16KB",
        "tsb_32KB",
        "tsb_64KB",
        "tsb_128KB",
        "tsb_256KB",
        "tsb_512KB",
        "tsb_1MB",
};

void __init pgtable_cache_init(void)
{
        unsigned long i;

        pgtable_cache = kmem_cache_create("pgtable_cache",
                                          PAGE_SIZE, PAGE_SIZE,
                                          0,
                                          _clear_page);
        if (!pgtable_cache) {
                prom_printf("pgtable_cache_init(): Could not create!\n");
                prom_halt();
        }

        for (i = 0; i < 8; i++) {
                unsigned long size = 8192 << i;
                const char *name = tsb_cache_names[i];

                tsb_caches[i] = kmem_cache_create(name,
                                                  size, size,
                                                  0, NULL);
                if (!tsb_caches[i]) {
                        prom_printf("Could not create %s cache\n", name);
                        prom_halt();
                }
        }
}

int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
        unsigned long num_ents = (new_size / sizeof(struct tsb));

        if (sysctl_tsb_ratio < 0)
                return num_ents - (num_ents >> -sysctl_tsb_ratio);
        else
                return num_ents + (num_ents >> sysctl_tsb_ratio);
}
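
/* Explanatory note on sysctl_tsb_ratio: with the default value of -2,
 * the limit is num_ents - num_ents/4, i.e. 3/4 of the TSB's capacity,
 * which matches the "3/4" policy described in the comment below.  A
 * positive ratio instead allows the RSS to exceed the capacity before
 * a grow is triggered (num_ents + (num_ents >> ratio)).
 */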

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try to grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
        unsigned long max_tsb_size = 1 * 1024 * 1024;
        unsigned long new_size, old_size, flags;
        struct tsb *old_tsb, *new_tsb;
        unsigned long new_cache_index, old_cache_index;
        unsigned long new_rss_limit;
        gfp_t gfp_flags;

        if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
                max_tsb_size = (PAGE_SIZE << MAX_ORDER);

        new_cache_index = 0;
        for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
                new_rss_limit = tsb_size_to_rss_limit(new_size);
                if (new_rss_limit > rss)
                        break;
                new_cache_index++;
        }
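
        /* Explanatory note, with illustrative numbers: each doubling of
         * new_size also bumps new_cache_index, so the loop picks the
         * smallest TSB whose rss limit exceeds the current RSS.  For
         * example, with the default ratio of -2, an RSS of 10000 pages
         * selects a 256KB TSB (16384 entries, limit 12288) at cache
         * index 5.
         */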

        if (new_size == max_tsb_size)
                new_rss_limit = ~0UL;

retry_tsb_alloc:
        gfp_flags = GFP_KERNEL;
        if (new_size > (PAGE_SIZE * 2))
                gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;

        new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
                                        gfp_flags, numa_node_id());
        if (unlikely(!new_tsb)) {
                /* Not being able to fork due to a high-order TSB
                 * allocation failure is very bad behavior.  Just back
                 * down to a 0-order allocation and force no TSB
                 * growing for this address space.
                 */
                if (mm->context.tsb_block[tsb_index].tsb == NULL &&
                    new_cache_index > 0) {
                        new_cache_index = 0;
                        new_size = 8192;
                        new_rss_limit = ~0UL;
                        goto retry_tsb_alloc;
                }

                /* If we failed on a TSB grow, we are under serious
                 * memory pressure so don't try to grow any more.
                 */
                if (mm->context.tsb_block[tsb_index].tsb != NULL)
                        mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
                return;
        }

        /* Mark all tags as invalid.  */
        tsb_init(new_tsb, new_size);

        /* OK, we are about to commit the changes.  If we are
         * growing an existing TSB the locking is very tricky,
         * so WATCH OUT!
         *
         * We have to hold mm->context.lock while committing to the
         * new TSB; this synchronizes us with processors in
         * flush_tsb_user() and switch_mm() for this address space.
         *
         * But even with that lock held, processors run asynchronously
         * accessing the old TSB via TLB miss handling.  This is OK
         * because those actions are just propagating state from the
         * Linux page tables into the TSB; page table mappings are not
         * being changed.  If a real fault occurs, the processor will
         * synchronize with us when it hits flush_tsb_user(); this is
         * also true for the case where vmscan is modifying the page
         * tables.  The only thing we need to be careful with is to
         * skip any locked TSB entries during copy_tsb().
         *
         * When we finish committing to the new TSB, we have to drop
         * the lock and ask all other cpus running this address space
         * to run tsb_context_switch() to see the new TSB table.
         */
        spin_lock_irqsave(&mm->context.lock, flags);

        old_tsb = mm->context.tsb_block[tsb_index].tsb;
        old_cache_index =
                (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
        old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
                    sizeof(struct tsb));

        /* Handle multiple threads trying to grow the TSB at the same time.
         * One will get in here first, and bump the size and the RSS limit.
         * The others will get in here next and hit this check.
         */
        if (unlikely(old_tsb &&
                     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
                spin_unlock_irqrestore(&mm->context.lock, flags);

                kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
                return;
        }

        mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

        if (old_tsb) {
                extern void copy_tsb(unsigned long old_tsb_base,
                                     unsigned long old_tsb_size,
                                     unsigned long new_tsb_base,
                                     unsigned long new_tsb_size);
                unsigned long old_tsb_base = (unsigned long) old_tsb;
                unsigned long new_tsb_base = (unsigned long) new_tsb;

                if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                        old_tsb_base = __pa(old_tsb_base);
                        new_tsb_base = __pa(new_tsb_base);
                }
                copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
        }

        mm->context.tsb_block[tsb_index].tsb = new_tsb;
        setup_tsb_params(mm, tsb_index, new_size);

        spin_unlock_irqrestore(&mm->context.lock, flags);

        /* If old_tsb is NULL, we're being invoked for the first time
         * from init_new_context().
         */
        if (old_tsb) {
                /* Reload it on the local cpu.  */
                tsb_context_switch(mm);

                /* Now force other processors to do the same.  */
                preempt_disable();
                smp_tsb_sync(mm);
                preempt_enable();

                /* Now it is safe to free the old tsb.  */
                kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
        }
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        unsigned long huge_pte_count;
#endif
        unsigned int i;

        spin_lock_init(&mm->context.lock);

        mm->context.sparc64_ctx_val = 0UL;

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        /* We reset it to zero because the fork() page copying
         * will re-increment the counters as the parent PTEs are
         * copied into the child address space.
         */
        huge_pte_count = mm->context.huge_pte_count;
        mm->context.huge_pte_count = 0;
#endif

        mm->context.pgtable_page = NULL;

        /* copy_mm() copies over the parent's mm_struct before calling
         * us, so we need to zero out the TSB pointer or else tsb_grow()
         * will be confused and think there is an older TSB to free up.
         */
        for (i = 0; i < MM_NUM_TSBS; i++)
                mm->context.tsb_block[i].tsb = NULL;

        /* If this is fork, inherit the parent's TSB size.  We would
         * grow it to that size on the first page fault anyway.
         */
        tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (unlikely(huge_pte_count))
                tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

        if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
                return -ENOMEM;

        return 0;
}

static void tsb_destroy_one(struct tsb_config *tp)
{
        unsigned long cache_index;

        if (!tp->tsb)
                return;
        cache_index = tp->tsb_reg_val & 0x7UL;
        kmem_cache_free(tsb_caches[cache_index], tp->tsb);
        tp->tsb = NULL;
        tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
        unsigned long flags, i;
        struct page *page;

        for (i = 0; i < MM_NUM_TSBS; i++)
                tsb_destroy_one(&mm->context.tsb_block[i]);

        page = mm->context.pgtable_page;
        if (page && put_page_testzero(page)) {
                pgtable_page_dtor(page);
                free_hot_cold_page(page, 0);
        }

        spin_lock_irqsave(&ctx_alloc_lock, flags);

        if (CTX_VALID(mm->context)) {
                unsigned long nr = CTX_NRBITS(mm->context);
                mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
        }

        spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}
 535