linux/arch/sparc/mm/tsb.c
/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tsb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

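/* Turn a virtual address into a TSB index: shift out the page offset
 * (hash_shift) and mask with the number of entries, which is always a
 * power of two.
 */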
static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
        vaddr >>= hash_shift;
        return vaddr & (nentries - 1);
}

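/* TSB tags store the virtual address shifted down by 22 bits;
 * tag_compare() checks an entry against a candidate address using the
 * same encoding the flush routines below write into the tags.
 */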
static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
        return (tag == (vaddr >> 22));
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

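/* Invalidate any kernel TSB entries covering [start, end).  Setting
 * the invalid bit in the tag is enough to keep the entry from matching
 * in the TSB miss handler.
 */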
void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
        unsigned long v;

        for (v = start; v < end; v += PAGE_SIZE) {
                unsigned long hash = tsb_hash(v, PAGE_SHIFT,
                                              KERNEL_TSB_NENTRIES);
                struct tsb *ent = &swapper_tsb[hash];

                if (tag_compare(ent->tag, v))
                        ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
}

static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
                            unsigned long tsb, unsigned long nentries)
{
        unsigned long i;

        for (i = 0; i < tb->tlb_nr; i++) {
                unsigned long v = tb->vaddrs[i];
                unsigned long tag, ent, hash;

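                /* The low bit of a batched address is used by the TLB
                 * flush batching code to mark executable mappings;
                 * strip it to recover the page-aligned address.
                 */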
                v &= ~0x1UL;

                hash = tsb_hash(v, hash_shift, nentries);
                ent = tsb + (hash * sizeof(struct tsb));
                tag = (v >> 22UL);

                tsb_flush(ent, tag);
        }
}

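/* Flush user TSB entries for the batched virtual addresses.  On
 * cheetah_plus and hypervisor (sun4v) chips the TSB is referenced by
 * physical address, so convert the base before flushing; see the
 * "Physical mapping" case in setup_tsb_params().
 */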
void flush_tsb_user(struct tlb_batch *tb)
{
        struct mm_struct *mm = tb->mm;
        unsigned long nentries, base, flags;

        spin_lock_irqsave(&mm->context.lock, flags);

        base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
        nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
        if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                base = __pa(base);
        __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
                __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
        }
#endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
}

#define HV_PGSZ_IDX_BASE        HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE       HV_PGSZ_MASK_8K

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE        HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE       HV_PGSZ_MASK_4MB
#endif

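/* Compute the TSB register value and, on pre-hypervisor chips, the
 * locked TLB mapping for a freshly (re)allocated TSB.  The low three
 * bits of tsb_reg_val encode the TSB size (8K << N); they double as
 * the tsb_caches[] index used when the TSB is freed.
 */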
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
        unsigned long tsb_reg, base, tsb_paddr;
        unsigned long page_sz, tte;

        mm->context.tsb_block[tsb_idx].tsb_nentries =
                tsb_bytes / sizeof(struct tsb);

        base = TSBMAP_BASE;
        tte = pgprot_val(PAGE_KERNEL_LOCKED);
        tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
        BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

        /* Use the smallest page size that can map the whole TSB
         * in one TLB entry.
         */
        switch (tsb_bytes) {
        case 8192 << 0:
                tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
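                /* Color the virtual mapping the same as the physical
                 * TSB (bit 13) so an 8K TSB cannot create an illegal
                 * D-cache alias.
                 */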
                base += (tsb_paddr & 8192);
#endif
                page_sz = 8192;
                break;

        case 8192 << 1:
                tsb_reg = 0x1UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 2:
                tsb_reg = 0x2UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 3:
                tsb_reg = 0x3UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 4:
                tsb_reg = 0x4UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 5:
                tsb_reg = 0x5UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 6:
                tsb_reg = 0x6UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 7:
                tsb_reg = 0x7UL;
                page_sz = 4 * 1024 * 1024;
                break;

        default:
                printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
                       current->comm, current->pid, tsb_bytes);
                do_exit(SIGSEGV);
        }
        tte |= pte_sz_bits(page_sz);

        if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                /* Physical mapping, no locked TLB entry for TSB.  */
                tsb_reg |= tsb_paddr;

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
        } else {
                tsb_reg |= base;
                tsb_reg |= (tsb_paddr & (page_sz - 1UL));
                tte |= (tsb_paddr & ~(page_sz - 1UL));

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
        }

        /* Setup the Hypervisor TSB descriptor.  */
        if (tlb_type == hypervisor) {
                struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_idx = HV_PGSZ_IDX_BASE;
                        break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
                case MM_TSB_HUGE:
                        hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->assoc = 1;
                hp->num_ttes = tsb_bytes / 16;
                hp->ctx_idx = 0;
                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_mask = HV_PGSZ_MASK_BASE;
                        break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
                case MM_TSB_HUGE:
                        hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->tsb_base = tsb_paddr;
                hp->resv = 0;
        }
}

struct kmem_cache *pgtable_cache __read_mostly;

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
        "tsb_8KB",
        "tsb_16KB",
        "tsb_32KB",
        "tsb_64KB",
        "tsb_128KB",
        "tsb_256KB",
        "tsb_512KB",
        "tsb_1MB",
};

void __init pgtable_cache_init(void)
{
        unsigned long i;

        pgtable_cache = kmem_cache_create("pgtable_cache",
                                          PAGE_SIZE, PAGE_SIZE,
                                          0,
                                          _clear_page);
        if (!pgtable_cache) {
                prom_printf("pgtable_cache_init(): Could not create!\n");
                prom_halt();
        }

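        /* One slab cache per possible TSB size; passing the size as
         * the alignment gives the natural alignment that
         * setup_tsb_params() asserts with its BUG_ON().
         */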
        for (i = 0; i < 8; i++) {
                unsigned long size = 8192 << i;
                const char *name = tsb_cache_names[i];

                tsb_caches[i] = kmem_cache_create(name,
                                                  size, size,
                                                  0, NULL);
                if (!tsb_caches[i]) {
                        prom_printf("Could not create %s cache\n", name);
                        prom_halt();
                }
        }
}

int sysctl_tsb_ratio = -2;

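/* Translate a TSB size into the RSS at which we should grow it.  A
 * negative sysctl_tsb_ratio backs the limit off below full capacity;
 * with the default of -2 an 8K TSB (512 entries) gets a limit of
 * 512 - 512/4 = 384 entries, i.e. the 3/4 of capacity described in
 * the comment above tsb_grow().
 */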
static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
        unsigned long num_ents = (new_size / sizeof(struct tsb));

        if (sysctl_tsb_ratio < 0)
                return num_ents - (num_ents >> -sysctl_tsb_ratio);
        else
                return num_ents + (num_ents >> sysctl_tsb_ratio);
}

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try to grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
        unsigned long max_tsb_size = 1 * 1024 * 1024;
        unsigned long new_size, old_size, flags;
        struct tsb *old_tsb, *new_tsb;
        unsigned long new_cache_index, old_cache_index;
        unsigned long new_rss_limit;
        gfp_t gfp_flags;

        if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
                max_tsb_size = (PAGE_SIZE << MAX_ORDER);

        new_cache_index = 0;
        for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
                new_rss_limit = tsb_size_to_rss_limit(new_size);
                if (new_rss_limit > rss)
                        break;
                new_cache_index++;
        }

        if (new_size == max_tsb_size)
                new_rss_limit = ~0UL;

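        /* Allocate the new TSB.  For multi-page (high-order) requests
         * don't warn and don't retry hard; an allocation failure here
         * is handled gracefully by the fallback logic below.
         */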
retry_tsb_alloc:
        gfp_flags = GFP_KERNEL;
        if (new_size > (PAGE_SIZE * 2))
                gfp_flags = __GFP_NOWARN | __GFP_NORETRY;

        new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
                                        gfp_flags, numa_node_id());
        if (unlikely(!new_tsb)) {
                /* Not being able to fork due to a high-order TSB
                 * allocation failure is very bad behavior.  Just back
                 * down to a 0-order allocation and force no TSB
                 * growing for this address space.
                 */
                if (mm->context.tsb_block[tsb_index].tsb == NULL &&
                    new_cache_index > 0) {
                        new_cache_index = 0;
                        new_size = 8192;
                        new_rss_limit = ~0UL;
                        goto retry_tsb_alloc;
                }

                /* If we failed on a TSB grow, we are under serious
                 * memory pressure so don't try to grow any more.
                 */
                if (mm->context.tsb_block[tsb_index].tsb != NULL)
                        mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
                return;
        }

        /* Mark all tags as invalid.  */
        tsb_init(new_tsb, new_size);

        /* Ok, we are about to commit the changes.  If we are
         * growing an existing TSB the locking is very tricky,
         * so WATCH OUT!
         *
         * We have to hold mm->context.lock while committing to the
         * new TSB, this synchronizes us with processors in
         * flush_tsb_user() and switch_mm() for this address space.
         *
         * But even with that lock held, processors run asynchronously
         * accessing the old TSB via TLB miss handling.  This is OK
         * because those actions are just propagating state from the
         * Linux page tables into the TSB, page table mappings are not
         * being changed.  If a real fault occurs, the processor will
         * synchronize with us when it hits flush_tsb_user(), this is
         * also true for the case where vmscan is modifying the page
         * tables.  The only thing we need to be careful with is to
         * skip any locked TSB entries during copy_tsb().
         *
         * When we finish committing to the new TSB, we have to drop
         * the lock and ask all other cpus running this address space
         * to run tsb_context_switch() to see the new TSB table.
         */
        spin_lock_irqsave(&mm->context.lock, flags);

        old_tsb = mm->context.tsb_block[tsb_index].tsb;
        old_cache_index =
                (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
        old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
                    sizeof(struct tsb));

        /* Handle multiple threads trying to grow the TSB at the same time.
         * One will get in here first, and bump the size and the RSS limit.
         * The others will get in here next and hit this check.
         */
        if (unlikely(old_tsb &&
                     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
                spin_unlock_irqrestore(&mm->context.lock, flags);

                kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
                return;
        }

        mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

        if (old_tsb) {
                extern void copy_tsb(unsigned long old_tsb_base,
                                     unsigned long old_tsb_size,
                                     unsigned long new_tsb_base,
                                     unsigned long new_tsb_size);
                unsigned long old_tsb_base = (unsigned long) old_tsb;
                unsigned long new_tsb_base = (unsigned long) new_tsb;

                if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                        old_tsb_base = __pa(old_tsb_base);
                        new_tsb_base = __pa(new_tsb_base);
                }
                copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
        }

        mm->context.tsb_block[tsb_index].tsb = new_tsb;
        setup_tsb_params(mm, tsb_index, new_size);

        spin_unlock_irqrestore(&mm->context.lock, flags);

        /* If old_tsb is NULL, we're being invoked for the first time
         * from init_new_context().
         */
        if (old_tsb) {
                /* Reload it on the local cpu.  */
                tsb_context_switch(mm);

                /* Now force other processors to do the same.  */
                preempt_disable();
                smp_tsb_sync(mm);
                preempt_enable();

                /* Now it is safe to free the old tsb.  */
                kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
        }
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        unsigned long huge_pte_count;
#endif
        unsigned int i;

        spin_lock_init(&mm->context.lock);

        mm->context.sparc64_ctx_val = 0UL;

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        /* We reset it to zero because the fork() page copying
         * will re-increment the counters as the parent PTEs are
         * copied into the child address space.
         */
        huge_pte_count = mm->context.huge_pte_count;
        mm->context.huge_pte_count = 0;
#endif

        mm->context.pgtable_page = NULL;

        /* copy_mm() copies over the parent's mm_struct before calling
         * us, so we need to zero out the TSB pointer or else tsb_grow()
         * will be confused and think there is an older TSB to free up.
         */
        for (i = 0; i < MM_NUM_TSBS; i++)
                mm->context.tsb_block[i].tsb = NULL;

        /* If this is fork, inherit the parent's TSB size.  We would
         * grow it to that size on the first page fault anyway.
         */
        tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (unlikely(huge_pte_count))
                tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

        if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
                return -ENOMEM;

        return 0;
}

static void tsb_destroy_one(struct tsb_config *tp)
{
        unsigned long cache_index;

        if (!tp->tsb)
                return;
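        /* The low three bits of tsb_reg_val are the size index that
         * setup_tsb_params() stored there; it is also the index into
         * tsb_caches[].
         */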
        cache_index = tp->tsb_reg_val & 0x7UL;
        kmem_cache_free(tsb_caches[cache_index], tp->tsb);
        tp->tsb = NULL;
        tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
        unsigned long flags, i;
        struct page *page;

        for (i = 0; i < MM_NUM_TSBS; i++)
                tsb_destroy_one(&mm->context.tsb_block[i]);

        page = mm->context.pgtable_page;
        if (page && put_page_testzero(page)) {
                pgtable_page_dtor(page);
                free_hot_cold_page(page, 0);
        }

        spin_lock_irqsave(&ctx_alloc_lock, flags);

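        /* Give our MMU context number back to the allocator bitmap. */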
        if (CTX_VALID(mm->context)) {
                unsigned long nr = CTX_NRBITS(mm->context);
                mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
        }

        spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}