linux/arch/sparc/mm/hugetlbpage.c
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff
 */
#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
#define VA_EXCLUDE_END   (0xfffff80000000000UL + (1UL << 32UL))
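/* Note: these bounds bracket the sparc64 virtual address hole (the
 * unmappable range between the low and high halves of the address
 * space), widened by a 4GB guard region on either side.  The search
 * loops below step an allocation straight across this range rather
 * than trying to place a huge mapping inside it.
 */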

static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
                                                        unsigned long addr,
                                                        unsigned long len,
                                                        unsigned long pgoff,
                                                        unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct * vma;
        unsigned long task_size = TASK_SIZE;
        unsigned long start_addr;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;
        if (unlikely(len >= VA_EXCLUDE_START))
                return -ENOMEM;

        if (len > mm->cached_hole_size) {
                start_addr = addr = mm->free_area_cache;
        } else {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
        }

        task_size -= len;

full_search:
        addr = ALIGN(addr, HPAGE_SIZE);

        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
                /* At this point:  (!vma || addr < vma->vm_end). */
                if (addr < VA_EXCLUDE_START &&
                    (addr + len) >= VA_EXCLUDE_START) {
                        addr = VA_EXCLUDE_END;
                        vma = find_vma(mm, VA_EXCLUDE_END);
                }
                if (unlikely(task_size < addr)) {
                        if (start_addr != TASK_UNMAPPED_BASE) {
                                start_addr = addr = TASK_UNMAPPED_BASE;
                                mm->cached_hole_size = 0;
                                goto full_search;
                        }
                        return -ENOMEM;
                }
                if (likely(!vma || addr + len <= vma->vm_start)) {
                        /*
                         * Remember the place where we stopped the search:
                         */
                        mm->free_area_cache = addr + len;
                        return addr;
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;

                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
        }
}

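/* Top-down variant, used when the mm was set up with the non-legacy
 * (top-down) mmap layout.  On sparc64 that layout is normally only
 * chosen for 32-bit tasks (see arch_pick_mmap_layout()), which is what
 * the BUG_ON below asserts.  The walk starts just under mm->mmap_base
 * and moves toward lower addresses, falling back to the bottom-up
 * search if nothing fits.
 */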
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                                  const unsigned long len,
                                  const unsigned long pgoff,
                                  const unsigned long flags)
{
        struct vm_area_struct *vma;
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;

        /* This should only ever run for 32-bit processes.  */
        BUG_ON(!test_thread_flag(TIF_32BIT));

        /* check if free_area_cache is useful for us */
        if (len <= mm->cached_hole_size) {
                mm->cached_hole_size = 0;
                mm->free_area_cache = mm->mmap_base;
        }

        /* either no address requested or can't fit in requested address hole */
        addr = mm->free_area_cache & HPAGE_MASK;

        /* make sure it can fit in the remaining address space */
        if (likely(addr > len)) {
                vma = find_vma(mm, addr-len);
                if (!vma || addr <= vma->vm_start) {
                        /* remember the address as a hint for next time */
                        return (mm->free_area_cache = addr-len);
                }
        }

        if (unlikely(mm->mmap_base < len))
                goto bottomup;

        addr = (mm->mmap_base-len) & HPAGE_MASK;

        do {
                /*
                 * Lookup failure means no vma is above this address,
                 * else if new region fits below vma->vm_start,
                 * return with success:
                 */
                vma = find_vma(mm, addr);
                if (likely(!vma || addr+len <= vma->vm_start)) {
                        /* remember the address as a hint for next time */
                        return (mm->free_area_cache = addr);
                }

                /* remember the largest hole we saw so far */
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;

                /* try just below the current vma->vm_start */
                addr = (vma->vm_start-len) & HPAGE_MASK;
        } while (likely(len < vma->vm_start));

bottomup:
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        mm->cached_hole_size = ~0UL;
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
        /*
         * Restore the topdown base:
         */
        mm->free_area_cache = mm->mmap_base;
        mm->cached_hole_size = ~0UL;

        return addr;
}

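/* Arch-specific get_unmapped_area for hugetlbfs mappings, used because
 * the architecture advertises HAVE_ARCH_HUGETLB_UNMAPPED_AREA.  It
 * validates size and alignment, honours MAP_FIXED and an explicit
 * address hint, and otherwise dispatches to the bottom-up or top-down
 * search to match the mm's regular mmap layout.
 */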
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long task_size = TASK_SIZE;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;

        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (len > task_size)
                return -ENOMEM;

        if (flags & MAP_FIXED) {
                if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                return addr;
        }

        if (addr) {
                addr = ALIGN(addr, HPAGE_SIZE);
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
        if (mm->get_unmapped_area == arch_get_unmapped_area)
                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
}

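/* sparc64 does not map a huge page with a single huge PMD/PUD entry.
 * Instead, an HPAGE_SIZE page is backed by (1 << HUGETLB_PAGE_ORDER)
 * contiguous normal PTEs (see set_huge_pte_at() below), so the
 * alloc/offset helpers simply walk, and for the alloc case populate,
 * the regular page table levels and return the first sub-PTE of the
 * HPAGE_MASK-aligned range.
 */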
pte_t *huge_pte_alloc(struct mm_struct *mm,
                        unsigned long addr, unsigned long sz)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        /* We must align the address, because our caller will run
         * set_huge_pte_at() on whatever we return, which writes out
         * all of the sub-ptes for the hugepage range.  So we have
         * to give it the first such sub-pte.
         */
        addr &= HPAGE_MASK;

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
        if (pud) {
                pmd = pmd_alloc(mm, pud, addr);
                if (pmd)
                        pte = pte_alloc_map(mm, NULL, pmd, addr);
        }
        return pte;
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        addr &= HPAGE_MASK;

        pgd = pgd_offset(mm, addr);
        if (!pgd_none(*pgd)) {
                pud = pud_offset(pgd, addr);
                if (!pud_none(*pud)) {
                        pmd = pmd_offset(pud, addr);
                        if (!pmd_none(*pmd))
                                pte = pte_offset_map(pmd, addr);
                }
        }
        return pte;
}

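/* Huge page table sharing is not implemented on sparc64, so there is
 * never anything to unshare; report that nothing was unshared.
 */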
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
        return 0;
}

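/* Install a huge mapping by fanning the huge pte value out over every
 * sub-PTE of the HPAGE_SIZE range, advancing the physical address by
 * PAGE_SIZE each step.  context.huge_pte_count tracks how many huge
 * mappings exist in this address space (used elsewhere in the sparc64
 * mm code, e.g. for huge-TSB sizing).
 */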
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
{
        int i;

        if (!pte_present(*ptep) && pte_present(entry))
                mm->context.huge_pte_count++;

        addr &= HPAGE_MASK;
        for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
                set_pte_at(mm, addr, ptep, entry);
                ptep++;
                addr += PAGE_SIZE;
                pte_val(entry) += PAGE_SIZE;
        }
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        pte_t entry;
        int i;

        entry = *ptep;
        if (pte_present(entry))
                mm->context.huge_pte_count--;

        addr &= HPAGE_MASK;

        for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
                pte_clear(mm, addr, ptep);
                addr += PAGE_SIZE;
                ptep++;
        }

        return entry;
}

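/* The generic mm code calls the helpers below when it encounters what
 * might be a huge entry at the PMD or PUD level.  Since sparc64 huge
 * pages live entirely at the PTE level, huge PMDs/PUDs never occur
 * here and these are all stubs.
 */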
struct page *follow_huge_addr(struct mm_struct *mm,
                              unsigned long address, int write)
{
        return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

int pud_huge(pud_t pud)
{
        return 0;
}

struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmd, int write)
{
        return NULL;
}

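/* Cross-call target: reload the secondary context register on any CPU
 * currently running this address space, so that updated page-size
 * fields in sparc64_ctx_val take effect immediately.
 */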
static void context_reload(void *__data)
{
        struct mm_struct *mm = __data;

        if (mm == current->mm)
                load_secondary_context(mm);
}

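/* Arch hook invoked from the generic hugetlb code.  On first use for
 * an mm it allocates the huge-page TSB and switches to it; on
 * UltraSPARC-III+ style MMUs it additionally reprograms the secondary
 * page-size field of the context register (see below).
 */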
void hugetlb_prefault_arch_hook(struct mm_struct *mm)
{
        struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];

        if (likely(tp->tsb != NULL))
                return;

        tsb_grow(mm, MM_TSB_HUGE, 0);
        tsb_context_switch(mm);
        smp_tsb_sync(mm);

        /* On UltraSPARC-III+ and later, configure the second half of
         * the Data-TLB for huge pages.
         */
        if (tlb_type == cheetah_plus) {
                unsigned long ctx;

                spin_lock(&ctx_alloc_lock);
                ctx = mm->context.sparc64_ctx_val;
                ctx &= ~CTX_PGSZ_MASK;
                ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
                ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;

                if (ctx != mm->context.sparc64_ctx_val) {
                        /* When changing the page size fields, we
                         * must perform a context flush so that no
                         * stale entries match.  This flush must
                         * occur with the original context register
                         * settings.
                         */
                        do_flush_tlb_mm(mm);

                        /* Reload the context register of all processors
                         * also executing in this address space.
                         */
                        mm->context.sparc64_ctx_val = ctx;
                        on_each_cpu(context_reload, mm, 0);
                }
                spin_unlock(&ctx_alloc_lock);
        }
}