linux/arch/sparc/mm/hugetlbpage.c
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff
 */
#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL))
#define VA_EXCLUDE_END   (0xfffff80000000000UL + (1UL << 32UL))

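/* Bottom-up search: starting from the cached free_area_cache hint (or
 * TASK_UNMAPPED_BASE), walk the VMA list looking for a gap of at least
 * 'len' bytes, keeping every candidate aligned to HPAGE_SIZE.  Candidates
 * that would cross VA_EXCLUDE_START (the range between VA_EXCLUDE_START
 * and VA_EXCLUDE_END brackets the sparc64 VA hole, widened by a 4GB
 * guard on each side) are pushed up past VA_EXCLUDE_END.
 */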
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
                                                        unsigned long addr,
                                                        unsigned long len,
                                                        unsigned long pgoff,
                                                        unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long task_size = TASK_SIZE;
        unsigned long start_addr;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;
        if (unlikely(len >= VA_EXCLUDE_START))
                return -ENOMEM;

        if (len > mm->cached_hole_size) {
                start_addr = addr = mm->free_area_cache;
        } else {
                start_addr = addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
        }

        task_size -= len;

full_search:
        addr = ALIGN(addr, HPAGE_SIZE);

        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
                /* At this point:  (!vma || addr < vma->vm_end). */
                if (addr < VA_EXCLUDE_START &&
                    (addr + len) >= VA_EXCLUDE_START) {
                        addr = VA_EXCLUDE_END;
                        vma = find_vma(mm, VA_EXCLUDE_END);
                }
                if (unlikely(task_size < addr)) {
                        if (start_addr != TASK_UNMAPPED_BASE) {
                                start_addr = addr = TASK_UNMAPPED_BASE;
                                mm->cached_hole_size = 0;
                                goto full_search;
                        }
                        return -ENOMEM;
                }
                if (likely(!vma || addr + len <= vma->vm_start)) {
                        /*
                         * Remember the place where we stopped the search:
                         */
                        mm->free_area_cache = addr + len;
                        return addr;
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;

                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
        }
}

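/* Top-down search, only reached for 32-bit tasks (see the BUG_ON below):
 * try the cached free_area_cache hint first, then walk VMAs downward
 * from mmap_base, masking each candidate with HPAGE_MASK, and fall back
 * to the bottom-up allocator if nothing below mmap_base fits.
 */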
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                                  const unsigned long len,
                                  const unsigned long pgoff,
                                  const unsigned long flags)
{
        struct vm_area_struct *vma;
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;

        /* This should only ever run for 32-bit processes.  */
        BUG_ON(!test_thread_flag(TIF_32BIT));

        /* check if free_area_cache is useful for us */
        if (len <= mm->cached_hole_size) {
                mm->cached_hole_size = 0;
                mm->free_area_cache = mm->mmap_base;
        }

        /* either no address requested or can't fit in requested address hole */
        addr = mm->free_area_cache & HPAGE_MASK;

        /* make sure it can fit in the remaining address space */
        if (likely(addr > len)) {
                vma = find_vma(mm, addr-len);
                if (!vma || addr <= vma->vm_start) {
                        /* remember the address as a hint for next time */
                        return (mm->free_area_cache = addr-len);
                }
        }

        if (unlikely(mm->mmap_base < len))
                goto bottomup;

        addr = (mm->mmap_base-len) & HPAGE_MASK;

        do {
                /*
                 * Lookup failure means no vma is above this address,
                 * else if new region fits below vma->vm_start,
                 * return with success:
                 */
                vma = find_vma(mm, addr);
                if (likely(!vma || addr+len <= vma->vm_start)) {
                        /* remember the address as a hint for next time */
                        return (mm->free_area_cache = addr);
                }

                /* remember the largest hole we saw so far */
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;

                /* try just below the current vma->vm_start */
                addr = (vma->vm_start-len) & HPAGE_MASK;
        } while (likely(len < vma->vm_start));

bottomup:
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        mm->cached_hole_size = ~0UL;
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
        /*
         * Restore the topdown base:
         */
        mm->free_area_cache = mm->mmap_base;
        mm->cached_hole_size = ~0UL;

        return addr;
}

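/* Top-level entry point: validate the alignment and size of the request,
 * honor MAP_FIXED and explicit address hints, and otherwise dispatch to
 * the bottom-up or top-down helper depending on which get_unmapped_area
 * policy this mm uses.
 */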
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long task_size = TASK_SIZE;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;

        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (len > task_size)
                return -ENOMEM;

        if (flags & MAP_FIXED) {
                if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                return addr;
        }

        if (addr) {
                addr = ALIGN(addr, HPAGE_SIZE);
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
        if (mm->get_unmapped_area == arch_get_unmapped_area)
                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
}

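/* On sparc64 a huge page is backed by a run of ordinary PTEs (see
 * set_huge_pte_at() below), so allocating the "huge" pte is just the
 * normal pgd/pud/pmd walk plus a pte page, done at the first sub-pte
 * of the HPAGE_SIZE-aligned range.
 */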
pte_t *huge_pte_alloc(struct mm_struct *mm,
                        unsigned long addr, unsigned long sz)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        /* We must align the address, because our caller will run
         * set_huge_pte_at() on whatever we return, which writes out
         * all of the sub-ptes for the hugepage range.  So we have
         * to give it the first such sub-pte.
         */
        addr &= HPAGE_MASK;

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
        if (pud) {
                pmd = pmd_alloc(mm, pud, addr);
                if (pmd)
                        pte = pte_alloc_map(mm, pmd, addr);
        }
        return pte;
}

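/* Non-allocating counterpart of huge_pte_alloc(): walk the page tables
 * and return the first sub-pte of the hugepage range, or NULL if any
 * level is not populated.
 */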
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        addr &= HPAGE_MASK;

        pgd = pgd_offset(mm, addr);
        if (!pgd_none(*pgd)) {
                pud = pud_offset(pgd, addr);
                if (!pud_none(*pud)) {
                        pmd = pmd_offset(pud, addr);
                        if (!pmd_none(*pmd))
                                pte = pte_offset_map(pmd, addr);
                }
        }
        return pte;
}

/* Huge PMD sharing is not implemented on sparc64. */
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
        return 0;
}

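/* A huge mapping is represented as 1 << HUGETLB_PAGE_ORDER consecutive
 * normal PTEs, so install them all, advancing the virtual address and
 * the physical address encoded in the PTE by PAGE_SIZE per iteration.
 * The per-mm count of present huge mappings is kept in
 * mm->context.huge_pte_count (used elsewhere to size the huge-page TSB).
 */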
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
{
        int i;

        if (!pte_present(*ptep) && pte_present(entry))
                mm->context.huge_pte_count++;

        addr &= HPAGE_MASK;
        for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
                set_pte_at(mm, addr, ptep, entry);
                ptep++;
                addr += PAGE_SIZE;
                pte_val(entry) += PAGE_SIZE;
        }
}

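/* Tear-down counterpart of set_huge_pte_at(): clear every sub-pte of
 * the hugepage range and return the first one, dropping the present
 * huge mapping count when appropriate.
 */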
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        pte_t entry;
        int i;

        entry = *ptep;
        if (pte_present(entry))
                mm->context.huge_pte_count--;

        addr &= HPAGE_MASK;

        for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
                pte_clear(mm, addr, ptep);
                addr += PAGE_SIZE;
                ptep++;
        }

        return entry;
}

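/* Because sparc64 maps huge pages as runs of normal PTEs rather than as
 * huge PMD or PUD entries, the generic page-walk code can follow them on
 * its own and these hooks are stubs.
 */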
struct page *follow_huge_addr(struct mm_struct *mm,
                              unsigned long address, int write)
{
        return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

int pud_huge(pud_t pud)
{
        return 0;
}

struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmd, int write)
{
        return NULL;
}

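/* Cross-call target: reload the secondary context register on any CPU
 * currently running this address space, so it picks up the new page-size
 * fields programmed below.
 */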
static void context_reload(void *__data)
{
        struct mm_struct *mm = __data;

        if (mm == current->mm)
                load_secondary_context(mm);
}

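/* Run when an mm starts using hugetlb mappings (subsequent calls return
 * early once the huge TSB exists): allocate and install the MM_TSB_HUGE
 * TSB, sync it across CPUs, and on UltraSPARC-III+ (cheetah_plus)
 * reprogram the context register's secondary page-size field for huge
 * pages, flushing and reloading the context on all CPUs as needed.
 */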
void hugetlb_prefault_arch_hook(struct mm_struct *mm)
{
        struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];

        if (likely(tp->tsb != NULL))
                return;

        tsb_grow(mm, MM_TSB_HUGE, 0);
        tsb_context_switch(mm);
        smp_tsb_sync(mm);

        /* On UltraSPARC-III+ and later, configure the second half of
         * the Data-TLB for huge pages.
         */
        if (tlb_type == cheetah_plus) {
                unsigned long ctx;

                spin_lock(&ctx_alloc_lock);
                ctx = mm->context.sparc64_ctx_val;
                ctx &= ~CTX_PGSZ_MASK;
                ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
                ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;

                if (ctx != mm->context.sparc64_ctx_val) {
                        /* When changing the page size fields, we
                         * must perform a context flush so that no
                         * stale entries match.  This flush must
                         * occur with the original context register
                         * settings.
                         */
                        do_flush_tlb_mm(mm);

                        /* Reload the context register of all processors
                         * also executing in this address space.
                         */
                        mm->context.sparc64_ctx_val = ctx;
                        on_each_cpu(context_reload, mm, 0);
                }
                spin_unlock(&ctx_alloc_lock);
        }
}