linux/arch/sparc/mm/hugetlbpage.c
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff
 */

static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
                                                        unsigned long addr,
                                                        unsigned long len,
                                                        unsigned long pgoff,
                                                        unsigned long flags)
{
        struct hstate *h = hstate_file(filp);
        unsigned long task_size = TASK_SIZE;
        struct vm_unmapped_area_info info;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;

        info.flags = 0;
        info.length = len;
        info.low_limit = TASK_UNMAPPED_BASE;
        info.high_limit = min(task_size, VA_EXCLUDE_START);
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);

        if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
                VM_BUG_ON(addr != -ENOMEM);
                info.low_limit = VA_EXCLUDE_END;
                info.high_limit = task_size;
                addr = vm_unmapped_area(&info);
        }

        return addr;
}

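/* Top-down variant of the search above, used when the process has a
 * top-down mmap layout (allocating just below mm->mmap_base).  Per the
 * BUG_ON below this is only expected for 32-bit tasks; on failure it
 * falls back to a bottom-up search under STACK_TOP32.
 */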
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                                  const unsigned long len,
                                  const unsigned long pgoff,
                                  const unsigned long flags)
{
        struct hstate *h = hstate_file(filp);
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;
        struct vm_unmapped_area_info info;

        /* This should only ever run for 32-bit processes.  */
        BUG_ON(!test_thread_flag(TIF_32BIT));

        info.flags = VM_UNMAPPED_AREA_TOPDOWN;
        info.length = len;
        info.low_limit = PAGE_SIZE;
        info.high_limit = mm->mmap_base;
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);

        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        if (addr & ~PAGE_MASK) {
                VM_BUG_ON(addr != -ENOMEM);
                info.flags = 0;
                info.low_limit = TASK_UNMAPPED_BASE;
                info.high_limit = STACK_TOP32;
                addr = vm_unmapped_area(&info);
        }

        return addr;
}

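/* arch hook used by the hugetlbfs core to pick an address for a huge
 * page mapping: validate the length against the huge page size and the
 * address space limit, honour MAP_FIXED and usable address hints, then
 * dispatch to the bottom-up or top-down search depending on the mm's
 * mmap layout.
 */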
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long task_size = TASK_SIZE;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;

        if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > task_size)
                return -ENOMEM;

        if (flags & MAP_FIXED) {
                if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                return addr;
        }

        if (addr) {
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
        if (mm->get_unmapped_area == arch_get_unmapped_area)
                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
}

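/* sun4u TTEs are passed through unchanged here; the per-size TTE
 * encoding below is only applied on sun4v.
 */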
static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
        return entry;
}

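/* Encode the huge page size into the sun4v TTE.  Sizes of PMD
 * granularity and above are also tagged _PAGE_PMD_HUGE.  The default
 * HPAGE_SHIFT case keeps the 4MB size field because an HPAGE_SIZE
 * mapping is installed as two REAL_HPAGE_SIZE TTEs (see
 * set_huge_pte_at() below).
 */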
static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
        unsigned long hugepage_size = _PAGE_SZ4MB_4V;

        pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;

        switch (shift) {
        case HPAGE_2GB_SHIFT:
                hugepage_size = _PAGE_SZ2GB_4V;
                pte_val(entry) |= _PAGE_PMD_HUGE;
                break;
        case HPAGE_256MB_SHIFT:
                hugepage_size = _PAGE_SZ256MB_4V;
                pte_val(entry) |= _PAGE_PMD_HUGE;
                break;
        case HPAGE_SHIFT:
                pte_val(entry) |= _PAGE_PMD_HUGE;
                break;
        case HPAGE_64K_SHIFT:
                hugepage_size = _PAGE_SZ64K_4V;
                break;
        default:
                WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
        }

        pte_val(entry) = pte_val(entry) | hugepage_size;
        return entry;
}

static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
        if (tlb_type == hypervisor)
                return sun4v_hugepage_shift_to_tte(entry, shift);
        else
                return sun4u_hugepage_shift_to_tte(entry, shift);
}

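/* Hook called by the hugetlb core when it builds a huge PTE: derive
 * the page shift from the VMA's hstate and encode it into the TTE.
 */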
pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
                         struct page *page, int writeable)
{
        unsigned int shift = huge_page_shift(hstate_vma(vma));

        return hugepage_shift_to_tte(entry, shift);
}

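/* Reverse mapping: decode the size bits of a huge TTE back into a page
 * shift.  A return value of PAGE_SHIFT means the TTE did not carry a
 * recognised huge page size and is treated as an error by
 * huge_tte_to_shift().
 */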
static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
{
        unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
        unsigned int shift;

        switch (tte_szbits) {
        case _PAGE_SZ2GB_4V:
                shift = HPAGE_2GB_SHIFT;
                break;
        case _PAGE_SZ256MB_4V:
                shift = HPAGE_256MB_SHIFT;
                break;
        case _PAGE_SZ4MB_4V:
                shift = REAL_HPAGE_SHIFT;
                break;
        case _PAGE_SZ64K_4V:
                shift = HPAGE_64K_SHIFT;
                break;
        default:
                shift = PAGE_SHIFT;
                break;
        }
        return shift;
}

static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
{
        unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
        unsigned int shift;

        switch (tte_szbits) {
        case _PAGE_SZ256MB_4U:
                shift = HPAGE_256MB_SHIFT;
                break;
        case _PAGE_SZ4MB_4U:
                shift = REAL_HPAGE_SHIFT;
                break;
        case _PAGE_SZ64K_4U:
                shift = HPAGE_64K_SHIFT;
                break;
        default:
                shift = PAGE_SHIFT;
                break;
        }
        return shift;
}

static unsigned int huge_tte_to_shift(pte_t entry)
{
        unsigned long shift;

        if (tlb_type == hypervisor)
                shift = sun4v_huge_tte_to_shift(entry);
        else
                shift = sun4u_huge_tte_to_shift(entry);

        if (shift == PAGE_SHIFT)
                WARN_ONCE(1, "tte_to_shift: invalid hugepage tte=0x%lx\n",
                          pte_val(entry));

        return shift;
}

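/* Size in bytes of the mapping described by a huge TTE.  A single
 * REAL_HPAGE_SIZE TTE is reported as HPAGE_SIZE, since an HPAGE is
 * always managed as a pair of REAL_HPAGE_SIZE TTEs.
 */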
static unsigned long huge_tte_to_size(pte_t pte)
{
        unsigned long size = 1UL << huge_tte_to_shift(pte);

        if (size == REAL_HPAGE_SIZE)
                size = HPAGE_SIZE;
        return size;
}

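/* Allocate the page table entry that will hold a huge mapping at
 * @addr.  For sizes of PMD_SIZE and above the PMD entry itself is used
 * as the huge PTE; smaller (64K) huge pages live in a normal PTE page
 * below the PMD.
 */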
pte_t *huge_pte_alloc(struct mm_struct *mm,
                        unsigned long addr, unsigned long sz)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
        if (pud) {
                pmd = pmd_alloc(mm, pud, addr);
                if (!pmd)
                        return NULL;

                if (sz >= PMD_SIZE)
                        pte = (pte_t *)pmd;
                else
                        pte = pte_alloc_map(mm, pmd, addr);
        }

        return pte;
}

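/* Look up an existing huge PTE without allocating anything.  A PMD
 * that maps a huge page is returned directly (cast to pte_t *);
 * otherwise the PTE within the page table is returned, or NULL when no
 * page table covers @addr.
 */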
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        pgd = pgd_offset(mm, addr);
        if (!pgd_none(*pgd)) {
                pud = pud_offset(pgd, addr);
                if (!pud_none(*pud)) {
                        pmd = pmd_offset(pud, addr);
                        if (!pmd_none(*pmd)) {
                                if (is_hugetlb_pmd(*pmd))
                                        pte = (pte_t *)pmd;
                                else
                                        pte = pte_offset_map(pmd, addr);
                        }
                }
        }

        return pte;
}

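/* Install a huge mapping.  The huge page is covered by nptes
 * consecutive entries at either PMD or PTE granularity, each mapping
 * the next (1 << shift) bytes, so all of them are written here and the
 * hugetlb_pte_count bookkeeping is updated.  TLB flushes are queued
 * per REAL_HPAGE_SIZE half, which is why an HPAGE_SIZE mapping gets
 * two maybe_tlb_batch_add() calls.
 */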
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
{
        unsigned int i, nptes, orig_shift, shift;
        unsigned long size;
        pte_t orig;

        size = huge_tte_to_size(entry);
        shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
        nptes = size >> shift;

        if (!pte_present(*ptep) && pte_present(entry))
                mm->context.hugetlb_pte_count += nptes;

        addr &= ~(size - 1);
        orig = *ptep;
        orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);

        for (i = 0; i < nptes; i++)
                ptep[i] = __pte(pte_val(entry) + (i << shift));

        maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
        /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
        if (size == HPAGE_SIZE)
                maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
                                    orig_shift);
}

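/* Tear-down counterpart of set_huge_pte_at(): clear every entry that
 * backs the huge page, drop the hugetlb_pte_count contribution for
 * present mappings, queue the TLB flushes and hand the original TTE
 * back to the caller.
 */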
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        unsigned int i, nptes, hugepage_shift;
        unsigned long size;
        pte_t entry;

        entry = *ptep;
        size = huge_tte_to_size(entry);
        if (size >= HPAGE_SIZE)
                nptes = size >> PMD_SHIFT;
        else
                nptes = size >> PAGE_SHIFT;

        hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
                huge_tte_to_shift(entry);

        if (pte_present(entry))
                mm->context.hugetlb_pte_count -= nptes;

        addr &= ~(size - 1);
        for (i = 0; i < nptes; i++)
                ptep[i] = __pte(0UL);

        maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
        /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
        if (size == HPAGE_SIZE)
                maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
                                    hugepage_shift);

        return entry;
}

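/* A PMD is considered huge when it is populated but is not a plain
 * valid pointer to a PTE page (i.e. _PAGE_PMD_HUGE is set or the entry
 * is not valid).  PUD-sized huge pages are not supported.
 */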
int pmd_huge(pmd_t pmd)
{
        return !pmd_none(pmd) &&
                (pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
}

int pud_huge(pud_t pud)
{
        return 0;
}

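/* The hugetlb_free_*_range() helpers below follow the structure of the
 * generic free_pgd_range() walk: tear down the page table pages that
 * covered a hugetlb VMA, but clear hugetlb PMDs in place since they
 * have no PTE page underneath them.  The floor/ceiling limits keep
 * table pages shared with neighbouring mappings alive.
 */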
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
                           unsigned long addr)
{
        pgtable_t token = pmd_pgtable(*pmd);

        pmd_clear(pmd);
        pte_free_tlb(tlb, token, addr);
        atomic_long_dec(&tlb->mm->nr_ptes);
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pmd_t *pmd;
        unsigned long next;
        unsigned long start;

        start = addr;
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none(*pmd))
                        continue;
                if (is_hugetlb_pmd(*pmd))
                        pmd_clear(pmd);
                else
                        hugetlb_free_pte_range(tlb, pmd, addr);
        } while (pmd++, addr = next, addr != end);

        start &= PUD_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PUD_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pmd = pmd_offset(pud, start);
        pud_clear(pud);
        pmd_free_tlb(tlb, pmd, start);
        mm_dec_nr_pmds(tlb->mm);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pud_t *pud;
        unsigned long next;
        unsigned long start;

        start = addr;
        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
                hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
                                       ceiling);
        } while (pud++, addr = next, addr != end);

        start &= PGDIR_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PGDIR_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pud = pud_offset(pgd, start);
        pgd_clear(pgd);
        pud_free_tlb(tlb, pud, start);
}

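/* Entry point used by the hugetlb core when a hugetlb VMA is torn
 * down: round the range to PMD granularity, clamp it against floor and
 * ceiling, and walk the PGD freeing whatever page table pages
 * exclusively covered it.
 */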
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
                            unsigned long addr, unsigned long end,
                            unsigned long floor, unsigned long ceiling)
{
        pgd_t *pgd;
        unsigned long next;

        addr &= PMD_MASK;
        if (addr < floor) {
                addr += PMD_SIZE;
                if (!addr)
                        return;
        }
        if (ceiling) {
                ceiling &= PMD_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                end -= PMD_SIZE;
        if (addr > end - 1)
                return;

        pgd = pgd_offset(tlb->mm, addr);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
        } while (pgd++, addr = next, addr != end);
}