linux/arch/sparc/mm/hugetlbpage.c
// SPDX-License-Identifier: GPL-2.0
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff
 */

static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
                                                        unsigned long addr,
                                                        unsigned long len,
                                                        unsigned long pgoff,
                                                        unsigned long flags)
{
        struct hstate *h = hstate_file(filp);
        unsigned long task_size = TASK_SIZE;
        struct vm_unmapped_area_info info;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;

        info.flags = 0;
        info.length = len;
        info.low_limit = TASK_UNMAPPED_BASE;
        info.high_limit = min(task_size, VA_EXCLUDE_START);
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);

        if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
                VM_BUG_ON(addr != -ENOMEM);
                info.low_limit = VA_EXCLUDE_END;
                info.high_limit = task_size;
                addr = vm_unmapped_area(&info);
        }

        return addr;
}
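
/*
 * Worked example for the align_mask used above (assuming the usual
 * sparc64 8 KB base page, i.e. PAGE_SHIFT == 13, and an 8 MB hstate):
 * huge_page_mask(h) is ~(8 MB - 1), so
 *
 *      PAGE_MASK & ~huge_page_mask(h)
 *        = ~0x1fff & 0x7fffff
 *        = 0x7fe000
 *
 * which keeps exactly the bits between PAGE_SHIFT and the hugepage
 * shift.  Passing that as info.align_mask makes vm_unmapped_area()
 * return an address aligned to the hugepage size rather than only to
 * PAGE_SIZE.
 */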

static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                                  const unsigned long len,
                                  const unsigned long pgoff,
                                  const unsigned long flags)
{
        struct hstate *h = hstate_file(filp);
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;
        struct vm_unmapped_area_info info;

        /* This should only ever run for 32-bit processes.  */
        BUG_ON(!test_thread_flag(TIF_32BIT));

        info.flags = VM_UNMAPPED_AREA_TOPDOWN;
        info.length = len;
        info.low_limit = PAGE_SIZE;
        info.high_limit = mm->mmap_base;
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);

        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        if (addr & ~PAGE_MASK) {
                VM_BUG_ON(addr != -ENOMEM);
                info.flags = 0;
                info.low_limit = TASK_UNMAPPED_BASE;
                info.high_limit = STACK_TOP32;
                addr = vm_unmapped_area(&info);
        }

        return addr;
}

unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long task_size = TASK_SIZE;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;

        if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > task_size)
                return -ENOMEM;

        if (flags & MAP_FIXED) {
                if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                return addr;
        }

        if (addr) {
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
                    (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
        if (mm->get_unmapped_area == arch_get_unmapped_area)
                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
}
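
/*
 * Sketch of the decision above (the example constant assumes an 8 MB
 * hstate): a non-MAP_FIXED hint address is first rounded up to a
 * hugepage boundary, e.g. ALIGN(0x101200000, 0x800000) == 0x101800000,
 * and is only honoured when the [addr, addr + len) gap is free.
 * Otherwise the search falls through to the bottom-up helper when the
 * mm uses the legacy layout (mm->get_unmapped_area ==
 * arch_get_unmapped_area) and to the top-down helper for the top-down
 * layout used by 32-bit tasks.
 */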

static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
        return entry;
}

static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
        unsigned long hugepage_size = _PAGE_SZ4MB_4V;

        pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;

        switch (shift) {
        case HPAGE_16GB_SHIFT:
                hugepage_size = _PAGE_SZ16GB_4V;
                pte_val(entry) |= _PAGE_PUD_HUGE;
                break;
        case HPAGE_2GB_SHIFT:
                hugepage_size = _PAGE_SZ2GB_4V;
                pte_val(entry) |= _PAGE_PMD_HUGE;
                break;
        case HPAGE_256MB_SHIFT:
                hugepage_size = _PAGE_SZ256MB_4V;
                pte_val(entry) |= _PAGE_PMD_HUGE;
                break;
        case HPAGE_SHIFT:
                pte_val(entry) |= _PAGE_PMD_HUGE;
                break;
        case HPAGE_64K_SHIFT:
                hugepage_size = _PAGE_SZ64K_4V;
                break;
        default:
                WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
        }

        pte_val(entry) = pte_val(entry) | hugepage_size;
        return entry;
}
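
/*
 * Size encoding chosen above, assuming the usual sparc64 shift values
 * (HPAGE_64K_SHIFT = 16, HPAGE_SHIFT = 23, HPAGE_256MB_SHIFT = 28,
 * HPAGE_2GB_SHIFT = 31, HPAGE_16GB_SHIFT = 34):
 *
 *      shift   page size   sun4v TTE size field   huge level bit
 *      16      64 KB       _PAGE_SZ64K_4V         (none, PTE level)
 *      23      8 MB        _PAGE_SZ4MB_4V         _PAGE_PMD_HUGE
 *      28      256 MB      _PAGE_SZ256MB_4V       _PAGE_PMD_HUGE
 *      31      2 GB        _PAGE_SZ2GB_4V         _PAGE_PMD_HUGE
 *      34      16 GB       _PAGE_SZ16GB_4V        _PAGE_PUD_HUGE
 *
 * The 8 MB case keeps the default _PAGE_SZ4MB_4V field because an
 * HPAGE_SIZE page is installed as two REAL_HPAGE_SIZE (4 MB) hardware
 * TTEs, as noted further down in this file.
 */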

static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
        if (tlb_type == hypervisor)
                return sun4v_hugepage_shift_to_tte(entry, shift);
        else
                return sun4u_hugepage_shift_to_tte(entry, shift);
}

pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
                         struct page *page, int writeable)
{
        unsigned int shift = huge_page_shift(hstate_vma(vma));
        pte_t pte;

        pte = hugepage_shift_to_tte(entry, shift);

#ifdef CONFIG_SPARC64
        /* If this vma has ADI enabled on it, turn on TTE.mcd */
        if (vma->vm_flags & VM_SPARC_ADI)
                return pte_mkmcd(pte);
        else
                return pte_mknotmcd(pte);
#else
        return pte;
#endif
}

static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
{
        unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
        unsigned int shift;

        switch (tte_szbits) {
        case _PAGE_SZ16GB_4V:
                shift = HPAGE_16GB_SHIFT;
                break;
        case _PAGE_SZ2GB_4V:
                shift = HPAGE_2GB_SHIFT;
                break;
        case _PAGE_SZ256MB_4V:
                shift = HPAGE_256MB_SHIFT;
                break;
        case _PAGE_SZ4MB_4V:
                shift = REAL_HPAGE_SHIFT;
                break;
        case _PAGE_SZ64K_4V:
                shift = HPAGE_64K_SHIFT;
                break;
        default:
                shift = PAGE_SHIFT;
                break;
        }
        return shift;
}

static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
{
        unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
        unsigned int shift;

        switch (tte_szbits) {
        case _PAGE_SZ256MB_4U:
                shift = HPAGE_256MB_SHIFT;
                break;
        case _PAGE_SZ4MB_4U:
                shift = REAL_HPAGE_SHIFT;
                break;
        case _PAGE_SZ64K_4U:
                shift = HPAGE_64K_SHIFT;
                break;
        default:
                shift = PAGE_SHIFT;
                break;
        }
        return shift;
}

static unsigned int huge_tte_to_shift(pte_t entry)
{
        unsigned long shift;

        if (tlb_type == hypervisor)
                shift = sun4v_huge_tte_to_shift(entry);
        else
                shift = sun4u_huge_tte_to_shift(entry);

        if (shift == PAGE_SHIFT)
                WARN_ONCE(1, "tte_to_shift: invalid hugepage tte=0x%lx\n",
                          pte_val(entry));

        return shift;
}

static unsigned long huge_tte_to_size(pte_t pte)
{
        unsigned long size = 1UL << huge_tte_to_shift(pte);

        if (size == REAL_HPAGE_SIZE)
                size = HPAGE_SIZE;
        return size;
}
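
/*
 * Worked example, assuming REAL_HPAGE_SHIFT = 22 (4 MB) and
 * HPAGE_SHIFT = 23 (8 MB): a TTE whose size field says 4 MB maps only
 * half of an 8 MB hugetlb page, since each such page is backed by two
 * 4 MB hardware TTEs.  huge_tte_to_size() therefore reports HPAGE_SIZE
 * for that TTE so callers see the logical hugepage size rather than
 * the hardware mapping size.
 */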

pte_t *huge_pte_alloc(struct mm_struct *mm,
                        unsigned long addr, unsigned long sz)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
        if (!pud)
                return NULL;
        if (sz >= PUD_SIZE)
                return (pte_t *)pud;
        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd)
                return NULL;
        if (sz >= PMD_SIZE)
                return (pte_t *)pmd;
        return pte_alloc_map(mm, pmd, addr);
}
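
/*
 * With the usual sparc64 geometry (8 KB base page, PMD_SIZE = 8 MB,
 * PUD_SIZE = 8 GB), the size checks above land at:
 *
 *      hugepage size           entry returned
 *      64 KB                   PTE
 *      8 MB / 256 MB / 2 GB    PMD
 *      16 GB                   PUD
 *
 * Sizes that span more than one entry at their level are filled in by
 * set_huge_pte_at() below.
 */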

pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        pgd = pgd_offset(mm, addr);
        if (pgd_none(*pgd))
                return NULL;
        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return NULL;
        if (is_hugetlb_pud(*pud))
                return (pte_t *)pud;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return NULL;
        if (is_hugetlb_pmd(*pmd))
                return (pte_t *)pmd;
        return pte_offset_map(pmd, addr);
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
{
        unsigned int nptes, orig_shift, shift;
        unsigned long i, size;
        pte_t orig;

        size = huge_tte_to_size(entry);

        shift = PAGE_SHIFT;
        if (size >= PUD_SIZE)
                shift = PUD_SHIFT;
        else if (size >= PMD_SIZE)
                shift = PMD_SHIFT;
        else
                shift = PAGE_SHIFT;

        nptes = size >> shift;

        if (!pte_present(*ptep) && pte_present(entry))
                mm->context.hugetlb_pte_count += nptes;

        addr &= ~(size - 1);
        orig = *ptep;
        orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);

        for (i = 0; i < nptes; i++)
                ptep[i] = __pte(pte_val(entry) + (i << shift));

        maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
        /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
        if (size == HPAGE_SIZE)
                maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
                                    orig_shift);
}
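
/*
 * Worked example for the fill loop above, assuming PMD_SHIFT = 23
 * (8 MB entries) and a 2 GB hugepage: size = 2 GB and shift =
 * PMD_SHIFT, so nptes = 2 GB >> 23 = 256, and slots ptep[0..255] each
 * receive the base TTE plus i << 23, i.e. consecutive 8 MB steps of
 * the same mapping.  For a 64 KB page the same loop writes eight
 * consecutive PTEs (nptes = 64 KB >> PAGE_SHIFT).
 */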

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        unsigned int i, nptes, orig_shift, shift;
        unsigned long size;
        pte_t entry;

        entry = *ptep;
        size = huge_tte_to_size(entry);

        shift = PAGE_SHIFT;
        if (size >= PUD_SIZE)
                shift = PUD_SHIFT;
        else if (size >= PMD_SIZE)
                shift = PMD_SHIFT;
        else
                shift = PAGE_SHIFT;

        nptes = size >> shift;
        orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);

        if (pte_present(entry))
                mm->context.hugetlb_pte_count -= nptes;

        addr &= ~(size - 1);
        for (i = 0; i < nptes; i++)
                ptep[i] = __pte(0UL);

        maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
        /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
        if (size == HPAGE_SIZE)
                maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
                                    orig_shift);

        return entry;
}

int pmd_huge(pmd_t pmd)
{
        return !pmd_none(pmd) &&
                (pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
}

int pud_huge(pud_t pud)
{
        return !pud_none(pud) &&
                (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
}
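
/*
 * Reading of the two tests above: masking with _PAGE_VALID|_PAGE_*_HUGE
 * and comparing against _PAGE_VALID means the helpers return false only
 * for an entry that has _PAGE_VALID set without the huge bit.  They
 * return true both for a present huge mapping (both bits set) and for a
 * non-none entry with _PAGE_VALID clear, which is presumably how
 * non-present hugetlb entries (e.g. migration entries) are caught.
 */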

static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
                           unsigned long addr)
{
        pgtable_t token = pmd_pgtable(*pmd);

        pmd_clear(pmd);
        pte_free_tlb(tlb, token, addr);
        mm_dec_nr_ptes(tlb->mm);
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pmd_t *pmd;
        unsigned long next;
        unsigned long start;

        start = addr;
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none(*pmd))
                        continue;
                if (is_hugetlb_pmd(*pmd))
                        pmd_clear(pmd);
                else
                        hugetlb_free_pte_range(tlb, pmd, addr);
        } while (pmd++, addr = next, addr != end);

        start &= PUD_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PUD_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pmd = pmd_offset(pud, start);
        pud_clear(pud);
        pmd_free_tlb(tlb, pmd, start);
        mm_dec_nr_pmds(tlb->mm);
}
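
/*
 * The floor/ceiling checks above mirror free_pmd_range() in
 * mm/memory.c: the PMD page referenced by this PUD entry is only freed
 * when the whole PUD-sized span around [start, end) lies inside
 * [floor, ceiling), because otherwise a neighbouring mapping outside
 * the range being unmapped may still be using entries in the same PMD
 * page.  The same pattern repeats one level up in
 * hugetlb_free_pud_range() below.
 */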

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pud_t *pud;
        unsigned long next;
        unsigned long start;

        start = addr;
        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
                if (is_hugetlb_pud(*pud))
                        pud_clear(pud);
                else
                        hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
                                               ceiling);
        } while (pud++, addr = next, addr != end);

        start &= PGDIR_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PGDIR_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pud = pud_offset(pgd, start);
        pgd_clear(pgd);
        pud_free_tlb(tlb, pud, start);
        mm_dec_nr_puds(tlb->mm);
}

void hugetlb_free_pgd_range(struct mmu_gather *tlb,
                            unsigned long addr, unsigned long end,
                            unsigned long floor, unsigned long ceiling)
{
        pgd_t *pgd;
        unsigned long next;

        addr &= PMD_MASK;
        if (addr < floor) {
                addr += PMD_SIZE;
                if (!addr)
                        return;
        }
        if (ceiling) {
                ceiling &= PMD_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                end -= PMD_SIZE;
        if (addr > end - 1)
                return;

        pgd = pgd_offset(tlb->mm, addr);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
        } while (pgd++, addr = next, addr != end);
}