/* linux/arch/arm64/mm/hugetlbpage.c */
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * arch/arm64/mm/hugetlbpage.c
   4 *
   5 * Copyright (C) 2013 Linaro Ltd.
   6 *
   7 * Based on arch/x86/mm/hugetlbpage.c.
   8 */
   9
  10#include <linux/init.h>
  11#include <linux/fs.h>
  12#include <linux/mm.h>
  13#include <linux/hugetlb.h>
  14#include <linux/pagemap.h>
  15#include <linux/err.h>
  16#include <linux/sysctl.h>
  17#include <asm/mman.h>
  18#include <asm/tlb.h>
  19#include <asm/tlbflush.h>
  20#include <asm/pgalloc.h>
  21
  22#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
  23bool arch_hugetlb_migration_supported(struct hstate *h)
  24{
  25        size_t pagesize = huge_page_size(h);
  26
  27        switch (pagesize) {
  28#ifdef CONFIG_ARM64_4K_PAGES
  29        case PUD_SIZE:
  30#endif
  31        case PMD_SIZE:
  32        case CONT_PMD_SIZE:
  33        case CONT_PTE_SIZE:
  34                return true;
  35        }
  36        pr_warn("%s: unrecognized huge page size 0x%lx\n",
  37                        __func__, pagesize);
  38        return false;
  39}
  40#endif
  41
  42int pmd_huge(pmd_t pmd)
  43{
  44        return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
  45}
  46
  47int pud_huge(pud_t pud)
  48{
  49#ifndef __PAGETABLE_PMD_FOLDED
  50        return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
  51#else
  52        return 0;
  53#endif
  54}
  55
  56/*
  57 * Select all bits except the pfn
  58 */
  59static inline pgprot_t pte_pgprot(pte_t pte)
  60{
  61        unsigned long pfn = pte_pfn(pte);
  62
  63        return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
  64}
  65
  66static int find_num_contig(struct mm_struct *mm, unsigned long addr,
  67                           pte_t *ptep, size_t *pgsize)
  68{
  69        pgd_t *pgdp = pgd_offset(mm, addr);
  70        p4d_t *p4dp;
  71        pud_t *pudp;
  72        pmd_t *pmdp;
  73
  74        *pgsize = PAGE_SIZE;
  75        p4dp = p4d_offset(pgdp, addr);
  76        pudp = pud_offset(p4dp, addr);
  77        pmdp = pmd_offset(pudp, addr);
  78        if ((pte_t *)pmdp == ptep) {
  79                *pgsize = PMD_SIZE;
  80                return CONT_PMDS;
  81        }
  82        return CONT_PTES;
  83}
  84
  85static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
  86{
  87        int contig_ptes = 0;
  88
  89        *pgsize = size;
  90
  91        switch (size) {
  92#ifdef CONFIG_ARM64_4K_PAGES
  93        case PUD_SIZE:
  94#endif
  95        case PMD_SIZE:
  96                contig_ptes = 1;
  97                break;
  98        case CONT_PMD_SIZE:
  99                *pgsize = PMD_SIZE;
 100                contig_ptes = CONT_PMDS;
 101                break;
 102        case CONT_PTE_SIZE:
 103                *pgsize = PAGE_SIZE;
 104                contig_ptes = CONT_PTES;
 105                break;
 106        }
 107
 108        return contig_ptes;
 109}
 110
/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 *
 * Returns the first entry of the set, with the dirty/accessed state of
 * every cleared entry folded in.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	pte_t orig_pte = huge_ptep_get(ptep);
	bool valid = pte_valid(orig_pte);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty or accessed bit for any page in the set,
		 * so check them all.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}

	/* Only a valid mapping can have TLB entries to invalidate. */
	if (valid) {
		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
		flush_tlb_range(&vma, saddr, addr);
	}
	return orig_pte;
}
 150
 151/*
 152 * Changing some bits of contiguous entries requires us to follow a
 153 * Break-Before-Make approach, breaking the whole contiguous set
 154 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 155 * "Misprogramming of the Contiguous bit", page D4-1762.
 156 *
 157 * This helper performs the break step for use cases where the
 158 * original pte is not needed.
 159 */
 160static void clear_flush(struct mm_struct *mm,
 161                             unsigned long addr,
 162                             pte_t *ptep,
 163                             unsigned long pgsize,
 164                             unsigned long ncontig)
 165{
 166        struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
 167        unsigned long i, saddr = addr;
 168
 169        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
 170                pte_clear(mm, addr, ptep);
 171
 172        flush_tlb_range(&vma, saddr, addr);
 173}
 174
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, pte_t pte)
{
	size_t pgsize;
	int i;
	int ncontig;
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;

	/*
	 * Code needs to be expanded to handle huge swap and migration
	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
	 */
	WARN_ON(!pte_present(pte));

	/* Non-contiguous huge pages occupy a single entry: no BBM needed. */
	if (!pte_cont(pte)) {
		set_pte_at(mm, addr, ptep, pte);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn = pte_pfn(pte);
	dpfn = pgsize >> PAGE_SHIFT;	/* pfn stride between entries */
	hugeprot = pte_pgprot(pte);

	/* Break-Before-Make: invalidate the old set before rewriting it. */
	clear_flush(mm, addr, ptep, pgsize, ncontig);

	/* Re-populate every entry with a stepped pfn, same attributes. */
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
 205
 206void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 207                          pte_t *ptep, pte_t pte, unsigned long sz)
 208{
 209        int i, ncontig;
 210        size_t pgsize;
 211
 212        ncontig = num_contig_ptes(sz, &pgsize);
 213
 214        for (i = 0; i < ncontig; i++, ptep++)
 215                set_pte(ptep, pte);
 216}
 217
 218pte_t *huge_pte_alloc(struct mm_struct *mm,
 219                      unsigned long addr, unsigned long sz)
 220{
 221        pgd_t *pgdp;
 222        p4d_t *p4dp;
 223        pud_t *pudp;
 224        pmd_t *pmdp;
 225        pte_t *ptep = NULL;
 226
 227        pgdp = pgd_offset(mm, addr);
 228        p4dp = p4d_offset(pgdp, addr);
 229        pudp = pud_alloc(mm, p4dp, addr);
 230        if (!pudp)
 231                return NULL;
 232
 233        if (sz == PUD_SIZE) {
 234                ptep = (pte_t *)pudp;
 235        } else if (sz == (CONT_PTE_SIZE)) {
 236                pmdp = pmd_alloc(mm, pudp, addr);
 237                if (!pmdp)
 238                        return NULL;
 239
 240                WARN_ON(addr & (sz - 1));
 241                /*
 242                 * Note that if this code were ever ported to the
 243                 * 32-bit arm platform then it will cause trouble in
 244                 * the case where CONFIG_HIGHPTE is set, since there
 245                 * will be no pte_unmap() to correspond with this
 246                 * pte_alloc_map().
 247                 */
 248                ptep = pte_alloc_map(mm, pmdp, addr);
 249        } else if (sz == PMD_SIZE) {
 250                if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
 251                    pud_none(READ_ONCE(*pudp)))
 252                        ptep = huge_pmd_share(mm, addr, pudp);
 253                else
 254                        ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
 255        } else if (sz == (CONT_PMD_SIZE)) {
 256                pmdp = pmd_alloc(mm, pudp, addr);
 257                WARN_ON(addr & (sz - 1));
 258                return (pte_t *)pmdp;
 259        }
 260
 261        return ptep;
 262}
 263
/*
 * Look up the page table entry backing a huge page of size sz at addr.
 * Returns NULL when no suitable entry exists; may return a non-present
 * (swap/migration) entry's slot so callers can inspect it.
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	if (!pgd_present(READ_ONCE(*pgdp)))
		return NULL;

	p4dp = p4d_offset(pgdp, addr);
	if (!p4d_present(READ_ONCE(*p4dp)))
		return NULL;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	/*
	 * For PUD_SIZE an empty entry still falls through, so the pud
	 * slot itself is returned below (via !pud_present).
	 */
	if (sz != PUD_SIZE && pud_none(pud))
		return NULL;
	/* hugepage or swap? */
	if (pud_huge(pud) || !pud_present(pud))
		return (pte_t *)pudp;
	/* table; check the next level */

	/* Contiguous-PMD mappings are addressed via their first entry. */
	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	/*
	 * As above: for (CONT_)PMD_SIZE an empty slot is still returned
	 * to the caller; for other sizes only descend if populated.
	 */
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(pmd))
		return NULL;
	if (pmd_huge(pmd) || !pmd_present(pmd))
		return (pte_t *)pmdp;

	if (sz == CONT_PTE_SIZE)
		return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));

	return NULL;
}
 305
 306pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 307                         struct page *page, int writable)
 308{
 309        size_t pagesize = huge_page_size(hstate_vma(vma));
 310
 311        if (pagesize == CONT_PTE_SIZE) {
 312                entry = pte_mkcont(entry);
 313        } else if (pagesize == CONT_PMD_SIZE) {
 314                entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
 315        } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
 316                pr_warn("%s: unrecognized huge page size 0x%lx\n",
 317                        __func__, pagesize);
 318        }
 319        return entry;
 320}
 321
 322void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 323                    pte_t *ptep, unsigned long sz)
 324{
 325        int i, ncontig;
 326        size_t pgsize;
 327
 328        ncontig = num_contig_ptes(sz, &pgsize);
 329
 330        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
 331                pte_clear(mm, addr, ptep);
 332}
 333
 334pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 335                              unsigned long addr, pte_t *ptep)
 336{
 337        int ncontig;
 338        size_t pgsize;
 339        pte_t orig_pte = huge_ptep_get(ptep);
 340
 341        if (!pte_cont(orig_pte))
 342                return ptep_get_and_clear(mm, addr, ptep);
 343
 344        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
 345
 346        return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
 347}
 348
 349/*
 350 * huge_ptep_set_access_flags will update access flags (dirty, accesssed)
 351 * and write permission.
 352 *
 353 * For a contiguous huge pte range we need to check whether or not write
 354 * permission has to change only on the first pte in the set. Then for
 355 * all the contiguous ptes we need to check whether or not there is a
 356 * discrepancy between dirty or young.
 357 */
 358static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
 359{
 360        int i;
 361
 362        if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
 363                return 1;
 364
 365        for (i = 0; i < ncontig; i++) {
 366                pte_t orig_pte = huge_ptep_get(ptep + i);
 367
 368                if (pte_dirty(pte) != pte_dirty(orig_pte))
 369                        return 1;
 370
 371                if (pte_young(pte) != pte_young(orig_pte))
 372                        return 1;
 373        }
 374
 375        return 0;
 376}
 377
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	pgprot_t hugeprot;
	pte_t orig_pte;

	/* Single entries are handled by the generic helper. */
	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;	/* pfn stride between entries */

	/* Nothing would change: skip the expensive break/make cycle. */
	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	/* Break-Before-Make: clear and flush the whole contiguous set. */
	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	/* Rewrite the set with stepped pfns and the updated attributes. */
	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return 1;
}
 412
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	/* Single entries are handled by the generic helper. */
	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;	/* pfn stride between entries */

	/*
	 * Break-Before-Make: clear the whole set (keeping accumulated
	 * dirty/young state), drop write permission, then rewrite it.
	 */
	pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
 439
 440void huge_ptep_clear_flush(struct vm_area_struct *vma,
 441                           unsigned long addr, pte_t *ptep)
 442{
 443        size_t pgsize;
 444        int ncontig;
 445
 446        if (!pte_cont(READ_ONCE(*ptep))) {
 447                ptep_clear_flush(vma, addr, ptep);
 448                return;
 449        }
 450
 451        ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
 452        clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
 453}
 454
/*
 * Register every huge page size this configuration supports; each
 * hstate order is given in base pages.
 */
static int __init hugetlbpage_init(void)
{
#ifdef CONFIG_ARM64_4K_PAGES
	/* PUD-level huge pages are only registered with 4K base pages. */
	hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
#endif
	/* Contiguous-PMD huge pages. */
	hugetlb_add_hstate((CONT_PMD_SHIFT + PMD_SHIFT) - PAGE_SHIFT);
	/* PMD (block) huge pages. */
	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	/* Contiguous-PTE huge pages. */
	hugetlb_add_hstate((CONT_PTE_SHIFT + PAGE_SHIFT) - PAGE_SHIFT);

	return 0;
}
arch_initcall(hugetlbpage_init);
 467
 468bool __init arch_hugetlb_valid_size(unsigned long size)
 469{
 470        switch (size) {
 471#ifdef CONFIG_ARM64_4K_PAGES
 472        case PUD_SIZE:
 473#endif
 474        case CONT_PMD_SIZE:
 475        case PMD_SIZE:
 476        case CONT_PTE_SIZE:
 477                return true;
 478        }
 479
 480        return false;
 481}
 482