linux/arch/arm64/mm/hugetlbpage.c
<<
>>
Prefs
   1/*
   2 * arch/arm64/mm/hugetlbpage.c
   3 *
   4 * Copyright (C) 2013 Linaro Ltd.
   5 *
   6 * Based on arch/x86/mm/hugetlbpage.c.
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License version 2 as
  10 * published by the Free Software Foundation.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 */
  17
  18#include <linux/init.h>
  19#include <linux/fs.h>
  20#include <linux/mm.h>
  21#include <linux/hugetlb.h>
  22#include <linux/pagemap.h>
  23#include <linux/err.h>
  24#include <linux/sysctl.h>
  25#include <asm/mman.h>
  26#include <asm/tlb.h>
  27#include <asm/tlbflush.h>
  28#include <asm/pgalloc.h>
  29
  30int pmd_huge(pmd_t pmd)
  31{
  32        return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
  33}
  34
  35int pud_huge(pud_t pud)
  36{
  37#ifndef __PAGETABLE_PMD_FOLDED
  38        return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
  39#else
  40        return 0;
  41#endif
  42}
  43
  44/*
  45 * Select all bits except the pfn
  46 */
  47static inline pgprot_t pte_pgprot(pte_t pte)
  48{
  49        unsigned long pfn = pte_pfn(pte);
  50
  51        return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
  52}
  53
  54static int find_num_contig(struct mm_struct *mm, unsigned long addr,
  55                           pte_t *ptep, size_t *pgsize)
  56{
  57        pgd_t *pgdp = pgd_offset(mm, addr);
  58        pud_t *pudp;
  59        pmd_t *pmdp;
  60
  61        *pgsize = PAGE_SIZE;
  62        pudp = pud_offset(pgdp, addr);
  63        pmdp = pmd_offset(pudp, addr);
  64        if ((pte_t *)pmdp == ptep) {
  65                *pgsize = PMD_SIZE;
  66                return CONT_PMDS;
  67        }
  68        return CONT_PTES;
  69}
  70
  71static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
  72{
  73        int contig_ptes = 0;
  74
  75        *pgsize = size;
  76
  77        switch (size) {
  78#ifdef CONFIG_ARM64_4K_PAGES
  79        case PUD_SIZE:
  80#endif
  81        case PMD_SIZE:
  82                contig_ptes = 1;
  83                break;
  84        case CONT_PMD_SIZE:
  85                *pgsize = PMD_SIZE;
  86                contig_ptes = CONT_PMDS;
  87                break;
  88        case CONT_PTE_SIZE:
  89                *pgsize = PAGE_SIZE;
  90                contig_ptes = CONT_PTES;
  91                break;
  92        }
  93
  94        return contig_ptes;
  95}
  96
  97/*
  98 * Changing some bits of contiguous entries requires us to follow a
  99 * Break-Before-Make approach, breaking the whole contiguous set
 100 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 101 * "Misprogramming of the Contiguous bit", page D4-1762.
 102 *
 103 * This helper performs the break step.
 104 */
 105static pte_t get_clear_flush(struct mm_struct *mm,
 106                             unsigned long addr,
 107                             pte_t *ptep,
 108                             unsigned long pgsize,
 109                             unsigned long ncontig)
 110{
 111        pte_t orig_pte = huge_ptep_get(ptep);
 112        bool valid = pte_valid(orig_pte);
 113        unsigned long i, saddr = addr;
 114
 115        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
 116                pte_t pte = ptep_get_and_clear(mm, addr, ptep);
 117
 118                /*
 119                 * If HW_AFDBM is enabled, then the HW could turn on
 120                 * the dirty or accessed bit for any page in the set,
 121                 * so check them all.
 122                 */
 123                if (pte_dirty(pte))
 124                        orig_pte = pte_mkdirty(orig_pte);
 125
 126                if (pte_young(pte))
 127                        orig_pte = pte_mkyoung(orig_pte);
 128        }
 129
 130        if (valid) {
 131                struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
 132                flush_tlb_range(&vma, saddr, addr);
 133        }
 134        return orig_pte;
 135}
 136
 137/*
 138 * Changing some bits of contiguous entries requires us to follow a
 139 * Break-Before-Make approach, breaking the whole contiguous set
 140 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 141 * "Misprogramming of the Contiguous bit", page D4-1762.
 142 *
 143 * This helper performs the break step for use cases where the
 144 * original pte is not needed.
 145 */
 146static void clear_flush(struct mm_struct *mm,
 147                             unsigned long addr,
 148                             pte_t *ptep,
 149                             unsigned long pgsize,
 150                             unsigned long ncontig)
 151{
 152        struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
 153        unsigned long i, saddr = addr;
 154
 155        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
 156                pte_clear(mm, addr, ptep);
 157
 158        flush_tlb_range(&vma, saddr, addr);
 159}
 160
 161void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 162                            pte_t *ptep, pte_t pte)
 163{
 164        size_t pgsize;
 165        int i;
 166        int ncontig;
 167        unsigned long pfn, dpfn;
 168        pgprot_t hugeprot;
 169
 170        /*
 171         * Code needs to be expanded to handle huge swap and migration
 172         * entries. Needed for HUGETLB and MEMORY_FAILURE.
 173         */
 174        WARN_ON(!pte_present(pte));
 175
 176        if (!pte_cont(pte)) {
 177                set_pte_at(mm, addr, ptep, pte);
 178                return;
 179        }
 180
 181        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
 182        pfn = pte_pfn(pte);
 183        dpfn = pgsize >> PAGE_SHIFT;
 184        hugeprot = pte_pgprot(pte);
 185
 186        clear_flush(mm, addr, ptep, pgsize, ncontig);
 187
 188        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
 189                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
 190}
 191
 192void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 193                          pte_t *ptep, pte_t pte, unsigned long sz)
 194{
 195        int i, ncontig;
 196        size_t pgsize;
 197
 198        ncontig = num_contig_ptes(sz, &pgsize);
 199
 200        for (i = 0; i < ncontig; i++, ptep++)
 201                set_pte(ptep, pte);
 202}
 203
 204pte_t *huge_pte_alloc(struct mm_struct *mm,
 205                      unsigned long addr, unsigned long sz)
 206{
 207        pgd_t *pgdp;
 208        pud_t *pudp;
 209        pmd_t *pmdp;
 210        pte_t *ptep = NULL;
 211
 212        pgdp = pgd_offset(mm, addr);
 213        pudp = pud_alloc(mm, pgdp, addr);
 214        if (!pudp)
 215                return NULL;
 216
 217        if (sz == PUD_SIZE) {
 218                ptep = (pte_t *)pudp;
 219        } else if (sz == (CONT_PTE_SIZE)) {
 220                pmdp = pmd_alloc(mm, pudp, addr);
 221
 222                WARN_ON(addr & (sz - 1));
 223                /*
 224                 * Note that if this code were ever ported to the
 225                 * 32-bit arm platform then it will cause trouble in
 226                 * the case where CONFIG_HIGHPTE is set, since there
 227                 * will be no pte_unmap() to correspond with this
 228                 * pte_alloc_map().
 229                 */
 230                ptep = pte_alloc_map(mm, pmdp, addr);
 231        } else if (sz == PMD_SIZE) {
 232                if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
 233                    pud_none(READ_ONCE(*pudp)))
 234                        ptep = huge_pmd_share(mm, addr, pudp);
 235                else
 236                        ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
 237        } else if (sz == (CONT_PMD_SIZE)) {
 238                pmdp = pmd_alloc(mm, pudp, addr);
 239                WARN_ON(addr & (sz - 1));
 240                return (pte_t *)pmdp;
 241        }
 242
 243        return ptep;
 244}
 245
 246pte_t *huge_pte_offset(struct mm_struct *mm,
 247                       unsigned long addr, unsigned long sz)
 248{
 249        pgd_t *pgdp;
 250        pud_t *pudp, pud;
 251        pmd_t *pmdp, pmd;
 252
 253        pgdp = pgd_offset(mm, addr);
 254        if (!pgd_present(READ_ONCE(*pgdp)))
 255                return NULL;
 256
 257        pudp = pud_offset(pgdp, addr);
 258        pud = READ_ONCE(*pudp);
 259        if (sz != PUD_SIZE && pud_none(pud))
 260                return NULL;
 261        /* hugepage or swap? */
 262        if (pud_huge(pud) || !pud_present(pud))
 263                return (pte_t *)pudp;
 264        /* table; check the next level */
 265
 266        if (sz == CONT_PMD_SIZE)
 267                addr &= CONT_PMD_MASK;
 268
 269        pmdp = pmd_offset(pudp, addr);
 270        pmd = READ_ONCE(*pmdp);
 271        if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
 272            pmd_none(pmd))
 273                return NULL;
 274        if (pmd_huge(pmd) || !pmd_present(pmd))
 275                return (pte_t *)pmdp;
 276
 277        if (sz == CONT_PTE_SIZE)
 278                return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
 279
 280        return NULL;
 281}
 282
 283pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 284                         struct page *page, int writable)
 285{
 286        size_t pagesize = huge_page_size(hstate_vma(vma));
 287
 288        if (pagesize == CONT_PTE_SIZE) {
 289                entry = pte_mkcont(entry);
 290        } else if (pagesize == CONT_PMD_SIZE) {
 291                entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
 292        } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
 293                pr_warn("%s: unrecognized huge page size 0x%lx\n",
 294                        __func__, pagesize);
 295        }
 296        return entry;
 297}
 298
 299void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 300                    pte_t *ptep, unsigned long sz)
 301{
 302        int i, ncontig;
 303        size_t pgsize;
 304
 305        ncontig = num_contig_ptes(sz, &pgsize);
 306
 307        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
 308                pte_clear(mm, addr, ptep);
 309}
 310
 311pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 312                              unsigned long addr, pte_t *ptep)
 313{
 314        int ncontig;
 315        size_t pgsize;
 316        pte_t orig_pte = huge_ptep_get(ptep);
 317
 318        if (!pte_cont(orig_pte))
 319                return ptep_get_and_clear(mm, addr, ptep);
 320
 321        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
 322
 323        return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
 324}
 325
 326/*
  327 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 328 * and write permission.
 329 *
 330 * For a contiguous huge pte range we need to check whether or not write
 331 * permission has to change only on the first pte in the set. Then for
 332 * all the contiguous ptes we need to check whether or not there is a
 333 * discrepancy between dirty or young.
 334 */
 335static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
 336{
 337        int i;
 338
 339        if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
 340                return 1;
 341
 342        for (i = 0; i < ncontig; i++) {
 343                pte_t orig_pte = huge_ptep_get(ptep + i);
 344
 345                if (pte_dirty(pte) != pte_dirty(orig_pte))
 346                        return 1;
 347
 348                if (pte_young(pte) != pte_young(orig_pte))
 349                        return 1;
 350        }
 351
 352        return 0;
 353}
 354
 355int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 356                               unsigned long addr, pte_t *ptep,
 357                               pte_t pte, int dirty)
 358{
 359        int ncontig, i;
 360        size_t pgsize = 0;
 361        unsigned long pfn = pte_pfn(pte), dpfn;
 362        pgprot_t hugeprot;
 363        pte_t orig_pte;
 364
 365        if (!pte_cont(pte))
 366                return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
 367
 368        ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
 369        dpfn = pgsize >> PAGE_SHIFT;
 370
 371        if (!__cont_access_flags_changed(ptep, pte, ncontig))
 372                return 0;
 373
 374        orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
 375
 376        /* Make sure we don't lose the dirty or young state */
 377        if (pte_dirty(orig_pte))
 378                pte = pte_mkdirty(pte);
 379
 380        if (pte_young(orig_pte))
 381                pte = pte_mkyoung(pte);
 382
 383        hugeprot = pte_pgprot(pte);
 384        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
 385                set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
 386
 387        return 1;
 388}
 389
 390void huge_ptep_set_wrprotect(struct mm_struct *mm,
 391                             unsigned long addr, pte_t *ptep)
 392{
 393        unsigned long pfn, dpfn;
 394        pgprot_t hugeprot;
 395        int ncontig, i;
 396        size_t pgsize;
 397        pte_t pte;
 398
 399        if (!pte_cont(READ_ONCE(*ptep))) {
 400                ptep_set_wrprotect(mm, addr, ptep);
 401                return;
 402        }
 403
 404        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
 405        dpfn = pgsize >> PAGE_SHIFT;
 406
 407        pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
 408        pte = pte_wrprotect(pte);
 409
 410        hugeprot = pte_pgprot(pte);
 411        pfn = pte_pfn(pte);
 412
 413        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
 414                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
 415}
 416
 417void huge_ptep_clear_flush(struct vm_area_struct *vma,
 418                           unsigned long addr, pte_t *ptep)
 419{
 420        size_t pgsize;
 421        int ncontig;
 422
 423        if (!pte_cont(READ_ONCE(*ptep))) {
 424                ptep_clear_flush(vma, addr, ptep);
 425                return;
 426        }
 427
 428        ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
 429        clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
 430}
 431
 432static int __init hugetlbpage_init(void)
 433{
 434#ifdef CONFIG_ARM64_4K_PAGES
 435        hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
 436#endif
 437        hugetlb_add_hstate((CONT_PMD_SHIFT + PMD_SHIFT) - PAGE_SHIFT);
 438        hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
 439        hugetlb_add_hstate((CONT_PTE_SHIFT + PAGE_SHIFT) - PAGE_SHIFT);
 440
 441        return 0;
 442}
 443arch_initcall(hugetlbpage_init);
 444
 445bool __init arch_hugetlb_valid_size(unsigned long size)
 446{
 447        switch (size) {
 448#ifdef CONFIG_ARM64_4K_PAGES
 449        case PUD_SIZE:
 450#endif
 451        case CONT_PMD_SIZE:
 452        case PMD_SIZE:
 453        case CONT_PTE_SIZE:
 454                return true;
 455        }
 456
 457        return false;
 458}
 459