linux/arch/arm64/mm/hugetlbpage.c
/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

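/*
 * A non-zero pmd/pud entry with the table bit clear is a block (section)
 * mapping, i.e. a huge page at that level.
 */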
int pmd_huge(pmd_t pmd)
{
	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
}

int pud_huge(pud_t pud)
{
#ifndef __PAGETABLE_PMD_FOLDED
	return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
#else
	return 0;
#endif
}

/*
 * Select all bits except the pfn
 */
static inline pgprot_t pte_pgprot(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);

	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}

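/*
 * Work out how many contiguous entries the given ptep belongs to:
 * CONT_PMDS if it turns out to be a PMD-level entry, CONT_PTES otherwise.
 * *pgsize is set to the size covered by a single entry.
 */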
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, size_t *pgsize)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud;
	pmd_t *pmd;

	*pgsize = PAGE_SIZE;
	pud = pud_offset(pgd, addr);
	pmd = pmd_offset(pud, addr);
	if ((pte_t *)pmd == ptep) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}
	return CONT_PTES;
}

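/*
 * Map a huge page size onto the number of page table entries backing it
 * and the size each entry covers. Non-contiguous sizes (PMD_SIZE, and
 * PUD_SIZE with 4K pages) use a single entry; unknown sizes return 0.
 */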
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	int contig_ptes = 0;

	*pgsize = size;

	switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case PMD_SIZE:
		contig_ptes = 1;
		break;
	case CONT_PMD_SIZE:
		*pgsize = PMD_SIZE;
		contig_ptes = CONT_PMDS;
		break;
	case CONT_PTE_SIZE:
		*pgsize = PAGE_SIZE;
		contig_ptes = CONT_PTES;
		break;
	}

	return contig_ptes;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	struct vm_area_struct vma = { .vm_mm = mm };
	pte_t orig_pte = huge_ptep_get(ptep);
	bool valid = pte_valid(orig_pte);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty bit for any page in the set, so check
		 * them all.  All hugetlb entries are already young.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);
	}

	if (valid)
		flush_tlb_range(&vma, saddr, addr);
	return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed.
 */
static void clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	struct vm_area_struct vma = { .vm_mm = mm };
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);

	flush_tlb_range(&vma, saddr, addr);
}

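/*
 * Install a huge pte. For contiguous mappings the old set is broken
 * (cleared and flushed) first, then ncontig entries are written, stepping
 * the pfn by one entry's worth of pages each time; non-contiguous
 * mappings are a plain set_pte_at().
 */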
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep, pte_t pte)
{
	size_t pgsize;
	int i;
	int ncontig;
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;

	/*
	 * Code needs to be expanded to handle huge swap and migration
	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
	 */
	WARN_ON(!pte_present(pte));

	if (!pte_cont(pte)) {
		set_pte_at(mm, addr, ptep, pte);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn = pte_pfn(pte);
	dpfn = pgsize >> PAGE_SHIFT;
	hugeprot = pte_pgprot(pte);

	clear_flush(mm, addr, ptep, pgsize, ncontig);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
			 pte_val(pfn_pte(pfn, hugeprot)));
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
	}
}

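/*
 * Write a non-present (swap/migration) entry into each of the page table
 * slots backing a huge page of the given size.
 */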
void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
			  pte_t *ptep, pte_t pte, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, ptep++)
		set_pte(ptep, pte);
}

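/*
 * Allocate the page table levels needed for a huge page of size sz at
 * addr and return a pointer to the entry that will map it: the pud for
 * PUD_SIZE, a pmd for PMD_SIZE (sharing the pmd page where possible) and
 * for CONT_PMD_SIZE, or the first pte of a CONT_PTE_SIZE run.
 */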
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pte_t *pte = NULL;

	pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (!pud)
		return NULL;

	if (sz == PUD_SIZE) {
		pte = (pte_t *)pud;
	} else if (sz == (PAGE_SIZE * CONT_PTES)) {
		pmd_t *pmd = pmd_alloc(mm, pud, addr);

		WARN_ON(addr & (sz - 1));
		/*
		 * Note that if this code were ever ported to the
		 * 32-bit arm platform then it will cause trouble in
		 * the case where CONFIG_HIGHPTE is set, since there
		 * will be no pte_unmap() to correspond with this
		 * pte_alloc_map().
		 */
		pte = pte_alloc_map(mm, pmd, addr);
	} else if (sz == PMD_SIZE) {
		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
		    pud_none(*pud))
			pte = huge_pmd_share(mm, addr, pud);
		else
			pte = (pte_t *)pmd_alloc(mm, pud, addr);
	} else if (sz == (PMD_SIZE * CONT_PMDS)) {
		pmd_t *pmd;

		pmd = pmd_alloc(mm, pud, addr);
		WARN_ON(addr & (sz - 1));
		return (pte_t *)pmd;
	}

	/* pte may still be NULL here (unsupported sz or failed allocation) */
	pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
	       sz, pte, pte ? pte_val(*pte) : 0ULL);
	return pte;
}

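/*
 * Walk the page tables for addr without allocating and return the entry
 * mapping a huge page of size sz: the pud or pmd itself for block
 * mappings (or non-present swap/migration entries at those levels), the
 * first pte of a contiguous PTE run, or NULL if no suitable entry exists.
 */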
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
	if (!pgd_present(*pgd))
		return NULL;

	pud = pud_offset(pgd, addr);
	if (sz != PUD_SIZE && pud_none(*pud))
		return NULL;
	/* hugepage or swap? */
	if (pud_huge(*pud) || !pud_present(*pud))
		return (pte_t *)pud;
	/* table; check the next level */

	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmd = pmd_offset(pud, addr);
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(*pmd))
		return NULL;
	if (pmd_huge(*pmd) || !pmd_present(*pmd))
		return (pte_t *)pmd;

	if (sz == CONT_PTE_SIZE) {
		pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
		return pte;
	}

	return NULL;
}

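/*
 * Set the contiguous bit on the entry when the VMA's huge page size is
 * one of the contiguous sizes; plain PMD/PUD block sizes are left as-is.
 */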
pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
			 struct page *page, int writable)
{
	size_t pagesize = huge_page_size(hstate_vma(vma));

	if (pagesize == CONT_PTE_SIZE) {
		entry = pte_mkcont(entry);
	} else if (pagesize == CONT_PMD_SIZE) {
		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);
	}
	return entry;
}

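/*
 * Clear every entry backing a huge page of the given size. No TLB
 * maintenance is performed here; that is left to the caller.
 */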
void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);
}

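/*
 * Clear a huge page mapping and return the original entry. For contiguous
 * mappings the whole set is torn down via get_clear_flush() so the dirty
 * bit is gathered from every entry.
 */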
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	int ncontig;
	size_t pgsize;
	pte_t orig_pte = huge_ptep_get(ptep);

	if (!pte_cont(orig_pte))
		return ptep_get_and_clear(mm, addr, ptep);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);

	return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
}

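/*
 * Update the access/dirty bits of a huge page mapping. Contiguous
 * mappings follow Break-Before-Make: the old set is cleared and flushed,
 * the dirty bit carried over, then the new entries are written.
 * Returns 1 if the entries differed from what was requested.
 */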
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i, changed = 0;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
	if (!pte_same(orig_pte, pte))
		changed = 1;

	/* Make sure we don't lose the dirty state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return changed;
}

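/*
 * Write-protect a huge page mapping. Contiguous mappings are broken and
 * flushed first, preserving any hardware dirty bit, before the read-only
 * entries are rewritten.
 */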
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	if (!pte_cont(*ptep)) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

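/*
 * Clear a huge page mapping and flush the TLB, handling both single
 * block entries and contiguous sets.
 */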
void huge_ptep_clear_flush(struct vm_area_struct *vma,
			   unsigned long addr, pte_t *ptep)
{
	size_t pgsize;
	int ncontig;

	if (!pte_cont(*ptep)) {
		ptep_clear_flush(vma, addr, ptep);
		return;
	}

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
}

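/*
 * Parse the "hugepagesz=" kernel command line option and register the
 * requested size if this page size configuration supports it.
 */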
static __init int setup_hugepagesz(char *opt)
{
	unsigned long ps = memparse(opt, &opt);

	switch (ps) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case PMD_SIZE * CONT_PMDS:
	case PMD_SIZE:
	case PAGE_SIZE * CONT_PTES:
		hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT);
		return 1;
	}

	hugetlb_bad_size();
	pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
	return 0;
}
__setup("hugepagesz=", setup_hugepagesz);

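/*
 * With 64K pages, register the contiguous-PTE huge page size
 * (CONT_PTES * 64K = 2M) by default if it has not been added already.
 */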
#ifdef CONFIG_ARM64_64K_PAGES
static __init int add_default_hugepagesz(void)
{
	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
		hugetlb_add_hstate(CONT_PTE_SHIFT);
	return 0;
}
arch_initcall(add_default_hugepagesz);
#endif