linux/arch/arm64/mm/hugetlbpage.c
<<
>>
Prefs
   1/*
   2 * arch/arm64/mm/hugetlbpage.c
   3 *
   4 * Copyright (C) 2013 Linaro Ltd.
   5 *
   6 * Based on arch/x86/mm/hugetlbpage.c.
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License version 2 as
  10 * published by the Free Software Foundation.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 */
  17
  18#include <linux/init.h>
  19#include <linux/fs.h>
  20#include <linux/mm.h>
  21#include <linux/hugetlb.h>
  22#include <linux/pagemap.h>
  23#include <linux/err.h>
  24#include <linux/sysctl.h>
  25#include <asm/mman.h>
  26#include <asm/tlb.h>
  27#include <asm/tlbflush.h>
  28#include <asm/pgalloc.h>
  29
  30int pmd_huge(pmd_t pmd)
  31{
  32        return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
  33}
  34
  35int pud_huge(pud_t pud)
  36{
  37#ifndef __PAGETABLE_PMD_FOLDED
  38        return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
  39#else
  40        return 0;
  41#endif
  42}
  43
  44static int find_num_contig(struct mm_struct *mm, unsigned long addr,
  45                           pte_t *ptep, pte_t pte, size_t *pgsize)
  46{
  47        pgd_t *pgd = pgd_offset(mm, addr);
  48        pud_t *pud;
  49        pmd_t *pmd;
  50
  51        *pgsize = PAGE_SIZE;
  52        if (!pte_cont(pte))
  53                return 1;
  54        if (!pgd_present(*pgd)) {
  55                VM_BUG_ON(!pgd_present(*pgd));
  56                return 1;
  57        }
  58        pud = pud_offset(pgd, addr);
  59        if (!pud_present(*pud)) {
  60                VM_BUG_ON(!pud_present(*pud));
  61                return 1;
  62        }
  63        pmd = pmd_offset(pud, addr);
  64        if (!pmd_present(*pmd)) {
  65                VM_BUG_ON(!pmd_present(*pmd));
  66                return 1;
  67        }
  68        if ((pte_t *)pmd == ptep) {
  69                *pgsize = PMD_SIZE;
  70                return CONT_PMDS;
  71        }
  72        return CONT_PTES;
  73}
  74
  75void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
  76                            pte_t *ptep, pte_t pte)
  77{
  78        size_t pgsize;
  79        int i;
  80        int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize);
  81        unsigned long pfn;
  82        pgprot_t hugeprot;
  83
  84        if (ncontig == 1) {
  85                set_pte_at(mm, addr, ptep, pte);
  86                return;
  87        }
  88
  89        pfn = pte_pfn(pte);
  90        hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
  91        for (i = 0; i < ncontig; i++) {
  92                pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
  93                         pte_val(pfn_pte(pfn, hugeprot)));
  94                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
  95                ptep++;
  96                pfn += pgsize >> PAGE_SHIFT;
  97                addr += pgsize;
  98        }
  99}
 100
 101pte_t *huge_pte_alloc(struct mm_struct *mm,
 102                      unsigned long addr, unsigned long sz)
 103{
 104        pgd_t *pgd;
 105        pud_t *pud;
 106        pte_t *pte = NULL;
 107
 108        pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
 109        pgd = pgd_offset(mm, addr);
 110        pud = pud_alloc(mm, pgd, addr);
 111        if (!pud)
 112                return NULL;
 113
 114        if (sz == PUD_SIZE) {
 115                pte = (pte_t *)pud;
 116        } else if (sz == (PAGE_SIZE * CONT_PTES)) {
 117                pmd_t *pmd = pmd_alloc(mm, pud, addr);
 118
 119                WARN_ON(addr & (sz - 1));
 120                /*
 121                 * Note that if this code were ever ported to the
 122                 * 32-bit arm platform then it will cause trouble in
 123                 * the case where CONFIG_HIGHPTE is set, since there
 124                 * will be no pte_unmap() to correspond with this
 125                 * pte_alloc_map().
 126                 */
 127                pte = pte_alloc_map(mm, pmd, addr);
 128        } else if (sz == PMD_SIZE) {
 129                if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
 130                    pud_none(*pud))
 131                        pte = huge_pmd_share(mm, addr, pud);
 132                else
 133                        pte = (pte_t *)pmd_alloc(mm, pud, addr);
 134        } else if (sz == (PMD_SIZE * CONT_PMDS)) {
 135                pmd_t *pmd;
 136
 137                pmd = pmd_alloc(mm, pud, addr);
 138                WARN_ON(addr & (sz - 1));
 139                return (pte_t *)pmd;
 140        }
 141
 142        pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
 143               sz, pte, pte_val(*pte));
 144        return pte;
 145}
 146
 147pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 148{
 149        pgd_t *pgd;
 150        pud_t *pud;
 151        pmd_t *pmd = NULL;
 152        pte_t *pte = NULL;
 153
 154        pgd = pgd_offset(mm, addr);
 155        pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
 156        if (!pgd_present(*pgd))
 157                return NULL;
 158        pud = pud_offset(pgd, addr);
 159        if (!pud_present(*pud))
 160                return NULL;
 161
 162        if (pud_huge(*pud))
 163                return (pte_t *)pud;
 164        pmd = pmd_offset(pud, addr);
 165        if (!pmd_present(*pmd))
 166                return NULL;
 167
 168        if (pte_cont(pmd_pte(*pmd))) {
 169                pmd = pmd_offset(
 170                        pud, (addr & CONT_PMD_MASK));
 171                return (pte_t *)pmd;
 172        }
 173        if (pmd_huge(*pmd))
 174                return (pte_t *)pmd;
 175        pte = pte_offset_kernel(pmd, addr);
 176        if (pte_present(*pte) && pte_cont(*pte)) {
 177                pte = pte_offset_kernel(
 178                        pmd, (addr & CONT_PTE_MASK));
 179                return pte;
 180        }
 181        return NULL;
 182}
 183
 184pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 185                         struct page *page, int writable)
 186{
 187        size_t pagesize = huge_page_size(hstate_vma(vma));
 188
 189        if (pagesize == CONT_PTE_SIZE) {
 190                entry = pte_mkcont(entry);
 191        } else if (pagesize == CONT_PMD_SIZE) {
 192                entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
 193        } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
 194                pr_warn("%s: unrecognized huge page size 0x%lx\n",
 195                        __func__, pagesize);
 196        }
 197        return entry;
 198}
 199
 200pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 201                              unsigned long addr, pte_t *ptep)
 202{
 203        pte_t pte;
 204
 205        if (pte_cont(*ptep)) {
 206                int ncontig, i;
 207                size_t pgsize;
 208                pte_t *cpte;
 209                bool is_dirty = false;
 210
 211                cpte = huge_pte_offset(mm, addr);
 212                ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
 213                /* save the 1st pte to return */
 214                pte = ptep_get_and_clear(mm, addr, cpte);
 215                for (i = 1; i < ncontig; ++i) {
 216                        /*
 217                         * If HW_AFDBM is enabled, then the HW could
 218                         * turn on the dirty bit for any of the page
 219                         * in the set, so check them all.
 220                         */
 221                        ++cpte;
 222                        if (pte_dirty(ptep_get_and_clear(mm, addr, cpte)))
 223                                is_dirty = true;
 224                }
 225                if (is_dirty)
 226                        return pte_mkdirty(pte);
 227                else
 228                        return pte;
 229        } else {
 230                return ptep_get_and_clear(mm, addr, ptep);
 231        }
 232}
 233
 234int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 235                               unsigned long addr, pte_t *ptep,
 236                               pte_t pte, int dirty)
 237{
 238        pte_t *cpte;
 239
 240        if (pte_cont(pte)) {
 241                int ncontig, i, changed = 0;
 242                size_t pgsize = 0;
 243                unsigned long pfn = pte_pfn(pte);
 244                /* Select all bits except the pfn */
 245                pgprot_t hugeprot =
 246                        __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
 247                                 pte_val(pte));
 248
 249                cpte = huge_pte_offset(vma->vm_mm, addr);
 250                pfn = pte_pfn(*cpte);
 251                ncontig = find_num_contig(vma->vm_mm, addr, cpte,
 252                                          *cpte, &pgsize);
 253                for (i = 0; i < ncontig; ++i, ++cpte) {
 254                        changed = ptep_set_access_flags(vma, addr, cpte,
 255                                                        pfn_pte(pfn,
 256                                                                hugeprot),
 257                                                        dirty);
 258                        pfn += pgsize >> PAGE_SHIFT;
 259                }
 260                return changed;
 261        } else {
 262                return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
 263        }
 264}
 265
 266void huge_ptep_set_wrprotect(struct mm_struct *mm,
 267                             unsigned long addr, pte_t *ptep)
 268{
 269        if (pte_cont(*ptep)) {
 270                int ncontig, i;
 271                pte_t *cpte;
 272                size_t pgsize = 0;
 273
 274                cpte = huge_pte_offset(mm, addr);
 275                ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
 276                for (i = 0; i < ncontig; ++i, ++cpte)
 277                        ptep_set_wrprotect(mm, addr, cpte);
 278        } else {
 279                ptep_set_wrprotect(mm, addr, ptep);
 280        }
 281}
 282
 283void huge_ptep_clear_flush(struct vm_area_struct *vma,
 284                           unsigned long addr, pte_t *ptep)
 285{
 286        if (pte_cont(*ptep)) {
 287                int ncontig, i;
 288                pte_t *cpte;
 289                size_t pgsize = 0;
 290
 291                cpte = huge_pte_offset(vma->vm_mm, addr);
 292                ncontig = find_num_contig(vma->vm_mm, addr, cpte,
 293                                          *cpte, &pgsize);
 294                for (i = 0; i < ncontig; ++i, ++cpte)
 295                        ptep_clear_flush(vma, addr, cpte);
 296        } else {
 297                ptep_clear_flush(vma, addr, ptep);
 298        }
 299}
 300
 301static __init int setup_hugepagesz(char *opt)
 302{
 303        unsigned long ps = memparse(opt, &opt);
 304
 305        if (ps == PMD_SIZE) {
 306                hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
 307        } else if (ps == PUD_SIZE) {
 308                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
 309        } else {
 310                pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
 311                return 0;
 312        }
 313        return 1;
 314}
 315__setup("hugepagesz=", setup_hugepagesz);
 316