linux/include/asm-generic/pgtable.h
   1#ifndef _ASM_GENERIC_PGTABLE_H
   2#define _ASM_GENERIC_PGTABLE_H
   3
   4#include <linux/pfn.h>
   5
   6#ifndef __ASSEMBLY__
   7#ifdef CONFIG_MMU
   8
   9#include <linux/mm_types.h>
  10#include <linux/bug.h>
  11#include <linux/errno.h>
  12
  13#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \
  14        CONFIG_PGTABLE_LEVELS
  15#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED
  16#endif
  17
  18/*
  19 * On almost all architectures and configurations, 0 can be used as the
  20 * upper ceiling to free_pgtables(): on many architectures it has the same
  21 * effect as using TASK_SIZE.  However, there is one configuration which
  22 * must impose a more careful limit, to avoid freeing kernel pgtables.
  23 */
  24#ifndef USER_PGTABLES_CEILING
  25#define USER_PGTABLES_CEILING   0UL
  26#endif
  27
  28#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
  29extern int ptep_set_access_flags(struct vm_area_struct *vma,
  30                                 unsigned long address, pte_t *ptep,
  31                                 pte_t entry, int dirty);
  32#endif
  33
  34#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
  35#ifdef CONFIG_TRANSPARENT_HUGEPAGE
  36extern int pmdp_set_access_flags(struct vm_area_struct *vma,
  37                                 unsigned long address, pmd_t *pmdp,
  38                                 pmd_t entry, int dirty);
  39#else
  40static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
  41                                        unsigned long address, pmd_t *pmdp,
  42                                        pmd_t entry, int dirty)
  43{
  44        BUILD_BUG();
  45        return 0;
  46}
  47#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  48#endif
  49
  50#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
  51static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
  52                                            unsigned long address,
  53                                            pte_t *ptep)
  54{
  55        pte_t pte = *ptep;
  56        int r = 1;
  57        if (!pte_young(pte))
  58                r = 0;
  59        else
  60                set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
  61        return r;
  62}
  63#endif
  64
  65#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
  66#ifdef CONFIG_TRANSPARENT_HUGEPAGE
  67static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
  68                                            unsigned long address,
  69                                            pmd_t *pmdp)
  70{
  71        pmd_t pmd = *pmdp;
  72        int r = 1;
  73        if (!pmd_young(pmd))
  74                r = 0;
  75        else
  76                set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
  77        return r;
  78}
  79#else
  80static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
  81                                            unsigned long address,
  82                                            pmd_t *pmdp)
  83{
  84        BUILD_BUG();
  85        return 0;
  86}
  87#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  88#endif
  89
  90#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
  91int ptep_clear_flush_young(struct vm_area_struct *vma,
  92                           unsigned long address, pte_t *ptep);
  93#endif
  94
  95#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
  96#ifdef CONFIG_TRANSPARENT_HUGEPAGE
  97extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
  98                                  unsigned long address, pmd_t *pmdp);
  99#else
 100/*
  101 * Despite being relevant to THP only, this API is called from generic rmap
  102 * code under PageTransHuge(), hence it needs a dummy implementation for !THP
 103 */
 104static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 105                                         unsigned long address, pmd_t *pmdp)
 106{
 107        BUILD_BUG();
 108        return 0;
 109}
 110#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 111#endif
 112
 113#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
 114static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 115                                       unsigned long address,
 116                                       pte_t *ptep)
 117{
 118        pte_t pte = *ptep;
 119        pte_clear(mm, address, ptep);
 120        return pte;
 121}
 122#endif
 123
 124#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 125#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 126static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 127                                            unsigned long address,
 128                                            pmd_t *pmdp)
 129{
 130        pmd_t pmd = *pmdp;
 131        pmd_clear(pmdp);
 132        return pmd;
 133}
 134#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 135#endif
 136
 137#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 138#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 139static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
 140                                            unsigned long address, pmd_t *pmdp,
 141                                            int full)
 142{
 143        return pmdp_huge_get_and_clear(mm, address, pmdp);
 144}
 145#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 146#endif
 147
 148#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 149static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 150                                            unsigned long address, pte_t *ptep,
 151                                            int full)
 152{
 153        pte_t pte;
 154        pte = ptep_get_and_clear(mm, address, ptep);
 155        return pte;
 156}
 157#endif
 158
 159/*
 160 * Some architectures may be able to avoid expensive synchronization
  161 * primitives when modifications are made to PTEs which are already
  162 * not present, or during address space destruction.
 163 */
 164#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
 165static inline void pte_clear_not_present_full(struct mm_struct *mm,
 166                                              unsigned long address,
 167                                              pte_t *ptep,
 168                                              int full)
 169{
 170        pte_clear(mm, address, ptep);
 171}
 172#endif
 173
 174#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
 175extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
 176                              unsigned long address,
 177                              pte_t *ptep);
 178#endif
 179
 180#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
 181extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
 182                              unsigned long address,
 183                              pmd_t *pmdp);
 184#endif
 185
 186#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
 187struct mm_struct;
 188static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 189{
 190        pte_t old_pte = *ptep;
 191        set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
 192}
 193#endif
 194
 195#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
 196#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 197static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 198                                      unsigned long address, pmd_t *pmdp)
 199{
 200        pmd_t old_pmd = *pmdp;
 201        set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
 202}
 203#else
 204static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 205                                      unsigned long address, pmd_t *pmdp)
 206{
 207        BUILD_BUG();
 208}
 209#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 210#endif
 211
 212#ifndef pmdp_collapse_flush
 213#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 214extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 215                                 unsigned long address, pmd_t *pmdp);
 216#else
 217static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 218                                        unsigned long address,
 219                                        pmd_t *pmdp)
 220{
 221        BUILD_BUG();
 222        return *pmdp;
 223}
 224#define pmdp_collapse_flush pmdp_collapse_flush
 225#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 226#endif
 227
 228#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
 229extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 230                                       pgtable_t pgtable);
 231#endif
 232
 233#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
 234extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
 235#endif
 236
 237#ifndef __HAVE_ARCH_PMDP_INVALIDATE
 238extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 239                            pmd_t *pmdp);
 240#endif
 241
 242#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
 243static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
 244                                           unsigned long address, pmd_t *pmdp)
 245{
 246
 247}
 248#endif
 249
 250#ifndef __HAVE_ARCH_PTE_SAME
 251static inline int pte_same(pte_t pte_a, pte_t pte_b)
 252{
 253        return pte_val(pte_a) == pte_val(pte_b);
 254}
 255#endif
 256
 257#ifndef __HAVE_ARCH_PTE_UNUSED
 258/*
 259 * Some architectures provide facilities to virtualization guests
 260 * so that they can flag allocated pages as unused. This allows the
 261 * host to transparently reclaim unused pages. This function returns
 262 * whether the pte's page is unused.
 263 */
 264static inline int pte_unused(pte_t pte)
 265{
 266        return 0;
 267}
 268#endif
 269
 270#ifndef __HAVE_ARCH_PMD_SAME
 271#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 272static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 273{
 274        return pmd_val(pmd_a) == pmd_val(pmd_b);
 275}
 276#else /* CONFIG_TRANSPARENT_HUGEPAGE */
 277static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 278{
 279        BUILD_BUG();
 280        return 0;
 281}
 282#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 283#endif
 284
 285#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
 286#define pgd_offset_gate(mm, addr)       pgd_offset(mm, addr)
 287#endif
 288
 289#ifndef __HAVE_ARCH_MOVE_PTE
 290#define move_pte(pte, prot, old_addr, new_addr) (pte)
 291#endif
 292
 293#ifndef pte_accessible
 294# define pte_accessible(mm, pte)        ((void)(pte), 1)
 295#endif
 296
 297#ifndef flush_tlb_fix_spurious_fault
 298#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
 299#endif
 300
 301#ifndef pgprot_noncached
 302#define pgprot_noncached(prot)  (prot)
 303#endif
 304
 305#ifndef pgprot_writecombine
 306#define pgprot_writecombine pgprot_noncached
 307#endif
 308
 309#ifndef pgprot_writethrough
 310#define pgprot_writethrough pgprot_noncached
 311#endif
 312
 313#ifndef pgprot_device
 314#define pgprot_device pgprot_noncached
 315#endif
 316
 317#ifndef pgprot_modify
 318#define pgprot_modify pgprot_modify
 319static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 320{
 321        if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
 322                newprot = pgprot_noncached(newprot);
 323        if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
 324                newprot = pgprot_writecombine(newprot);
 325        if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
 326                newprot = pgprot_device(newprot);
 327        return newprot;
 328}
 329#endif
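
/*
 * Illustrative sketch (editorial, not part of this header): one way a caller
 * might use pgprot_modify() to change access protections while preserving
 * whatever caching attributes a vma's page protection already carries.  The
 * function name example_update_vma_prot is hypothetical.
 */
#if 0
static void example_update_vma_prot(struct vm_area_struct *vma,
                                    unsigned long newflags)
{
        pgprot_t oldprot = vma->vm_page_prot;

        /* Derive new protections, then re-apply any cachability attributes. */
        vma->vm_page_prot = pgprot_modify(oldprot, vm_get_page_prot(newflags));
}
#endif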
 330
 331/*
 332 * When walking page tables, get the address of the next boundary,
 333 * or the end address of the range if that comes earlier.  Although no
  334 * vma end wraps to 0, a rounded-up __boundary may wrap to 0 at any level.
 335 */
 336
 337#define pgd_addr_end(addr, end)                                         \
 338({      unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;  \
 339        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
 340})
 341
 342#ifndef pud_addr_end
 343#define pud_addr_end(addr, end)                                         \
 344({      unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
 345        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
 346})
 347#endif
 348
 349#ifndef pmd_addr_end
 350#define pmd_addr_end(addr, end)                                         \
 351({      unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;      \
 352        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
 353})
 354#endif
 355
 356/*
 357 * When walking page tables, we usually want to skip any p?d_none entries;
 358 * and any p?d_bad entries - reporting the error before resetting to none.
 359 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 360 */
 361void pgd_clear_bad(pgd_t *);
 362void pud_clear_bad(pud_t *);
 363void pmd_clear_bad(pmd_t *);
 364
 365static inline int pgd_none_or_clear_bad(pgd_t *pgd)
 366{
 367        if (pgd_none(*pgd))
 368                return 1;
 369        if (unlikely(pgd_bad(*pgd))) {
 370                pgd_clear_bad(pgd);
 371                return 1;
 372        }
 373        return 0;
 374}
 375
 376static inline int pud_none_or_clear_bad(pud_t *pud)
 377{
 378        if (pud_none(*pud))
 379                return 1;
 380        if (unlikely(pud_bad(*pud))) {
 381                pud_clear_bad(pud);
 382                return 1;
 383        }
 384        return 0;
 385}
 386
 387static inline int pmd_none_or_clear_bad(pmd_t *pmd)
 388{
 389        if (pmd_none(*pmd))
 390                return 1;
 391        if (unlikely(pmd_bad(*pmd))) {
 392                pmd_clear_bad(pmd);
 393                return 1;
 394        }
 395        return 0;
 396}
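
/*
 * Illustrative sketch (editorial, not part of this header): the walk pattern
 * that the p?d_addr_end() macros and p?d_none_or_clear_bad() helpers above
 * are designed for.  The example_walk_* names are hypothetical; real walkers
 * (e.g. in mm/memory.c or mm/pagewalk.c) follow the same shape.
 */
#if 0
static int example_walk_pmd_range(struct mm_struct *mm, pud_t *pud,
                                  unsigned long addr, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, addr);
        unsigned long next;

        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(pmd))
                        continue;       /* skip empty/bad entries */
                /* ... visit the pte range [addr, next) here ... */
        } while (pmd++, addr = next, addr != end);

        return 0;
}

static int example_walk_page_range(struct mm_struct *mm,
                                   unsigned long addr, unsigned long end)
{
        pgd_t *pgd = pgd_offset(mm, addr);
        unsigned long next;

        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                /* descend with pud_offset()/pud_addr_end() in the same shape */
        } while (pgd++, addr = next, addr != end);

        return 0;
}
#endif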
 397
 398static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
 399                                             unsigned long addr,
 400                                             pte_t *ptep)
 401{
 402        /*
 403         * Get the current pte state, but zero it out to make it
 404         * non-present, preventing the hardware from asynchronously
 405         * updating it.
 406         */
 407        return ptep_get_and_clear(mm, addr, ptep);
 408}
 409
 410static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
 411                                             unsigned long addr,
 412                                             pte_t *ptep, pte_t pte)
 413{
 414        /*
 415         * The pte is non-present, so there's no hardware state to
 416         * preserve.
 417         */
 418        set_pte_at(mm, addr, ptep, pte);
 419}
 420
 421#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 422/*
 423 * Start a pte protection read-modify-write transaction, which
 424 * protects against asynchronous hardware modifications to the pte.
 425 * The intention is not to prevent the hardware from making pte
 426 * updates, but to prevent any updates it may make from being lost.
 427 *
 428 * This does not protect against other software modifications of the
  429 * pte; the appropriate pte lock must be held over the transaction.
 430 *
 431 * Note that this interface is intended to be batchable, meaning that
 432 * ptep_modify_prot_commit may not actually update the pte, but merely
 433 * queue the update to be done at some later time.  The update must be
 434 * actually committed before the pte lock is released, however.
 435 */
 436static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
 437                                           unsigned long addr,
 438                                           pte_t *ptep)
 439{
 440        return __ptep_modify_prot_start(mm, addr, ptep);
 441}
 442
 443/*
 444 * Commit an update to a pte, leaving any hardware-controlled bits in
 445 * the PTE unmodified.
 446 */
 447static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 448                                           unsigned long addr,
 449                                           pte_t *ptep, pte_t pte)
 450{
 451        __ptep_modify_prot_commit(mm, addr, ptep, pte);
 452}
 453#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
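
/*
 * Illustrative sketch (editorial, not part of this header): a protection
 * change expressed as a modify_prot transaction, with the pte lock held
 * across start/commit as required by the comment above.  The function name
 * example_change_pte_prot is hypothetical.
 */
#if 0
static void example_change_pte_prot(struct vm_area_struct *vma, pmd_t *pmd,
                                    unsigned long addr, pgprot_t newprot)
{
        struct mm_struct *mm = vma->vm_mm;
        spinlock_t *ptl;
        pte_t *ptep, ptent;

        ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
        if (pte_present(*ptep)) {
                /* Make the pte non-present so hardware updates are not lost. */
                ptent = ptep_modify_prot_start(mm, addr, ptep);
                ptent = pte_modify(ptent, newprot);
                /* Must be committed before the pte lock is released. */
                ptep_modify_prot_commit(mm, addr, ptep, ptent);
        }
        pte_unmap_unlock(ptep, ptl);
}
#endif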
 454#endif /* CONFIG_MMU */
 455
 456/*
 457 * A facility to provide lazy MMU batching.  This allows PTE updates and
 458 * page invalidations to be delayed until a call to leave lazy MMU mode
 459 * is issued.  Some architectures may benefit from doing this, and it is
 460 * beneficial for both shadow and direct mode hypervisors, which may batch
 461 * the PTE updates which happen during this window.  Note that using this
 462 * interface requires that read hazards be removed from the code.  A read
  463 * hazard can arise in the direct mode hypervisor case, since the actual
  464 * write to the page tables may not yet have taken place, so reads through
 465 * a raw PTE pointer after it has been modified are not guaranteed to be
 466 * up to date.  This mode can only be entered and left under the protection of
 467 * the page table locks for all page tables which may be modified.  In the UP
 468 * case, this is required so that preemption is disabled, and in the SMP case,
 469 * it must synchronize the delayed page table writes properly on other CPUs.
 470 */
 471#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 472#define arch_enter_lazy_mmu_mode()      do {} while (0)
 473#define arch_leave_lazy_mmu_mode()      do {} while (0)
 474#define arch_flush_lazy_mmu_mode()      do {} while (0)
 475#endif
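
/*
 * Illustrative sketch (editorial, not part of this header): batching a run
 * of pte updates inside lazy MMU mode, with the page table lock held for the
 * whole window as required above.  example_fill_pte_range and the "template"
 * pte are hypothetical.
 */
#if 0
static void example_fill_pte_range(struct mm_struct *mm, pmd_t *pmd,
                                   unsigned long addr, unsigned long end,
                                   pte_t template)
{
        spinlock_t *ptl;
        pte_t *pte = pte_offset_map_lock(mm, pmd, addr, &ptl);

        arch_enter_lazy_mmu_mode();     /* a hypervisor may queue the updates */
        do {
                set_pte_at(mm, addr, pte, template);
                /* No reads through *pte here: they could be stale (read hazard). */
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();     /* flush any queued updates */

        pte_unmap_unlock(pte - 1, ptl);
}
#endif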
 476
 477/*
 478 * A facility to provide batching of the reload of page tables and
 479 * other process state with the actual context switch code for
 480 * paravirtualized guests.  By convention, only one of the batched
 481 * update (lazy) modes (CPU, MMU) should be active at any given time,
  482 * entry should never be nested, and entries and exits should always be
 483 * paired.  This is for sanity of maintaining and reasoning about the
 484 * kernel code.  In this case, the exit (end of the context switch) is
 485 * in architecture-specific code, and so doesn't need a generic
 486 * definition.
 487 */
 488#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
 489#define arch_start_context_switch(prev) do {} while (0)
 490#endif
 491
 492#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY
 493static inline int pte_soft_dirty(pte_t pte)
 494{
 495        return 0;
 496}
 497
 498static inline int pmd_soft_dirty(pmd_t pmd)
 499{
 500        return 0;
 501}
 502
 503static inline pte_t pte_mksoft_dirty(pte_t pte)
 504{
 505        return pte;
 506}
 507
 508static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 509{
 510        return pmd;
 511}
 512
 513static inline pte_t pte_clear_soft_dirty(pte_t pte)
 514{
 515        return pte;
 516}
 517
 518static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
 519{
 520        return pmd;
 521}
 522
 523static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
 524{
 525        return pte;
 526}
 527
 528static inline int pte_swp_soft_dirty(pte_t pte)
 529{
 530        return 0;
 531}
 532
 533static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 534{
 535        return pte;
 536}
 537#endif
 538
 539#ifndef __HAVE_PFNMAP_TRACKING
 540/*
 541 * Interfaces that can be used by architecture code to keep track of
  542 * the memory type of pfn mappings created by remap_pfn_range() and
  543 * vm_insert_pfn().
 544 */
 545
 546/*
 547 * track_pfn_remap is called when a _new_ pfn mapping is being established
  548 * by remap_pfn_range() for the physical range indicated by pfn and size.
 549 */
 550static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 551                                  unsigned long pfn, unsigned long addr,
 552                                  unsigned long size)
 553{
 554        return 0;
 555}
 556
 557/*
 558 * track_pfn_insert is called when a _new_ single pfn is established
 559 * by vm_insert_pfn().
 560 */
 561static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 562                                   pfn_t pfn)
 563{
 564        return 0;
 565}
 566
 567/*
  568 * track_pfn_copy is called when a vma covering the pfnmap gets
 569 * copied through copy_page_range().
 570 */
 571static inline int track_pfn_copy(struct vm_area_struct *vma)
 572{
 573        return 0;
 574}
 575
 576/*
 577 * untrack_pfn is called while unmapping a pfnmap for a region.
  578 * untrack can be called for a specific region indicated by pfn and size, or
  579 * for the entire vma (in which case pfn and size are zero).
 580 */
 581static inline void untrack_pfn(struct vm_area_struct *vma,
 582                               unsigned long pfn, unsigned long size)
 583{
 584}
 585
 586/*
 587 * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
 588 */
 589static inline void untrack_pfn_moved(struct vm_area_struct *vma)
 590{
 591}
 592#else
 593extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 594                           unsigned long pfn, unsigned long addr,
 595                           unsigned long size);
 596extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 597                            pfn_t pfn);
 598extern int track_pfn_copy(struct vm_area_struct *vma);
 599extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 600                        unsigned long size);
 601extern void untrack_pfn_moved(struct vm_area_struct *vma);
 602#endif
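
/*
 * Illustrative sketch (editorial, not part of this header): roughly how a
 * remap path is expected to use the tracking hooks above -- register the
 * range first, and untrack it again if establishing the mapping fails.
 * example_remap is a hypothetical, heavily simplified stand-in for the real
 * remap_pfn_range() logic in mm/memory.c.
 */
#if 0
static int example_remap(struct vm_area_struct *vma, unsigned long addr,
                         unsigned long pfn, unsigned long size, pgprot_t prot)
{
        int err;

        err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
        if (err)
                return err;

        err = 0 /* ... install the ptes for [addr, addr + size) ... */;
        if (err)
                untrack_pfn(vma, pfn, PAGE_ALIGN(size));
        return err;
}
#endif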
 603
 604#ifdef __HAVE_COLOR_ZERO_PAGE
 605static inline int is_zero_pfn(unsigned long pfn)
 606{
 607        extern unsigned long zero_pfn;
 608        unsigned long offset_from_zero_pfn = pfn - zero_pfn;
 609        return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
 610}
 611
 612#define my_zero_pfn(addr)       page_to_pfn(ZERO_PAGE(addr))
 613
 614#else
 615static inline int is_zero_pfn(unsigned long pfn)
 616{
 617        extern unsigned long zero_pfn;
 618        return pfn == zero_pfn;
 619}
 620
 621static inline unsigned long my_zero_pfn(unsigned long addr)
 622{
 623        extern unsigned long zero_pfn;
 624        return zero_pfn;
 625}
 626#endif
 627
 628#ifdef CONFIG_MMU
 629
 630#ifndef CONFIG_TRANSPARENT_HUGEPAGE
 631static inline int pmd_trans_huge(pmd_t pmd)
 632{
 633        return 0;
 634}
 635#ifndef __HAVE_ARCH_PMD_WRITE
 636static inline int pmd_write(pmd_t pmd)
 637{
 638        BUG();
 639        return 0;
 640}
 641#endif /* __HAVE_ARCH_PMD_WRITE */
 642#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 643
 644#ifndef pmd_read_atomic
 645static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
 646{
 647        /*
 648         * Depend on compiler for an atomic pmd read. NOTE: this is
  649         * only going to work if the pmdval_t isn't larger than
 650         * an unsigned long.
 651         */
 652        return *pmdp;
 653}
 654#endif
 655
 656#ifndef pmd_move_must_withdraw
 657static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
 658                                         spinlock_t *old_pmd_ptl)
 659{
 660        /*
  661         * With split pmd lock we also need to move the preallocated
  662         * PTE page table if new_pmd is on a different PMD page table.
 663         */
 664        return new_pmd_ptl != old_pmd_ptl;
 665}
 666#endif
 667
 668/*
 669 * This function is meant to be used by sites walking pagetables with
  670 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
  671 * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
  672 * into a null pmd and the transhuge page fault can convert a null pmd
  673 * into a hugepmd or into a regular pmd (if the hugepage allocation
  674 * fails). While holding the mmap_sem in read mode the pmd becomes
  675 * stable and stops changing under us only if it's not null and not a
  676 * transhuge pmd. When those races occur and this function makes a
  677 * difference vs the standard pmd_none_or_clear_bad, the result is
  678 * undefined, so behaving as if the pmd was none is safe (because it
  679 * can return none anyway). The compiler-level barrier() is critically
  680 * important to compute the two checks atomically on the same pmdval.
  681 *
  682 * For 32bit kernels with a 64bit large pmd_t this automatically takes
  683 * care of reading the pmd atomically to avoid SMP race conditions
  684 * against pmd_populate() when the mmap_sem is held for reading by the
  685 * caller (a special atomic read, not done by "gcc" as in the generic
  686 * version above, is also needed when THP is disabled because the page
  687 * fault can populate the pmd from under us).
 688 */
 689static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
 690{
 691        pmd_t pmdval = pmd_read_atomic(pmd);
 692        /*
 693         * The barrier will stabilize the pmdval in a register or on
 694         * the stack so that it will stop changing under the code.
 695         *
 696         * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
  697         * pmd_read_atomic is allowed to return a non-atomic pmdval
  698         * (for example pointing to a hugepage that has never been
  699         * mapped in the pmd). The below checks will only care about
  700         * the low part of the pmd with 32bit PAE x86 anyway, with the
  701         * exception of pmd_none(). So the important thing is that if
  702         * the low part of the pmd is found null, the high part will
  703         * also be null or the pmd_none() check below would be
 704         * confused.
 705         */
 706#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 707        barrier();
 708#endif
 709        if (pmd_none(pmdval) || pmd_trans_huge(pmdval))
 710                return 1;
 711        if (unlikely(pmd_bad(pmdval))) {
 712                pmd_clear_bad(pmd);
 713                return 1;
 714        }
 715        return 0;
 716}
 717
 718/*
 719 * This is a noop if Transparent Hugepage Support is not built into
 720 * the kernel. Otherwise it is equivalent to
 721 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
  722 * places that have already verified that the pmd is not none and want
  723 * to walk ptes while holding the mmap sem in read mode (write mode
  724 * doesn't need this). If THP is not enabled, the pmd can't go away
  725 * under the code even if MADV_DONTNEED runs, but if THP is enabled we
  726 * need to run a pmd_trans_unstable before walking the ptes after
  727 * split_huge_page_pmd returns (because it may have run when the pmd
  728 * became null, but then a page fault can map in a THP and not a
 729 * regular page).
 730 */
 731static inline int pmd_trans_unstable(pmd_t *pmd)
 732{
 733#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 734        return pmd_none_or_trans_huge_or_clear_bad(pmd);
 735#else
 736        return 0;
 737#endif
 738}
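
/*
 * Illustrative sketch (editorial, not part of this header): how a pagetable
 * walker holding mmap_sem for read is expected to use the helpers above
 * before touching the pte page.  example_visit_ptes is a hypothetical name.
 */
#if 0
static int example_visit_ptes(struct mm_struct *mm, pmd_t *pmd,
                              unsigned long addr, unsigned long end)
{
        pte_t *pte;

        /*
         * The pmd may be cleared by MADV_DONTNEED or turned into a THP by a
         * concurrent fault; treat both cases as "nothing to walk here".
         */
        if (pmd_trans_unstable(pmd))
                return 0;

        pte = pte_offset_map(pmd, addr);
        do {
                /* ... inspect *pte ... */
        } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap(pte - 1);

        return 0;
}
#endif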
 739
 740#ifndef CONFIG_NUMA_BALANCING
 741/*
  742 * Technically a PTE can be PROTNONE even when not doing NUMA balancing, but
  743 * the only case the kernel cares about is NUMA balancing, and it is only
  744 * ever set when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not
  745 * marked _PAGE_PROTNONE, so by default implement the helper as "always no". It
 746 * is the responsibility of the caller to distinguish between PROT_NONE
 747 * protections and NUMA hinting fault protections.
 748 */
 749static inline int pte_protnone(pte_t pte)
 750{
 751        return 0;
 752}
 753
 754static inline int pmd_protnone(pmd_t pmd)
 755{
 756        return 0;
 757}
 758#endif /* CONFIG_NUMA_BALANCING */
 759
 760#endif /* CONFIG_MMU */
 761
 762#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 763int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
 764int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 765int pud_clear_huge(pud_t *pud);
 766int pmd_clear_huge(pmd_t *pmd);
 767#else   /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
 768static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 769{
 770        return 0;
 771}
 772static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 773{
 774        return 0;
 775}
 776static inline int pud_clear_huge(pud_t *pud)
 777{
 778        return 0;
 779}
 780static inline int pmd_clear_huge(pmd_t *pmd)
 781{
 782        return 0;
 783}
 784#endif  /* CONFIG_HAVE_ARCH_HUGE_VMAP */
 785
 786#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
 787#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 788/*
 789 * ARCHes with special requirements for evicting THP backing TLB entries can
  790 * implement this. Even otherwise, it can help optimize the normal TLB flush
  791 * in the THP regime: a stock flush_tlb_range() typically has an optimization
  792 * to nuke the entire TLB if the flush span is greater than a threshold, which
  793 * will likely be true for a single huge page. Thus a single THP flush will
  794 * invalidate the entire TLB, which is not desirable.
 795 * e.g. see arch/arc: flush_pmd_tlb_range
 796 */
 797#define flush_pmd_tlb_range(vma, addr, end)     flush_tlb_range(vma, addr, end)
 798#else
 799#define flush_pmd_tlb_range(vma, addr, end)     BUILD_BUG()
 800#endif
 801#endif
 802
 803#endif /* !__ASSEMBLY__ */
 804
 805#ifndef io_remap_pfn_range
 806#define io_remap_pfn_range remap_pfn_range
 807#endif
 808
 809#endif /* _ASM_GENERIC_PGTABLE_H */
 810