linux/arch/powerpc/include/asm/book3s/64/pgtable.h
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
   3#define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
   4
   5#include <asm-generic/pgtable-nop4d.h>
   6
   7#ifndef __ASSEMBLY__
   8#include <linux/mmdebug.h>
   9#include <linux/bug.h>
  10#include <linux/sizes.h>
  11#endif
  12
  13/*
  14 * Common bits between hash and Radix page table
  15 */
  16#define _PAGE_BIT_SWAP_TYPE     0
  17
  18#define _PAGE_EXEC              0x00001 /* execute permission */
  19#define _PAGE_WRITE             0x00002 /* write access allowed */
  20#define _PAGE_READ              0x00004 /* read access allowed */
  21#define _PAGE_RW                (_PAGE_READ | _PAGE_WRITE)
  22#define _PAGE_RWX               (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
  23#define _PAGE_PRIVILEGED        0x00008 /* kernel access only */
  24#define _PAGE_SAO               0x00010 /* Strong access order */
  25#define _PAGE_NON_IDEMPOTENT    0x00020 /* non idempotent memory */
  26#define _PAGE_TOLERANT          0x00030 /* tolerant memory, cache inhibited */
  27#define _PAGE_DIRTY             0x00080 /* C: page changed */
  28#define _PAGE_ACCESSED          0x00100 /* R: page referenced */
  29/*
  30 * Software bits
  31 */
  32#define _RPAGE_SW0              0x2000000000000000UL
  33#define _RPAGE_SW1              0x00800
  34#define _RPAGE_SW2              0x00400
  35#define _RPAGE_SW3              0x00200
  36#define _RPAGE_RSV1             0x00040UL
  37
  38#define _RPAGE_PKEY_BIT4        0x1000000000000000UL
  39#define _RPAGE_PKEY_BIT3        0x0800000000000000UL
  40#define _RPAGE_PKEY_BIT2        0x0400000000000000UL
  41#define _RPAGE_PKEY_BIT1        0x0200000000000000UL
  42#define _RPAGE_PKEY_BIT0        0x0100000000000000UL
  43
  44#define _PAGE_PTE               0x4000000000000000UL    /* distinguishes PTEs from pointers */
  45#define _PAGE_PRESENT           0x8000000000000000UL    /* pte contains a translation */
   46/*
   47 * We need to mark a pmd pte invalid while splitting. We can do that by clearing
   48 * the _PAGE_PRESENT bit. But then that will be taken as a swap pte. In order to
   49 * differentiate between the two, use a SW bit when invalidating.
   50 *
   51 * We also do this temporary invalidation for a regular pte entry in ptep_set_access_flags().
   52 *
   53 * This bit is meaningful only when _PAGE_PRESENT is cleared.
   54 */
  55#define _PAGE_INVALID           _RPAGE_SW0
  56
  57/*
  58 * Top and bottom bits of RPN which can be used by hash
  59 * translation mode, because we expect them to be zero
  60 * otherwise.
  61 */
  62#define _RPAGE_RPN0             0x01000
  63#define _RPAGE_RPN1             0x02000
  64#define _RPAGE_RPN43            0x0080000000000000UL
  65#define _RPAGE_RPN42            0x0040000000000000UL
  66#define _RPAGE_RPN41            0x0020000000000000UL
  67
  68/* Max physical address bit as per radix table */
  69#define _RPAGE_PA_MAX           56
  70
  71/*
  72 * Max physical address bit we will use for now.
  73 *
  74 * This is mostly a hardware limitation and for now Power9 has
  75 * a 51 bit limit.
  76 *
   77 * This is different from the number of physical bits required to address
   78 * the last byte of memory. That is defined by MAX_PHYSMEM_BITS.
   79 * MAX_PHYSMEM_BITS is a Linux limitation imposed by the maximum
   80 * number of sections we can support (SECTIONS_SHIFT).
   81 *
   82 * This is different from the Radix page table limitation above and
   83 * should always be less than it. The limit is chosen such that
   84 * we can overload the bits between _PAGE_PA_MAX and _RPAGE_PA_MAX
   85 * for hash Linux page table specific bits.
   86 *
   87 * In order to be compatible with future hardware generations we keep
   88 * some headroom and limit this for now to 53.
  89 */
  90#define _PAGE_PA_MAX            53
  91
  92#define _PAGE_SOFT_DIRTY        _RPAGE_SW3 /* software: software dirty tracking */
  93#define _PAGE_SPECIAL           _RPAGE_SW2 /* software: special page */
  94#define _PAGE_DEVMAP            _RPAGE_SW1 /* software: ZONE_DEVICE page */
  95
  96/*
   97 * Drivers request cache-inhibited pte mappings using _PAGE_NO_CACHE.
   98 * Instead of fixing all of them, add an alternate define which
   99 * maps to the cache-inhibited (CI) pte bits.
 100 */
 101#define _PAGE_NO_CACHE          _PAGE_TOLERANT
 102/*
  103 * We support a _RPAGE_PA_MAX bit real address in the pte. On the Linux side
  104 * we are limited by _PAGE_PA_MAX. Clear everything above _PAGE_PA_MAX
  105 * and everything below PAGE_SHIFT.
 106 */
 107#define PTE_RPN_MASK    (((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
 108/*
 109 * set of bits not changed in pmd_modify. Even though we have hash specific bits
 110 * in here, on radix we expect them to be zero.
 111 */
 112#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
 113                         _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
 114                         _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
 115/*
 116 * user access blocked by key
 117 */
 118#define _PAGE_KERNEL_RW         (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
 119#define _PAGE_KERNEL_RO          (_PAGE_PRIVILEGED | _PAGE_READ)
 120#define _PAGE_KERNEL_ROX         (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
 121#define _PAGE_KERNEL_RWX        (_PAGE_PRIVILEGED | _PAGE_DIRTY |       \
 122                                 _PAGE_RW | _PAGE_EXEC)
 123/*
  124 * _PAGE_CHG_MASK is the set of bits that are to be preserved across
  125 * pgprot changes.
 126 */
 127#define _PAGE_CHG_MASK  (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
 128                         _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE |   \
 129                         _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
 130
 131/*
  132 * We define 2 sets of base prot bits, one for basic pages (i.e.,
  133 * cacheable kernel and user pages) and one for non-cacheable
 134 * pages. We always set _PAGE_COHERENT when SMP is enabled or
 135 * the processor might need it for DMA coherency.
 136 */
 137#define _PAGE_BASE_NC   (_PAGE_PRESENT | _PAGE_ACCESSED)
 138#define _PAGE_BASE      (_PAGE_BASE_NC)
 139
 140/* Permission masks used to generate the __P and __S table,
 141 *
  142 * Note: __pgprot is defined in arch/powerpc/include/asm/page.h
 143 *
 144 * Write permissions imply read permissions for now (we could make write-only
 145 * pages on BookE but we don't bother for now). Execute permission control is
 146 * possible on platforms that define _PAGE_EXEC
 147 */
 148#define PAGE_NONE       __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
 149#define PAGE_SHARED     __pgprot(_PAGE_BASE | _PAGE_RW)
 150#define PAGE_SHARED_X   __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
 151#define PAGE_COPY       __pgprot(_PAGE_BASE | _PAGE_READ)
 152#define PAGE_COPY_X     __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 153#define PAGE_READONLY   __pgprot(_PAGE_BASE | _PAGE_READ)
 154#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 155
 156/* Permission masks used for kernel mappings */
 157#define PAGE_KERNEL     __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
 158#define PAGE_KERNEL_NC  __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
 159                                 _PAGE_TOLERANT)
 160#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
 161                                 _PAGE_NON_IDEMPOTENT)
 162#define PAGE_KERNEL_X   __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
 163#define PAGE_KERNEL_RO  __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
 164#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
 165
 166/*
 167 * Protection used for kernel text. We want the debuggers to be able to
 168 * set breakpoints anywhere, so don't write protect the kernel text
 169 * on platforms where such control is possible.
 170 */
 171#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \
 172        defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
 173#define PAGE_KERNEL_TEXT        PAGE_KERNEL_X
 174#else
 175#define PAGE_KERNEL_TEXT        PAGE_KERNEL_ROX
 176#endif
 177
  178/* Make module code happy. We don't set it RO yet */
 179#define PAGE_KERNEL_EXEC        PAGE_KERNEL_X
 180#define PAGE_AGP                (PAGE_KERNEL_NC)
 181
 182#ifndef __ASSEMBLY__
 183/*
 184 * page table defines
 185 */
 186extern unsigned long __pte_index_size;
 187extern unsigned long __pmd_index_size;
 188extern unsigned long __pud_index_size;
 189extern unsigned long __pgd_index_size;
 190extern unsigned long __pud_cache_index;
 191#define PTE_INDEX_SIZE  __pte_index_size
 192#define PMD_INDEX_SIZE  __pmd_index_size
 193#define PUD_INDEX_SIZE  __pud_index_size
 194#define PGD_INDEX_SIZE  __pgd_index_size
 195/* pmd table use page table fragments */
 196#define PMD_CACHE_INDEX  0
 197#define PUD_CACHE_INDEX __pud_cache_index
 198/*
  199 * Because of the use of pte fragments and THP, the size of the page tables
  200 * is not always derived from the index sizes above.
 201 */
 202extern unsigned long __pte_table_size;
 203extern unsigned long __pmd_table_size;
 204extern unsigned long __pud_table_size;
 205extern unsigned long __pgd_table_size;
 206#define PTE_TABLE_SIZE  __pte_table_size
 207#define PMD_TABLE_SIZE  __pmd_table_size
 208#define PUD_TABLE_SIZE  __pud_table_size
 209#define PGD_TABLE_SIZE  __pgd_table_size
 210
 211extern unsigned long __pmd_val_bits;
 212extern unsigned long __pud_val_bits;
 213extern unsigned long __pgd_val_bits;
 214#define PMD_VAL_BITS    __pmd_val_bits
 215#define PUD_VAL_BITS    __pud_val_bits
 216#define PGD_VAL_BITS    __pgd_val_bits
 217
 218extern unsigned long __pte_frag_nr;
 219#define PTE_FRAG_NR __pte_frag_nr
 220extern unsigned long __pte_frag_size_shift;
 221#define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift
 222#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
 223
 224extern unsigned long __pmd_frag_nr;
 225#define PMD_FRAG_NR __pmd_frag_nr
 226extern unsigned long __pmd_frag_size_shift;
 227#define PMD_FRAG_SIZE_SHIFT __pmd_frag_size_shift
 228#define PMD_FRAG_SIZE (1UL << PMD_FRAG_SIZE_SHIFT)
 229
 230#define PTRS_PER_PTE    (1 << PTE_INDEX_SIZE)
 231#define PTRS_PER_PMD    (1 << PMD_INDEX_SIZE)
 232#define PTRS_PER_PUD    (1 << PUD_INDEX_SIZE)
 233#define PTRS_PER_PGD    (1 << PGD_INDEX_SIZE)
 234
 235#define MAX_PTRS_PER_PGD        (1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \
 236                                       H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE))
 237
 238/* PMD_SHIFT determines what a second-level page table entry can map */
 239#define PMD_SHIFT       (PAGE_SHIFT + PTE_INDEX_SIZE)
 240#define PMD_SIZE        (1UL << PMD_SHIFT)
 241#define PMD_MASK        (~(PMD_SIZE-1))
 242
 243/* PUD_SHIFT determines what a third-level page table entry can map */
 244#define PUD_SHIFT       (PMD_SHIFT + PMD_INDEX_SIZE)
 245#define PUD_SIZE        (1UL << PUD_SHIFT)
 246#define PUD_MASK        (~(PUD_SIZE-1))
 247
 248/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
 249#define PGDIR_SHIFT     (PUD_SHIFT + PUD_INDEX_SIZE)
 250#define PGDIR_SIZE      (1UL << PGDIR_SHIFT)
 251#define PGDIR_MASK      (~(PGDIR_SIZE-1))
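
/*
 * Illustrative sketch (not part of the kernel interface): with the radix
 * 64K-page geometry (index sizes 5/9/9/13, PAGE_SHIFT 16), the definitions
 * above give PMD_SIZE = 2M, PUD_SIZE = 1G, PGDIR_SIZE = 512G, and one pgd
 * spans 1UL << 52 bytes. demo_pgd_span() is a hypothetical helper.
 */
static inline unsigned long demo_pgd_span(void)
{
        /* Total virtual address bytes addressable by a single pgd. */
        return PGDIR_SIZE * PTRS_PER_PGD;
}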
 252
 253/* Bits to mask out from a PMD to get to the PTE page */
 254#define PMD_MASKED_BITS         0xc0000000000000ffUL
 255/* Bits to mask out from a PUD to get to the PMD page */
 256#define PUD_MASKED_BITS         0xc0000000000000ffUL
 257/* Bits to mask out from a PGD to get to the PUD page */
 258#define P4D_MASKED_BITS         0xc0000000000000ffUL
 259
 260/*
 261 * Used as an indicator for rcu callback functions
 262 */
 263enum pgtable_index {
 264        PTE_INDEX = 0,
 265        PMD_INDEX,
 266        PUD_INDEX,
 267        PGD_INDEX,
 268        /*
 269         * Below are used with 4k page size and hugetlb
 270         */
 271        HTLB_16M_INDEX,
 272        HTLB_16G_INDEX,
 273};
 274
 275extern unsigned long __vmalloc_start;
 276extern unsigned long __vmalloc_end;
 277#define VMALLOC_START   __vmalloc_start
 278#define VMALLOC_END     __vmalloc_end
 279
 280static inline unsigned int ioremap_max_order(void)
 281{
 282        if (radix_enabled())
 283                return PUD_SHIFT;
 284        return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
 285}
 286#define IOREMAP_MAX_ORDER ioremap_max_order()
 287
 288extern unsigned long __kernel_virt_start;
 289extern unsigned long __kernel_io_start;
 290extern unsigned long __kernel_io_end;
 291#define KERN_VIRT_START __kernel_virt_start
 292#define KERN_IO_START  __kernel_io_start
 293#define KERN_IO_END __kernel_io_end
 294
 295extern struct page *vmemmap;
 296extern unsigned long pci_io_base;
 297#endif /* __ASSEMBLY__ */
 298
 299#include <asm/book3s/64/hash.h>
 300#include <asm/book3s/64/radix.h>
 301
 302#if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS
 303#define  MAX_PHYSMEM_BITS       H_MAX_PHYSMEM_BITS
 304#else
 305#define  MAX_PHYSMEM_BITS       R_MAX_PHYSMEM_BITS
 306#endif
 307
 308
 309#ifdef CONFIG_PPC_64K_PAGES
 310#include <asm/book3s/64/pgtable-64k.h>
 311#else
 312#include <asm/book3s/64/pgtable-4k.h>
 313#endif
 314
 315#include <asm/barrier.h>
 316/*
  317 * IO space itself is carved into the PIO region (ISA and PHB IO space) and
 318 * the ioremap space
 319 *
 320 *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
 321 *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
 322 * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
 323 */
 324#define FULL_IO_SIZE    0x80000000ul
 325#define  ISA_IO_BASE    (KERN_IO_START)
 326#define  ISA_IO_END     (KERN_IO_START + 0x10000ul)
 327#define  PHB_IO_BASE    (ISA_IO_END)
 328#define  PHB_IO_END     (KERN_IO_START + FULL_IO_SIZE)
 329#define IOREMAP_BASE    (PHB_IO_END)
 330#define IOREMAP_START   (ioremap_bot)
 331#define IOREMAP_END     (KERN_IO_END - FIXADDR_SIZE)
 332#define FIXADDR_SIZE    SZ_32M
 333
 334/* Advertise special mapping type for AGP */
 335#define HAVE_PAGE_AGP
 336
 337#ifndef __ASSEMBLY__
 338
 339/*
  340 * This is the default implementation of various PTE accessors; it's
  341 * used in all cases except Book3S with 64K pages, where we have a
  342 * concept of sub-pages.
 343 */
 344#ifndef __real_pte
 345
 346#define __real_pte(e, p, o)             ((real_pte_t){(e)})
 347#define __rpte_to_pte(r)        ((r).pte)
 348#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
 349
 350#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
 351        do {                                                             \
 352                index = 0;                                               \
 353                shift = mmu_psize_defs[psize].shift;                     \
 354
 355#define pte_iterate_hashed_end() } while(0)
 356
 357/*
 358 * We expect this to be called only for user addresses or kernel virtual
 359 * addresses other than the linear mapping.
 360 */
 361#define pte_pagesize_index(mm, addr, pte)       MMU_PAGE_4K
 362
 363#endif /* __real_pte */
 364
 365static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
 366                                       pte_t *ptep, unsigned long clr,
 367                                       unsigned long set, int huge)
 368{
 369        if (radix_enabled())
 370                return radix__pte_update(mm, addr, ptep, clr, set, huge);
 371        return hash__pte_update(mm, addr, ptep, clr, set, huge);
 372}
 373/*
  374 * For hash, even if we have _PAGE_ACCESSED = 0, we do a pte_update.
  375 * We currently remove entries from the hashtable regardless of whether
  376 * the entry was young or dirty.
  377 *
  378 * We should be more intelligent about this but for the moment we override
  379 * these functions and force a tlb flush unconditionally.
 380 * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
 381 * function for both hash and radix.
 382 */
 383static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 384                                              unsigned long addr, pte_t *ptep)
 385{
 386        unsigned long old;
 387
 388        if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
 389                return 0;
 390        old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
 391        return (old & _PAGE_ACCESSED) != 0;
 392}
 393
 394#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 395#define ptep_test_and_clear_young(__vma, __addr, __ptep)        \
 396({                                                              \
 397        __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
 398})
 399
 400/*
 401 * On Book3S CPUs, clearing the accessed bit without a TLB flush
 402 * doesn't cause data corruption. [ It could cause incorrect
 403 * page aging and the (mistaken) reclaim of hot pages, but the
 404 * chance of that should be relatively low. ]
 405 *
 406 * So as a performance optimization don't flush the TLB when
 407 * clearing the accessed bit, it will eventually be flushed by
 408 * a context switch or a VM operation anyway. [ In the rare
 409 * event of it not getting flushed for a long time the delay
 410 * shouldn't really matter because there's no real memory
 411 * pressure for swapout to react to. ]
 412 */
 413#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 414#define ptep_clear_flush_young ptep_test_and_clear_young
 415
 416#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
 417#define pmdp_clear_flush_young pmdp_test_and_clear_young
 418
 419static inline int __pte_write(pte_t pte)
 420{
 421        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
 422}
 423
 424#ifdef CONFIG_NUMA_BALANCING
 425#define pte_savedwrite pte_savedwrite
 426static inline bool pte_savedwrite(pte_t pte)
 427{
 428        /*
  429         * Saved write ptes are prot none ptes that don't have the
  430         * privileged bit set. We mark prot none as one which has the
  431         * present and privileged bits set and RWX cleared. To mark a
  432         * protnone pte which used to have _PAGE_WRITE set, we clear
 433         * the privileged bit.
 434         */
 435        return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
 436}
 437#else
 438#define pte_savedwrite pte_savedwrite
 439static inline bool pte_savedwrite(pte_t pte)
 440{
 441        return false;
 442}
 443#endif
 444
 445static inline int pte_write(pte_t pte)
 446{
 447        return __pte_write(pte) || pte_savedwrite(pte);
 448}
 449
 450static inline int pte_read(pte_t pte)
 451{
 452        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_READ));
 453}
 454
 455#define __HAVE_ARCH_PTEP_SET_WRPROTECT
 456static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 457                                      pte_t *ptep)
 458{
 459        if (__pte_write(*ptep))
 460                pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
 461        else if (unlikely(pte_savedwrite(*ptep)))
 462                pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
 463}
 464
 465#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 466static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 467                                           unsigned long addr, pte_t *ptep)
 468{
 469        /*
  470         * We should not find protnone for hugetlb, but this completes the
  471         * interface.
 472         */
 473        if (__pte_write(*ptep))
 474                pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
 475        else if (unlikely(pte_savedwrite(*ptep)))
 476                pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1);
 477}
 478
 479#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 480static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 481                                       unsigned long addr, pte_t *ptep)
 482{
 483        unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
 484        return __pte(old);
 485}
 486
 487#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 488static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 489                                            unsigned long addr,
 490                                            pte_t *ptep, int full)
 491{
 492        if (full && radix_enabled()) {
 493                /*
 494                 * We know that this is a full mm pte clear and
 495                 * hence can be sure there is no parallel set_pte.
 496                 */
 497                return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
 498        }
 499        return ptep_get_and_clear(mm, addr, ptep);
 500}
 501
 502
 503static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 504                             pte_t * ptep)
 505{
 506        pte_update(mm, addr, ptep, ~0UL, 0, 0);
 507}
 508
 509static inline int pte_dirty(pte_t pte)
 510{
 511        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY));
 512}
 513
 514static inline int pte_young(pte_t pte)
 515{
 516        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_ACCESSED));
 517}
 518
 519static inline int pte_special(pte_t pte)
 520{
 521        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL));
 522}
 523
 524static inline bool pte_exec(pte_t pte)
 525{
 526        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC));
 527}
 528
 529
 530#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 531static inline bool pte_soft_dirty(pte_t pte)
 532{
 533        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SOFT_DIRTY));
 534}
 535
 536static inline pte_t pte_mksoft_dirty(pte_t pte)
 537{
 538        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SOFT_DIRTY));
 539}
 540
 541static inline pte_t pte_clear_soft_dirty(pte_t pte)
 542{
 543        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SOFT_DIRTY));
 544}
 545#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 546
 547#ifdef CONFIG_NUMA_BALANCING
 548static inline int pte_protnone(pte_t pte)
 549{
 550        return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX)) ==
 551                cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
 552}
 553
 554#define pte_mk_savedwrite pte_mk_savedwrite
 555static inline pte_t pte_mk_savedwrite(pte_t pte)
 556{
 557        /*
  558         * Used by the AutoNUMA subsystem to preserve the write bit
  559         * while marking the pte PROT_NONE. Only allow this
  560         * on a PROT_NONE pte.
 561         */
 562        VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
 563                  cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
 564        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
 565}
 566
 567#define pte_clear_savedwrite pte_clear_savedwrite
 568static inline pte_t pte_clear_savedwrite(pte_t pte)
 569{
 570        /*
 571         * Used by KSM subsystem to make a protnone pte readonly.
 572         */
 573        VM_BUG_ON(!pte_protnone(pte));
 574        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
 575}
 576#else
 577#define pte_clear_savedwrite pte_clear_savedwrite
 578static inline pte_t pte_clear_savedwrite(pte_t pte)
 579{
 580        VM_WARN_ON(1);
 581        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
 582}
 583#endif /* CONFIG_NUMA_BALANCING */
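
/*
 * Illustrative sketch, assuming CONFIG_NUMA_BALANCING: a prot-none pte is
 * encoded as present + privileged with RWX clear, and pte_mk_savedwrite()
 * drops the privileged bit so that pte_savedwrite() can later see that the
 * mapping used to be writable. demo_savedwrite_encoding() is hypothetical.
 */
#ifdef CONFIG_NUMA_BALANCING
static inline bool demo_savedwrite_encoding(void)
{
        pte_t pte = __pte(_PAGE_PRESENT | _PAGE_PTE | _PAGE_PRIVILEGED);

        /* prot-none to hardware, yet recoverable as saved-write. */
        return pte_protnone(pte) && pte_savedwrite(pte_mk_savedwrite(pte));
}
#endif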
 584
 585static inline bool pte_hw_valid(pte_t pte)
 586{
 587        return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE)) ==
 588                cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
 589}
 590
 591static inline int pte_present(pte_t pte)
 592{
 593        /*
  594         * A pte is considered present if _PAGE_PRESENT is set.
  595         * We also need to treat as present a pte that is marked
 596         * invalid during ptep_set_access_flags. Hence we look for _PAGE_INVALID
 597         * if we find _PAGE_PRESENT cleared.
 598         */
 599
 600        if (pte_hw_valid(pte))
 601                return true;
 602        return (pte_raw(pte) & cpu_to_be64(_PAGE_INVALID | _PAGE_PTE)) ==
 603                cpu_to_be64(_PAGE_INVALID | _PAGE_PTE);
 604}
 605
 606#ifdef CONFIG_PPC_MEM_KEYS
 607extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
 608#else
 609static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
 610{
 611        return true;
 612}
 613#endif /* CONFIG_PPC_MEM_KEYS */
 614
 615static inline bool pte_user(pte_t pte)
 616{
 617        return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
 618}
 619
 620#define pte_access_permitted pte_access_permitted
 621static inline bool pte_access_permitted(pte_t pte, bool write)
 622{
 623        /*
 624         * _PAGE_READ is needed for any access and will be
 625         * cleared for PROT_NONE
 626         */
 627        if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
 628                return false;
 629
 630        if (write && !pte_write(pte))
 631                return false;
 632
 633        return arch_pte_access_permitted(pte_val(pte), write, 0);
 634}
 635
 636/*
 637 * Conversion functions: convert a page and protection to a page entry,
 638 * and a page entry and page directory to the page they refer to.
 639 *
 640 * Even if PTEs can be unsigned long long, a PFN is always an unsigned
 641 * long for now.
 642 */
 643static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
 644{
 645        VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
 646        VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
 647
 648        return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
 649}
 650
 651static inline unsigned long pte_pfn(pte_t pte)
 652{
 653        return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
 654}
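
/*
 * Illustrative sketch (hypothetical helper): pfn_pte()/pte_pfn() round-trip
 * for any pfn whose physical address fits under _PAGE_PA_MAX, since the pfn
 * lands inside PTE_RPN_MASK and the protection bits stay outside it.
 */
static inline bool demo_pfn_roundtrip(unsigned long pfn)
{
        pte_t pte = pfn_pte(pfn, PAGE_KERNEL);

        return pte_pfn(pte) == pfn;
}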
 655
 656/* Generic modifiers for PTE bits */
 657static inline pte_t pte_wrprotect(pte_t pte)
 658{
 659        if (unlikely(pte_savedwrite(pte)))
 660                return pte_clear_savedwrite(pte);
 661        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
 662}
 663
 664static inline pte_t pte_exprotect(pte_t pte)
 665{
 666        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_EXEC));
 667}
 668
 669static inline pte_t pte_mkclean(pte_t pte)
 670{
 671        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_DIRTY));
 672}
 673
 674static inline pte_t pte_mkold(pte_t pte)
 675{
 676        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_ACCESSED));
 677}
 678
 679static inline pte_t pte_mkexec(pte_t pte)
 680{
 681        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
 682}
 683
 684static inline pte_t pte_mkwrite(pte_t pte)
 685{
 686        /*
 687         * write implies read, hence set both
 688         */
 689        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_RW));
 690}
 691
 692static inline pte_t pte_mkdirty(pte_t pte)
 693{
 694        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
 695}
 696
 697static inline pte_t pte_mkyoung(pte_t pte)
 698{
 699        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_ACCESSED));
 700}
 701
 702static inline pte_t pte_mkspecial(pte_t pte)
 703{
 704        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL));
 705}
 706
 707static inline pte_t pte_mkhuge(pte_t pte)
 708{
 709        return pte;
 710}
 711
 712static inline pte_t pte_mkdevmap(pte_t pte)
 713{
 714        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP));
 715}
 716
 717static inline pte_t pte_mkprivileged(pte_t pte)
 718{
 719        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
 720}
 721
 722static inline pte_t pte_mkuser(pte_t pte)
 723{
 724        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
 725}
 726
 727/*
 728 * This is potentially called with a pmd as the argument, in which case it's not
 729 * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set.
 730 * That's because the bit we use for _PAGE_DEVMAP is not reserved for software
 731 * use in page directory entries (ie. non-ptes).
 732 */
 733static inline int pte_devmap(pte_t pte)
 734{
 735        u64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE);
 736
 737        return (pte_raw(pte) & mask) == mask;
 738}
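
/*
 * Illustrative sketch (hypothetical helper): a device pte built via
 * pfn_pte() carries _PAGE_PTE, so after pte_mkdevmap() the combined
 * _PAGE_DEVMAP | _PAGE_PTE test above reports it as devmap.
 */
static inline bool demo_devmap_bits(unsigned long pfn)
{
        return pte_devmap(pte_mkdevmap(pfn_pte(pfn, PAGE_KERNEL)));
}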
 739
 740static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 741{
  742        /* FIXME!! check whether this needs to be a conditional */
 743        return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) |
 744                         cpu_to_be64(pgprot_val(newprot)));
 745}
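
/*
 * Illustrative sketch (hypothetical helper): because _PAGE_CHG_MASK keeps
 * the RPN and the dirty/accessed/special/soft-dirty bits, a protection
 * change via pte_modify() preserves the target page and its dirty state.
 */
static inline bool demo_modify_keeps_state(pte_t pte)
{
        pte_t ro = pte_modify(pte, PAGE_READONLY);

        return pte_pfn(ro) == pte_pfn(pte) && pte_dirty(ro) == pte_dirty(pte);
}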
 746
 747/* Encode and de-code a swap entry */
 748#define MAX_SWAPFILES_CHECK() do { \
 749        BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
 750        /*                                                      \
 751         * Don't have overlapping bits with _PAGE_HPTEFLAGS     \
 752         * We filter HPTEFLAGS on set_pte.                      \
 753         */                                                     \
 754        BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
 755        BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);   \
 756        } while (0)
 757
 758#define SWP_TYPE_BITS 5
 759#define __swp_type(x)           (((x).val >> _PAGE_BIT_SWAP_TYPE) \
 760                                & ((1UL << SWP_TYPE_BITS) - 1))
 761#define __swp_offset(x)         (((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
 762#define __swp_entry(type, offset)       ((swp_entry_t) { \
 763                                ((type) << _PAGE_BIT_SWAP_TYPE) \
 764                                | (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
 765/*
 766 * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
  767 * the swap type and offset we get from swap and convert that to a pte to find
  768 * a matching pte in the Linux page table.
 769 * Clear bits not found in swap entries here.
 770 */
 771#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
 772#define __swp_entry_to_pte(x)   __pte((x).val | _PAGE_PTE)
 773#define __pmd_to_swp_entry(pmd) (__pte_to_swp_entry(pmd_pte(pmd)))
 774#define __swp_entry_to_pmd(x)   (pte_pmd(__swp_entry_to_pte(x)))
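
/*
 * Illustrative sketch (hypothetical helper, assuming swp_entry_t is visible
 * as it is wherever these macros are used): round-trips a (type, offset)
 * pair through the swap pte encoding above. Holds for type < 32 and for
 * offsets whose shifted value fits inside PTE_RPN_MASK.
 */
static inline bool demo_swp_roundtrip(unsigned long type, unsigned long offset)
{
        swp_entry_t entry = __swp_entry(type, offset);
        pte_t pte = __swp_entry_to_pte(entry);

        return __swp_type(__pte_to_swp_entry(pte)) == type &&
               __swp_offset(__pte_to_swp_entry(pte)) == offset;
}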
 775
 776#ifdef CONFIG_MEM_SOFT_DIRTY
 777#define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
 778#else
 779#define _PAGE_SWP_SOFT_DIRTY    0UL
 780#endif /* CONFIG_MEM_SOFT_DIRTY */
 781
 782#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 783static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
 784{
 785        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
 786}
 787
 788static inline bool pte_swp_soft_dirty(pte_t pte)
 789{
 790        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
 791}
 792
 793static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 794{
 795        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY));
 796}
 797#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 798
 799static inline bool check_pte_access(unsigned long access, unsigned long ptev)
 800{
 801        /*
  802         * This checks the _PAGE_RWX and _PAGE_PRESENT bits
 803         */
 804        if (access & ~ptev)
 805                return false;
 806        /*
  807         * This checks for access to privileged space
 808         */
 809        if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
 810                return false;
 811
 812        return true;
 813}
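
/*
 * Illustrative sketch (hypothetical caller): a user-mode write access would
 * be validated against a pte roughly like this; the access mask must be a
 * subset of the pte bits and the privilege level must match.
 */
static inline bool demo_user_write_allowed(pte_t pte)
{
        unsigned long access = _PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE;

        return check_pte_access(access, pte_val(pte));
}
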
 814/*
 815 * Generic functions with hash/radix callbacks
 816 */
 817
 818static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 819                                           pte_t *ptep, pte_t entry,
 820                                           unsigned long address,
 821                                           int psize)
 822{
 823        if (radix_enabled())
 824                return radix__ptep_set_access_flags(vma, ptep, entry,
 825                                                    address, psize);
 826        return hash__ptep_set_access_flags(ptep, entry);
 827}
 828
 829#define __HAVE_ARCH_PTE_SAME
 830static inline int pte_same(pte_t pte_a, pte_t pte_b)
 831{
 832        if (radix_enabled())
 833                return radix__pte_same(pte_a, pte_b);
 834        return hash__pte_same(pte_a, pte_b);
 835}
 836
 837static inline int pte_none(pte_t pte)
 838{
 839        if (radix_enabled())
 840                return radix__pte_none(pte);
 841        return hash__pte_none(pte);
 842}
 843
 844static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 845                                pte_t *ptep, pte_t pte, int percpu)
 846{
 847
 848        VM_WARN_ON(!(pte_raw(pte) & cpu_to_be64(_PAGE_PTE)));
 849        /*
 850         * Keep the _PAGE_PTE added till we are sure we handle _PAGE_PTE
 851         * in all the callers.
 852         */
 853        pte = __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
 854
 855        if (radix_enabled())
 856                return radix__set_pte_at(mm, addr, ptep, pte, percpu);
 857        return hash__set_pte_at(mm, addr, ptep, pte, percpu);
 858}
 859
 860#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
 861
 862#define pgprot_noncached pgprot_noncached
 863static inline pgprot_t pgprot_noncached(pgprot_t prot)
 864{
 865        return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
 866                        _PAGE_NON_IDEMPOTENT);
 867}
 868
 869#define pgprot_noncached_wc pgprot_noncached_wc
 870static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
 871{
 872        return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
 873                        _PAGE_TOLERANT);
 874}
 875
 876#define pgprot_cached pgprot_cached
 877static inline pgprot_t pgprot_cached(pgprot_t prot)
 878{
 879        return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
 880}
 881
 882#define pgprot_writecombine pgprot_writecombine
 883static inline pgprot_t pgprot_writecombine(pgprot_t prot)
 884{
 885        return pgprot_noncached_wc(prot);
 886}
 887/*
  888 * Check whether a pte mapping has the cache-inhibited property.
 889 */
 890static inline bool pte_ci(pte_t pte)
 891{
 892        __be64 pte_v = pte_raw(pte);
 893
 894        if (((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_TOLERANT)) ||
 895            ((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_NON_IDEMPOTENT)))
 896                return true;
 897        return false;
 898}
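
/*
 * Illustrative sketch (hypothetical helper): an MMIO mapping built from
 * PAGE_KERNEL with pgprot_noncached() replaces the _PAGE_CACHE_CTL bits
 * with _PAGE_NON_IDEMPOTENT, so pte_ci() sees it as cache inhibited.
 */
static inline bool demo_mmio_prot_is_ci(void)
{
        pgprot_t prot = pgprot_noncached(PAGE_KERNEL);

        return pte_ci(__pte(pgprot_val(prot)));
}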
 899
 900static inline void pmd_clear(pmd_t *pmdp)
 901{
 902        if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
 903                /*
 904                 * Don't use this if we can possibly have a hash page table
 905                 * entry mapping this.
 906                 */
 907                WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
 908        }
 909        *pmdp = __pmd(0);
 910}
 911
 912static inline int pmd_none(pmd_t pmd)
 913{
 914        return !pmd_raw(pmd);
 915}
 916
 917static inline int pmd_present(pmd_t pmd)
 918{
 919        /*
  920         * A pmd is considered present if _PAGE_PRESENT is set.
  921         * We also need to treat as present a pmd that is marked
 922         * invalid during a split. Hence we look for _PAGE_INVALID
 923         * if we find _PAGE_PRESENT cleared.
 924         */
 925        if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
 926                return true;
 927
 928        return false;
 929}
 930
 931static inline int pmd_is_serializing(pmd_t pmd)
 932{
 933        /*
 934         * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear
 935         * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate).
 936         *
 937         * This condition may also occur when flushing a pmd while flushing
 938         * it (see ptep_modify_prot_start), so callers must ensure this
 939         * case is fine as well.
 940         */
 941        if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) ==
 942                                                cpu_to_be64(_PAGE_INVALID))
 943                return true;
 944
 945        return false;
 946}
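
/*
 * Illustrative sketch (hypothetical helper) of the split-time encoding
 * described above: pmdp_invalidate() clears _PAGE_PRESENT and sets
 * _PAGE_INVALID, so the entry still counts as present but is flagged to
 * lock-free walkers as serializing.
 */
static inline bool demo_invalidated_pmd_encoding(void)
{
        pmd_t pmd = __pmd(_PAGE_INVALID | _PAGE_PTE);

        return pmd_present(pmd) && pmd_is_serializing(pmd);
}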
 947
 948static inline int pmd_bad(pmd_t pmd)
 949{
 950        if (radix_enabled())
 951                return radix__pmd_bad(pmd);
 952        return hash__pmd_bad(pmd);
 953}
 954
 955static inline void pud_clear(pud_t *pudp)
 956{
 957        if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
 958                /*
 959                 * Don't use this if we can possibly have a hash page table
 960                 * entry mapping this.
 961                 */
 962                WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
 963        }
 964        *pudp = __pud(0);
 965}
 966
 967static inline int pud_none(pud_t pud)
 968{
 969        return !pud_raw(pud);
 970}
 971
 972static inline int pud_present(pud_t pud)
 973{
 974        return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
 975}
 976
 977extern struct page *pud_page(pud_t pud);
 978extern struct page *pmd_page(pmd_t pmd);
 979static inline pte_t pud_pte(pud_t pud)
 980{
 981        return __pte_raw(pud_raw(pud));
 982}
 983
 984static inline pud_t pte_pud(pte_t pte)
 985{
 986        return __pud_raw(pte_raw(pte));
 987}
 988#define pud_write(pud)          pte_write(pud_pte(pud))
 989
 990static inline int pud_bad(pud_t pud)
 991{
 992        if (radix_enabled())
 993                return radix__pud_bad(pud);
 994        return hash__pud_bad(pud);
 995}
 996
 997#define pud_access_permitted pud_access_permitted
 998static inline bool pud_access_permitted(pud_t pud, bool write)
 999{
1000        return pte_access_permitted(pud_pte(pud), write);
1001}
1002
1003#define __p4d_raw(x)    ((p4d_t) { __pgd_raw(x) })
1004static inline __be64 p4d_raw(p4d_t x)
1005{
1006        return pgd_raw(x.pgd);
1007}
1008
1009#define p4d_write(p4d)          pte_write(p4d_pte(p4d))
1010
1011static inline void p4d_clear(p4d_t *p4dp)
1012{
1013        *p4dp = __p4d(0);
1014}
1015
1016static inline int p4d_none(p4d_t p4d)
1017{
1018        return !p4d_raw(p4d);
1019}
1020
1021static inline int p4d_present(p4d_t p4d)
1022{
1023        return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT));
1024}
1025
1026static inline pte_t p4d_pte(p4d_t p4d)
1027{
1028        return __pte_raw(p4d_raw(p4d));
1029}
1030
1031static inline p4d_t pte_p4d(pte_t pte)
1032{
1033        return __p4d_raw(pte_raw(pte));
1034}
1035
1036static inline int p4d_bad(p4d_t p4d)
1037{
1038        if (radix_enabled())
1039                return radix__p4d_bad(p4d);
1040        return hash__p4d_bad(p4d);
1041}
1042
1043#define p4d_access_permitted p4d_access_permitted
1044static inline bool p4d_access_permitted(p4d_t p4d, bool write)
1045{
1046        return pte_access_permitted(p4d_pte(p4d), write);
1047}
1048
1049extern struct page *p4d_page(p4d_t p4d);
1050
1051/* Pointers in the page table tree are physical addresses */
1052#define __pgtable_ptr_val(ptr)  __pa(ptr)
1053
1054static inline pud_t *p4d_pgtable(p4d_t p4d)
1055{
1056        return (pud_t *)__va(p4d_val(p4d) & ~P4D_MASKED_BITS);
1057}
1058
1059static inline pmd_t *pud_pgtable(pud_t pud)
1060{
1061        return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS);
1062}
1063
1064#define pte_ERROR(e) \
1065        pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
1066#define pmd_ERROR(e) \
1067        pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
1068#define pud_ERROR(e) \
1069        pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
1070#define pgd_ERROR(e) \
1071        pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
1072
1073static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
1074{
1075        if (radix_enabled()) {
1076#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
1077                unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
1078                WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
1079#endif
1080                return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
1081        }
1082        return hash__map_kernel_page(ea, pa, prot);
1083}
1084
1085static inline int __meminit vmemmap_create_mapping(unsigned long start,
1086                                                   unsigned long page_size,
1087                                                   unsigned long phys)
1088{
1089        if (radix_enabled())
1090                return radix__vmemmap_create_mapping(start, page_size, phys);
1091        return hash__vmemmap_create_mapping(start, page_size, phys);
1092}
1093
1094#ifdef CONFIG_MEMORY_HOTPLUG
1095static inline void vmemmap_remove_mapping(unsigned long start,
1096                                          unsigned long page_size)
1097{
1098        if (radix_enabled())
1099                return radix__vmemmap_remove_mapping(start, page_size);
1100        return hash__vmemmap_remove_mapping(start, page_size);
1101}
1102#endif
1103
1104static inline pte_t pmd_pte(pmd_t pmd)
1105{
1106        return __pte_raw(pmd_raw(pmd));
1107}
1108
1109static inline pmd_t pte_pmd(pte_t pte)
1110{
1111        return __pmd_raw(pte_raw(pte));
1112}
1113
1114static inline pte_t *pmdp_ptep(pmd_t *pmd)
1115{
1116        return (pte_t *)pmd;
1117}
1118#define pmd_pfn(pmd)            pte_pfn(pmd_pte(pmd))
1119#define pmd_dirty(pmd)          pte_dirty(pmd_pte(pmd))
1120#define pmd_young(pmd)          pte_young(pmd_pte(pmd))
1121#define pmd_mkold(pmd)          pte_pmd(pte_mkold(pmd_pte(pmd)))
1122#define pmd_wrprotect(pmd)      pte_pmd(pte_wrprotect(pmd_pte(pmd)))
1123#define pmd_mkdirty(pmd)        pte_pmd(pte_mkdirty(pmd_pte(pmd)))
1124#define pmd_mkclean(pmd)        pte_pmd(pte_mkclean(pmd_pte(pmd)))
1125#define pmd_mkyoung(pmd)        pte_pmd(pte_mkyoung(pmd_pte(pmd)))
1126#define pmd_mkwrite(pmd)        pte_pmd(pte_mkwrite(pmd_pte(pmd)))
1127#define pmd_mk_savedwrite(pmd)  pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
1128#define pmd_clear_savedwrite(pmd)       pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
1129
1130#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
1131#define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
1132#define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
1133#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
1134
1135#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1136#define pmd_swp_mksoft_dirty(pmd)       pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
1137#define pmd_swp_soft_dirty(pmd)         pte_swp_soft_dirty(pmd_pte(pmd))
1138#define pmd_swp_clear_soft_dirty(pmd)   pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
1139#endif
1140#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
1141
1142#ifdef CONFIG_NUMA_BALANCING
1143static inline int pmd_protnone(pmd_t pmd)
1144{
1145        return pte_protnone(pmd_pte(pmd));
1146}
1147#endif /* CONFIG_NUMA_BALANCING */
1148
1149#define pmd_write(pmd)          pte_write(pmd_pte(pmd))
1150#define __pmd_write(pmd)        __pte_write(pmd_pte(pmd))
1151#define pmd_savedwrite(pmd)     pte_savedwrite(pmd_pte(pmd))
1152
1153#define pmd_access_permitted pmd_access_permitted
1154static inline bool pmd_access_permitted(pmd_t pmd, bool write)
1155{
1156        /*
1157         * pmdp_invalidate sets this combination (which is not caught by
1158         * !pte_present() check in pte_access_permitted), to prevent
1159         * lock-free lookups, as part of the serialize_against_pte_lookup()
1160         * synchronisation.
1161         *
1162         * This also catches the case where the PTE's hardware PRESENT bit is
1163         * cleared while TLB is flushed, which is suboptimal but should not
1164         * be frequent.
1165         */
1166        if (pmd_is_serializing(pmd))
1167                return false;
1168
1169        return pte_access_permitted(pmd_pte(pmd), write);
1170}
1171
1172#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1173extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
1174extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
1175extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
1176extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
1177                       pmd_t *pmdp, pmd_t pmd);
1178static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
1179                                        unsigned long addr, pmd_t *pmd)
1180{
1181}
1182
1183extern int hash__has_transparent_hugepage(void);
1184static inline int has_transparent_hugepage(void)
1185{
1186        if (radix_enabled())
1187                return radix__has_transparent_hugepage();
1188        return hash__has_transparent_hugepage();
1189}
1190#define has_transparent_hugepage has_transparent_hugepage
1191
1192static inline unsigned long
1193pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
1194                    unsigned long clr, unsigned long set)
1195{
1196        if (radix_enabled())
1197                return radix__pmd_hugepage_update(mm, addr, pmdp, clr, set);
1198        return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
1199}
1200
1201/*
 1202 * Returns true for pmd migration entries, THP, devmap and hugetlb entries,
 1203 * but is compile-time dependent on the THP config.
1204 */
1205static inline int pmd_large(pmd_t pmd)
1206{
1207        return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
1208}
1209
1210/*
1211 * For radix we should always find H_PAGE_HASHPTE zero. Hence
1212 * the below will work for radix too
1213 */
1214static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
1215                                              unsigned long addr, pmd_t *pmdp)
1216{
1217        unsigned long old;
1218
1219        if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
1220                return 0;
1221        old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
1222        return ((old & _PAGE_ACCESSED) != 0);
1223}
1224
1225#define __HAVE_ARCH_PMDP_SET_WRPROTECT
1226static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
1227                                      pmd_t *pmdp)
1228{
1229        if (__pmd_write((*pmdp)))
1230                pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
1231        else if (unlikely(pmd_savedwrite(*pmdp)))
1232                pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
1233}
1234
1235/*
1236 * Only returns true for a THP. False for pmd migration entry.
 1237 * We also need to return true when we come across a pmd that is
 1238 * in the middle of a THP split. While splitting a THP, we mark the pmd
1239 * invalid (pmdp_invalidate()) before we set it with pte page
1240 * address. A pmd_trans_huge() check against a pmd entry during that time
1241 * should return true.
1242 * We should not call this on a hugetlb entry. We should check for HugeTLB
1243 * entry using vma->vm_flags
1244 * The page table walk rule is explained in Documentation/vm/transhuge.rst
1245 */
1246static inline int pmd_trans_huge(pmd_t pmd)
1247{
1248        if (!pmd_present(pmd))
1249                return false;
1250
1251        if (radix_enabled())
1252                return radix__pmd_trans_huge(pmd);
1253        return hash__pmd_trans_huge(pmd);
1254}
1255
1256#define __HAVE_ARCH_PMD_SAME
1257static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
1258{
1259        if (radix_enabled())
1260                return radix__pmd_same(pmd_a, pmd_b);
1261        return hash__pmd_same(pmd_a, pmd_b);
1262}
1263
1264static inline pmd_t __pmd_mkhuge(pmd_t pmd)
1265{
1266        if (radix_enabled())
1267                return radix__pmd_mkhuge(pmd);
1268        return hash__pmd_mkhuge(pmd);
1269}
1270
1271/*
 1272 * pfn_pmd() returns a pmd_t that can be used as a pmd pte entry.
1273 */
1274static inline pmd_t pmd_mkhuge(pmd_t pmd)
1275{
1276#ifdef CONFIG_DEBUG_VM
1277        if (radix_enabled())
1278                WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)) == 0);
1279        else
1280                WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)) !=
1281                        cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE));
1282#endif
1283        return pmd;
1284}
1285
1286#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
1287extern int pmdp_set_access_flags(struct vm_area_struct *vma,
1288                                 unsigned long address, pmd_t *pmdp,
1289                                 pmd_t entry, int dirty);
1290
1291#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
1292extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
1293                                     unsigned long address, pmd_t *pmdp);
1294
1295#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
1296static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
1297                                            unsigned long addr, pmd_t *pmdp)
1298{
1299        if (radix_enabled())
1300                return radix__pmdp_huge_get_and_clear(mm, addr, pmdp);
1301        return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
1302}
1303
1304static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
1305                                        unsigned long address, pmd_t *pmdp)
1306{
1307        if (radix_enabled())
1308                return radix__pmdp_collapse_flush(vma, address, pmdp);
1309        return hash__pmdp_collapse_flush(vma, address, pmdp);
1310}
1311#define pmdp_collapse_flush pmdp_collapse_flush
1312
1313#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
1314pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
1315                                   unsigned long addr,
1316                                   pmd_t *pmdp, int full);
1317
1318#define __HAVE_ARCH_PGTABLE_DEPOSIT
1319static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
1320                                              pmd_t *pmdp, pgtable_t pgtable)
1321{
1322        if (radix_enabled())
1323                return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
1324        return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
1325}
1326
1327#define __HAVE_ARCH_PGTABLE_WITHDRAW
1328static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
1329                                                    pmd_t *pmdp)
1330{
1331        if (radix_enabled())
1332                return radix__pgtable_trans_huge_withdraw(mm, pmdp);
1333        return hash__pgtable_trans_huge_withdraw(mm, pmdp);
1334}
1335
1336#define __HAVE_ARCH_PMDP_INVALIDATE
1337extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
1338                             pmd_t *pmdp);
1339
1340#define pmd_move_must_withdraw pmd_move_must_withdraw
1341struct spinlock;
1342extern int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
1343                                  struct spinlock *old_pmd_ptl,
1344                                  struct vm_area_struct *vma);
1345/*
 1346 * Hash translation mode uses the deposited table to store hash pte
1347 * slot information.
1348 */
1349#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
1350static inline bool arch_needs_pgtable_deposit(void)
1351{
1352        if (radix_enabled())
1353                return false;
1354        return true;
1355}
1356extern void serialize_against_pte_lookup(struct mm_struct *mm);
1357
1358
1359static inline pmd_t pmd_mkdevmap(pmd_t pmd)
1360{
1361        if (radix_enabled())
1362                return radix__pmd_mkdevmap(pmd);
1363        return hash__pmd_mkdevmap(pmd);
1364}
1365
1366static inline int pmd_devmap(pmd_t pmd)
1367{
1368        return pte_devmap(pmd_pte(pmd));
1369}
1370
1371static inline int pud_devmap(pud_t pud)
1372{
1373        return 0;
1374}
1375
1376static inline int pgd_devmap(pgd_t pgd)
1377{
1378        return 0;
1379}
1380#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1381
1382static inline int pud_pfn(pud_t pud)
1383{
1384        /*
1385         * Currently all calls to pud_pfn() are gated around a pud_devmap()
1386         * check so this should never be used. If it grows another user we
1387         * want to know about it.
1388         */
1389        BUILD_BUG();
1390        return 0;
1391}
1392#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1393pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
1394void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
1395                             pte_t *, pte_t, pte_t);
1396
1397/*
1398 * Returns true for a R -> RW upgrade of pte
1399 */
1400static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
1401{
1402        if (!(old_val & _PAGE_READ))
1403                return false;
1404
1405        if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
1406                return true;
1407
1408        return false;
1409}
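
/*
 * Illustrative sketch (hypothetical helper): only a transition from a
 * readable, non-writable pte to a writable one counts as an R -> RW
 * upgrade; granting write to a previously unreadable pte does not.
 */
static inline void demo_rw_upgrade_check(void)
{
        unsigned long ro = _PAGE_PRESENT | _PAGE_READ;
        unsigned long rw = ro | _PAGE_WRITE;

        WARN_ON(!is_pte_rw_upgrade(ro, rw));            /* true  */
        WARN_ON(is_pte_rw_upgrade(_PAGE_PRESENT, rw));  /* false */
}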
1410
1411/*
1412 * Like pmd_huge() and pmd_large(), but works regardless of config options
1413 */
1414#define pmd_is_leaf pmd_is_leaf
1415#define pmd_leaf pmd_is_leaf
1416static inline bool pmd_is_leaf(pmd_t pmd)
1417{
1418        return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
1419}
1420
1421#define pud_is_leaf pud_is_leaf
1422#define pud_leaf pud_is_leaf
1423static inline bool pud_is_leaf(pud_t pud)
1424{
1425        return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
1426}
1427
1428#define p4d_is_leaf p4d_is_leaf
1429#define p4d_leaf p4d_is_leaf
1430static inline bool p4d_is_leaf(p4d_t p4d)
1431{
1432        return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE));
1433}
1434
1435#endif /* __ASSEMBLY__ */
1436#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
1437