linux/arch/x86/include/asm/pgtable.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PGTABLE_H
#define _ASM_X86_PGTABLE_H

#include <linux/mem_encrypt.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>

/*
 * Macro to mark a page protection value as UC-
 */
#define pgprot_noncached(prot)                                          \
        ((boot_cpu_data.x86 > 3)                                        \
         ? (__pgprot(pgprot_val(prot) |                                 \
                     cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)))     \
         : (prot))

/*
 * Macros to add or remove encryption attribute
 */
#define pgprot_encrypted(prot)  __pgprot(__sme_set(pgprot_val(prot)))
#define pgprot_decrypted(prot)  __pgprot(__sme_clr(pgprot_val(prot)))

#ifndef __ASSEMBLY__
#include <asm/x86_init.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/api.h>
#include <asm-generic/pgtable_uffd.h>

extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);

void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
                                   bool user);
void ptdump_walk_pgd_level_checkwx(void);
void ptdump_walk_user_pgd_level_checkwx(void);

#ifdef CONFIG_DEBUG_WX
#define debug_checkwx()         ptdump_walk_pgd_level_checkwx()
#define debug_checkwx_user()    ptdump_walk_user_pgd_level_checkwx()
#else
#define debug_checkwx()         do { } while (0)
#define debug_checkwx_user()    do { } while (0)
#endif

/*
 * ZERO_PAGE is a global shared page that is always zero: used
 * for zero-mapped memory areas etc..
 */
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
        __visible;
#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))

extern spinlock_t pgd_lock;
extern struct list_head pgd_list;

extern struct mm_struct *pgd_page_get_mm(struct page *page);

extern pmdval_t early_pmd_flags;

#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else  /* !CONFIG_PARAVIRT_XXL */
#define set_pte(ptep, pte)              native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)

#define set_pte_atomic(ptep, pte)                                       \
        native_set_pte_atomic(ptep, pte)

#define set_pmd(pmdp, pmd)              native_set_pmd(pmdp, pmd)

#ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd)              native_set_pgd(pgdp, pgd)
#define pgd_clear(pgd)                  (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0)
#endif

#ifndef set_p4d
# define set_p4d(p4dp, p4d)             native_set_p4d(p4dp, p4d)
#endif

#ifndef __PAGETABLE_PUD_FOLDED
#define p4d_clear(p4d)                  native_p4d_clear(p4d)
#endif

#ifndef set_pud
# define set_pud(pudp, pud)             native_set_pud(pudp, pud)
#endif

#ifndef __PAGETABLE_PUD_FOLDED
#define pud_clear(pud)                  native_pud_clear(pud)
#endif

#define pte_clear(mm, addr, ptep)       native_pte_clear(mm, addr, ptep)
#define pmd_clear(pmd)                  native_pmd_clear(pmd)

#define pgd_val(x)      native_pgd_val(x)
#define __pgd(x)        native_make_pgd(x)

#ifndef __PAGETABLE_P4D_FOLDED
#define p4d_val(x)      native_p4d_val(x)
#define __p4d(x)        native_make_p4d(x)
#endif

#ifndef __PAGETABLE_PUD_FOLDED
#define pud_val(x)      native_pud_val(x)
#define __pud(x)        native_make_pud(x)
#endif

#ifndef __PAGETABLE_PMD_FOLDED
#define pmd_val(x)      native_pmd_val(x)
#define __pmd(x)        native_make_pmd(x)
#endif

#define pte_val(x)      native_pte_val(x)
#define __pte(x)        native_make_pte(x)

#define arch_end_context_switch(prev)   do {} while(0)
#endif  /* CONFIG_PARAVIRT_XXL */

/*
 * The following only work if pte_present() is true.
 * Undefined behaviour if not..
 */
static inline int pte_dirty(pte_t pte)
{
        return pte_flags(pte) & _PAGE_DIRTY;
}


static inline u32 read_pkru(void)
{
        if (boot_cpu_has(X86_FEATURE_OSPKE))
                return rdpkru();
        return 0;
}

static inline void write_pkru(u32 pkru)
{
        struct pkru_state *pk;

        if (!boot_cpu_has(X86_FEATURE_OSPKE))
                return;

        pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU);

        /*
         * The PKRU value in xstate needs to be in sync with the value that is
         * written to the CPU. The FPU restore on return to userland would
         * otherwise load the previous value again.
         */
        fpregs_lock();
        if (pk)
                pk->pkru = pkru;
        __write_pkru(pkru);
        fpregs_unlock();
}

static inline int pte_young(pte_t pte)
{
        return pte_flags(pte) & _PAGE_ACCESSED;
}

static inline int pmd_dirty(pmd_t pmd)
{
        return pmd_flags(pmd) & _PAGE_DIRTY;
}

static inline int pmd_young(pmd_t pmd)
{
        return pmd_flags(pmd) & _PAGE_ACCESSED;
}

static inline int pud_dirty(pud_t pud)
{
        return pud_flags(pud) & _PAGE_DIRTY;
}

static inline int pud_young(pud_t pud)
{
        return pud_flags(pud) & _PAGE_ACCESSED;
}

static inline int pte_write(pte_t pte)
{
        return pte_flags(pte) & _PAGE_RW;
}

static inline int pte_huge(pte_t pte)
{
        return pte_flags(pte) & _PAGE_PSE;
}

static inline int pte_global(pte_t pte)
{
        return pte_flags(pte) & _PAGE_GLOBAL;
}

static inline int pte_exec(pte_t pte)
{
        return !(pte_flags(pte) & _PAGE_NX);
}

static inline int pte_special(pte_t pte)
{
        return pte_flags(pte) & _PAGE_SPECIAL;
}

/* Entries that were set to PROT_NONE are inverted */

static inline u64 protnone_mask(u64 val);

static inline unsigned long pte_pfn(pte_t pte)
{
        phys_addr_t pfn = pte_val(pte);
        pfn ^= protnone_mask(pfn);
        return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
}

static inline unsigned long pmd_pfn(pmd_t pmd)
{
        phys_addr_t pfn = pmd_val(pmd);
        pfn ^= protnone_mask(pfn);
        return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}

static inline unsigned long pud_pfn(pud_t pud)
{
        phys_addr_t pfn = pud_val(pud);
        pfn ^= protnone_mask(pfn);
        return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}

static inline unsigned long p4d_pfn(p4d_t p4d)
{
        return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
}

static inline unsigned long pgd_pfn(pgd_t pgd)
{
        return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}

#define p4d_leaf        p4d_large
static inline int p4d_large(p4d_t p4d)
{
        /* No 512 GiB pages yet */
        return 0;
}

#define pte_page(pte)   pfn_to_page(pte_pfn(pte))

#define pmd_leaf        pmd_large
static inline int pmd_large(pmd_t pte)
{
        return pmd_flags(pte) & _PAGE_PSE;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
        return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline int pud_trans_huge(pud_t pud)
{
        return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
}
#endif

#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
        return boot_cpu_has(X86_FEATURE_PSE);
}

#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
static inline int pmd_devmap(pmd_t pmd)
{
        return !!(pmd_val(pmd) & _PAGE_DEVMAP);
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline int pud_devmap(pud_t pud)
{
        return !!(pud_val(pud) & _PAGE_DEVMAP);
}
#else
static inline int pud_devmap(pud_t pud)
{
        return 0;
}
#endif

static inline int pgd_devmap(pgd_t pgd)
{
        return 0;
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
{
        pteval_t v = native_pte_val(pte);

        return native_make_pte(v | set);
}

static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
{
        pteval_t v = native_pte_val(pte);

        return native_make_pte(v & ~clear);
}

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
        return pte_flags(pte) & _PAGE_UFFD_WP;
}

static inline pte_t pte_mkuffd_wp(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_UFFD_WP);
}

static inline pte_t pte_clear_uffd_wp(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_UFFD_WP);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

static inline pte_t pte_mkclean(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_DIRTY);
}

static inline pte_t pte_mkold(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_ACCESSED);
}

static inline pte_t pte_wrprotect(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_RW);
}

static inline pte_t pte_mkexec(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_NX);
}

static inline pte_t pte_mkdirty(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}

static inline pte_t pte_mkyoung(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_ACCESSED);
}

static inline pte_t pte_mkwrite(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_RW);
}

static inline pte_t pte_mkhuge(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_PSE);
}

static inline pte_t pte_clrhuge(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_PSE);
}

static inline pte_t pte_mkglobal(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_GLOBAL);
}

static inline pte_t pte_clrglobal(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_GLOBAL);
}

static inline pte_t pte_mkspecial(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_SPECIAL);
}

static inline pte_t pte_mkdevmap(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP);
}

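/*
 * Editor's illustrative sketch (not part of the upstream header): the
 * pte_mk*()/pte_clr*() helpers above are pure value transformations and
 * compose freely.  The hypothetical helper below shows how a PTE might be
 * downgraded to a clean, read-only, not-recently-used entry, e.g. before
 * write-protecting a page for dirty tracking.
 */
static inline pte_t pte_example_downgrade(pte_t pte)
{
        /* drop write permission, then clear the software-visible state */
        pte = pte_wrprotect(pte);
        pte = pte_mkclean(pte);
        return pte_mkold(pte);
}
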
static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
{
        pmdval_t v = native_pmd_val(pmd);

        return native_make_pmd(v | set);
}

static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
{
        pmdval_t v = native_pmd_val(pmd);

        return native_make_pmd(v & ~clear);
}

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pmd_uffd_wp(pmd_t pmd)
{
        return pmd_flags(pmd) & _PAGE_UFFD_WP;
}

static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
{
        return pmd_set_flags(pmd, _PAGE_UFFD_WP);
}

static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
{
        return pmd_clear_flags(pmd, _PAGE_UFFD_WP);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

static inline pmd_t pmd_mkold(pmd_t pmd)
{
        return pmd_clear_flags(pmd, _PAGE_ACCESSED);
}

static inline pmd_t pmd_mkclean(pmd_t pmd)
{
        return pmd_clear_flags(pmd, _PAGE_DIRTY);
}

static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
        return pmd_clear_flags(pmd, _PAGE_RW);
}

static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
        return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}

static inline pmd_t pmd_mkdevmap(pmd_t pmd)
{
        return pmd_set_flags(pmd, _PAGE_DEVMAP);
}

static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
        return pmd_set_flags(pmd, _PAGE_PSE);
}

static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
        return pmd_set_flags(pmd, _PAGE_ACCESSED);
}

static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
        return pmd_set_flags(pmd, _PAGE_RW);
}

static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
{
        pudval_t v = native_pud_val(pud);

        return native_make_pud(v | set);
}

static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
{
        pudval_t v = native_pud_val(pud);

        return native_make_pud(v & ~clear);
}

static inline pud_t pud_mkold(pud_t pud)
{
        return pud_clear_flags(pud, _PAGE_ACCESSED);
}

static inline pud_t pud_mkclean(pud_t pud)
{
        return pud_clear_flags(pud, _PAGE_DIRTY);
}

static inline pud_t pud_wrprotect(pud_t pud)
{
        return pud_clear_flags(pud, _PAGE_RW);
}

static inline pud_t pud_mkdirty(pud_t pud)
{
        return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}

static inline pud_t pud_mkdevmap(pud_t pud)
{
        return pud_set_flags(pud, _PAGE_DEVMAP);
}

static inline pud_t pud_mkhuge(pud_t pud)
{
        return pud_set_flags(pud, _PAGE_PSE);
}

static inline pud_t pud_mkyoung(pud_t pud)
{
        return pud_set_flags(pud, _PAGE_ACCESSED);
}

static inline pud_t pud_mkwrite(pud_t pud)
{
        return pud_set_flags(pud, _PAGE_RW);
}

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
        return pte_flags(pte) & _PAGE_SOFT_DIRTY;
}

static inline int pmd_soft_dirty(pmd_t pmd)
{
        return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
}

static inline int pud_soft_dirty(pud_t pud)
{
        return pud_flags(pud) & _PAGE_SOFT_DIRTY;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
}

static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
        return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}

static inline pud_t pud_mksoft_dirty(pud_t pud)
{
        return pud_set_flags(pud, _PAGE_SOFT_DIRTY);
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
}

static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
{
        return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
}

static inline pud_t pud_clear_soft_dirty(pud_t pud)
{
        return pud_clear_flags(pud, _PAGE_SOFT_DIRTY);
}

#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */

/*
 * Mask out unsupported bits in a present pgprot.  Non-present pgprots
 * can use those bits for other purposes, so leave them be.
 */
static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
{
        pgprotval_t protval = pgprot_val(pgprot);

        if (protval & _PAGE_PRESENT)
                protval &= __supported_pte_mask;

        return protval;
}

static inline pgprotval_t check_pgprot(pgprot_t pgprot)
{
        pgprotval_t massaged_val = massage_pgprot(pgprot);

        /* mmdebug.h can not be included here because of dependencies */
#ifdef CONFIG_DEBUG_VM
        WARN_ONCE(pgprot_val(pgprot) != massaged_val,
                  "attempted to set unsupported pgprot: %016llx "
                  "bits: %016llx supported: %016llx\n",
                  (u64)pgprot_val(pgprot),
                  (u64)pgprot_val(pgprot) ^ massaged_val,
                  (u64)__supported_pte_mask);
#endif

        return massaged_val;
}

static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
        phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
        pfn ^= protnone_mask(pgprot_val(pgprot));
        pfn &= PTE_PFN_MASK;
        return __pte(pfn | check_pgprot(pgprot));
}

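/*
 * Editor's illustrative sketch (not part of the upstream header):
 * pfn_pte() and pte_pfn() are designed to round-trip, including for
 * PROT_NONE entries whose PFN bits are stored inverted.  The hypothetical
 * helper below builds a normal kernel mapping entry (PAGE_KERNEL from
 * <asm/pgtable_types.h>) for a PFN and checks that the PFN reads back.
 */
static inline bool pte_example_roundtrip(unsigned long pfn)
{
        pte_t pte = pfn_pte(pfn, PAGE_KERNEL);

        return pte_pfn(pte) == pfn;
}
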
static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
{
        phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
        pfn ^= protnone_mask(pgprot_val(pgprot));
        pfn &= PHYSICAL_PMD_PAGE_MASK;
        return __pmd(pfn | check_pgprot(pgprot));
}

static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
{
        phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
        pfn ^= protnone_mask(pgprot_val(pgprot));
        pfn &= PHYSICAL_PUD_PAGE_MASK;
        return __pud(pfn | check_pgprot(pgprot));
}

static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
        return pfn_pmd(pmd_pfn(pmd),
                      __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}

static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);

static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
        pteval_t val = pte_val(pte), oldval = val;

        /*
         * Chop off the NX bit (if present), and add the NX portion of
         * the newprot (if present):
         */
        val &= _PAGE_CHG_MASK;
        val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
        val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
        return __pte(val);
}

static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
        pmdval_t val = pmd_val(pmd), oldval = val;

        val &= _HPAGE_CHG_MASK;
        val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
        val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
        return __pmd(val);
}

/*
 * mprotect needs to preserve PAT and encryption bits when updating
 * vm_page_prot
 */
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
        pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
        pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK;
        return __pgprot(preservebits | addbits);
}

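/*
 * Editor's illustrative sketch (not part of the upstream header): because
 * the encryption bit sits inside _PAGE_CHG_MASK while _PAGE_RW does not,
 * pgprot_modify() keeps caching/encryption attributes of the old
 * protection and takes the access bits from the new one.  Hypothetical
 * helper; PAGE_KERNEL_RO is assumed to come from <asm/pgtable_types.h>.
 */
static inline pgprot_t pgprot_example_make_readonly(pgprot_t oldprot)
{
        /* keep PAT/encryption-style bits from oldprot, drop to read-only */
        return pgprot_modify(oldprot, PAGE_KERNEL_RO);
}
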
#define pte_pgprot(x) __pgprot(pte_flags(x))
#define pmd_pgprot(x) __pgprot(pmd_flags(x))
#define pud_pgprot(x) __pgprot(pud_flags(x))
#define p4d_pgprot(x) __pgprot(p4d_flags(x))

#define canon_pgprot(p) __pgprot(massage_pgprot(p))

static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
        return canon_pgprot(prot);
}

static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
                                         enum page_cache_mode pcm,
                                         enum page_cache_mode new_pcm)
{
        /*
         * PAT type is always WB for untracked ranges, so no need to check.
         */
        if (x86_platform.is_untracked_pat_range(paddr, paddr + size))
                return 1;

        /*
         * Certain new memtypes are not allowed with certain
         * requested memtype:
         * - request is uncached, return cannot be write-back
         * - request is write-combine, return cannot be write-back
         * - request is write-through, return cannot be write-back
         * - request is write-through, return cannot be write-combine
         */
        if ((pcm == _PAGE_CACHE_MODE_UC_MINUS &&
             new_pcm == _PAGE_CACHE_MODE_WB) ||
            (pcm == _PAGE_CACHE_MODE_WC &&
             new_pcm == _PAGE_CACHE_MODE_WB) ||
            (pcm == _PAGE_CACHE_MODE_WT &&
             new_pcm == _PAGE_CACHE_MODE_WB) ||
            (pcm == _PAGE_CACHE_MODE_WT &&
             new_pcm == _PAGE_CACHE_MODE_WC)) {
                return 0;
        }

        return 1;
}

pmd_t *populate_extra_pmd(unsigned long vaddr);
pte_t *populate_extra_pte(unsigned long vaddr);

#ifdef CONFIG_PAGE_TABLE_ISOLATION
pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd);

/*
 * Take a PGD location (pgdp) and a pgd value that needs to be set there.
 * Populates the user and returns the resulting PGD that must be set in
 * the kernel copy of the page tables.
 */
static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
        if (!static_cpu_has(X86_FEATURE_PTI))
                return pgd;
        return __pti_set_user_pgtbl(pgdp, pgd);
}
#else   /* CONFIG_PAGE_TABLE_ISOLATION */
static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{
        return pgd;
}
#endif  /* CONFIG_PAGE_TABLE_ISOLATION */

#endif  /* __ASSEMBLY__ */


#ifdef CONFIG_X86_32
# include <asm/pgtable_32.h>
#else
# include <asm/pgtable_64.h>
#endif

#ifndef __ASSEMBLY__
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/log2.h>
#include <asm/fixmap.h>

static inline int pte_none(pte_t pte)
{
        return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK));
}

#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t a, pte_t b)
{
        return a.pte == b.pte;
}

static inline int pte_present(pte_t a)
{
        return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
}

#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
static inline int pte_devmap(pte_t a)
{
        return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP;
}
#endif

#define pte_accessible pte_accessible
static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
{
        if (pte_flags(a) & _PAGE_PRESENT)
                return true;

        if ((pte_flags(a) & _PAGE_PROTNONE) &&
                        mm_tlb_flush_pending(mm))
                return true;

        return false;
}

static inline int pmd_present(pmd_t pmd)
{
        /*
         * Checking for _PAGE_PSE is needed too because
         * split_huge_page will temporarily clear the present bit (but
         * the _PAGE_PSE flag will remain set at all times while the
         * _PAGE_PRESENT bit is clear).
         */
        return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
}

#ifdef CONFIG_NUMA_BALANCING
/*
 * These work without NUMA balancing but the kernel does not care. See the
 * comment in include/asm-generic/pgtable.h
 */
static inline int pte_protnone(pte_t pte)
{
        return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT))
                == _PAGE_PROTNONE;
}

static inline int pmd_protnone(pmd_t pmd)
{
        return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT))
                == _PAGE_PROTNONE;
}
#endif /* CONFIG_NUMA_BALANCING */

static inline int pmd_none(pmd_t pmd)
{
        /* Only check low word on 32-bit platforms, since it might be
           out of sync with upper half. */
        unsigned long val = native_pmd_val(pmd);
        return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0;
}

static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
        return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd));
}

/*
 * Currently stuck as a macro due to indirect forward reference to
 * linux/mmzone.h's __section_mem_map_addr() definition:
 */
#define pmd_page(pmd)   pfn_to_page(pmd_pfn(pmd))

/*
 * the pmd page can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
 *
 * this macro returns the index of the entry in the pmd page which would
 * control the given virtual address
 */
static inline unsigned long pmd_index(unsigned long address)
{
        return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}

/*
 * Conversion functions: convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 *
 * (Currently stuck as a macro because of indirect forward reference
 * to linux/mm.h:page_to_nid())
 */
#define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))

/*
 * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
 *
 * this function returns the index of the entry in the pte page which would
 * control the given virtual address
 *
 * Also define macro so we can test if pte_index is defined for arch.
 */
#define pte_index pte_index
static inline unsigned long pte_index(unsigned long address)
{
        return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}

static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
{
        return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
}

static inline int pmd_bad(pmd_t pmd)
{
        return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
}

static inline unsigned long pages_to_mb(unsigned long npg)
{
        return npg >> (20 - PAGE_SHIFT);
}

#if CONFIG_PGTABLE_LEVELS > 2
static inline int pud_none(pud_t pud)
{
        return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
}

static inline int pud_present(pud_t pud)
{
        return pud_flags(pud) & _PAGE_PRESENT;
}

static inline unsigned long pud_page_vaddr(pud_t pud)
{
        return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
}

/*
 * Currently stuck as a macro due to indirect forward reference to
 * linux/mmzone.h's __section_mem_map_addr() definition:
 */
#define pud_page(pud)   pfn_to_page(pud_pfn(pud))

/* Find an entry in the second-level page table.. */
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
        return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
}

#define pud_leaf        pud_large
static inline int pud_large(pud_t pud)
{
        return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
                (_PAGE_PSE | _PAGE_PRESENT);
}

static inline int pud_bad(pud_t pud)
{
        return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
}
#else
#define pud_leaf        pud_large
static inline int pud_large(pud_t pud)
{
        return 0;
}
#endif  /* CONFIG_PGTABLE_LEVELS > 2 */

static inline unsigned long pud_index(unsigned long address)
{
        return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
}

#if CONFIG_PGTABLE_LEVELS > 3
static inline int p4d_none(p4d_t p4d)
{
        return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
}

static inline int p4d_present(p4d_t p4d)
{
        return p4d_flags(p4d) & _PAGE_PRESENT;
}

static inline unsigned long p4d_page_vaddr(p4d_t p4d)
{
        return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
}

/*
 * Currently stuck as a macro due to indirect forward reference to
 * linux/mmzone.h's __section_mem_map_addr() definition:
 */
#define p4d_page(p4d)   pfn_to_page(p4d_pfn(p4d))

/* Find an entry in the third-level page table.. */
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
        return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
}

static inline int p4d_bad(p4d_t p4d)
{
        unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;

        if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
                ignore_flags |= _PAGE_NX;

        return (p4d_flags(p4d) & ~ignore_flags) != 0;
}
#endif  /* CONFIG_PGTABLE_LEVELS > 3 */

static inline unsigned long p4d_index(unsigned long address)
{
        return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
}

#if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd)
{
        if (!pgtable_l5_enabled())
                return 1;
        return pgd_flags(pgd) & _PAGE_PRESENT;
}

static inline unsigned long pgd_page_vaddr(pgd_t pgd)
{
        return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK);
}

/*
 * Currently stuck as a macro due to indirect forward reference to
 * linux/mmzone.h's __section_mem_map_addr() definition:
 */
#define pgd_page(pgd)   pfn_to_page(pgd_pfn(pgd))

/* to find an entry in a page-table-directory. */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
        if (!pgtable_l5_enabled())
                return (p4d_t *)pgd;
        return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}

static inline int pgd_bad(pgd_t pgd)
{
        unsigned long ignore_flags = _PAGE_USER;

        if (!pgtable_l5_enabled())
                return 0;

        if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
                ignore_flags |= _PAGE_NX;

        return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
}

static inline int pgd_none(pgd_t pgd)
{
        if (!pgtable_l5_enabled())
                return 0;
        /*
         * There is no need to do a workaround for the KNL stray
         * A/D bit erratum here.  PGDs only point to page tables
         * except on 32-bit non-PAE which is not supported on
         * KNL.
         */
        return !native_pgd_val(pgd);
}
#endif  /* CONFIG_PGTABLE_LEVELS > 4 */

#endif  /* __ASSEMBLY__ */

/*
 * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
 *
 * this macro returns the index of the entry in the pgd page which would
 * control the given virtual address
 */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))

/*
 * pgd_offset() returns a (pgd_t *)
 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
 */
#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
/*
 * a shortcut to get a pgd_t in a given mm
 */
#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
/*
 * a shortcut which implies the use of the kernel's pgd, instead
 * of a process's
 */
#define pgd_offset_k(address) pgd_offset(&init_mm, (address))

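/*
 * Editor's illustrative sketch (not part of the upstream header), kept as
 * a comment because this spot is also visible to assembly includes: the
 * usual way the *_offset() helpers compose to find the kernel PTE that
 * maps a virtual address.  Each step assumes the entry at the previous
 * level is present and not a large page.
 *
 *        pgd_t *pgd = pgd_offset_k(address);
 *        p4d_t *p4d = p4d_offset(pgd, address);
 *        pud_t *pud = pud_offset(p4d, address);
 *        pmd_t *pmd = pmd_offset(pud, address);
 *        pte_t *pte = pte_offset_kernel(pmd, address);
 */
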
#define KERNEL_PGD_BOUNDARY     pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS         (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)

#ifndef __ASSEMBLY__

extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);
extern void memblock_find_dma_reserve(void);

#ifdef CONFIG_X86_64
/* Realmode trampoline initialization. */
extern pgd_t trampoline_pgd_entry;
static inline void __meminit init_trampoline_default(void)
{
        /* Default trampoline pgd value */
        trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
}

void __init poking_init(void);

unsigned long init_memory_mapping(unsigned long start,
                                  unsigned long end, pgprot_t prot);

# ifdef CONFIG_RANDOMIZE_MEMORY
void __meminit init_trampoline(void);
# else
#  define init_trampoline init_trampoline_default
# endif
#else
static inline void init_trampoline(void) { }
#endif

/* local pte updates need not use xchg for locking */
static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
{
        pte_t res = *ptep;

        /* Pure native function needs no input for mm, addr */
        native_pte_clear(NULL, 0, ptep);
        return res;
}

static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
{
        pmd_t res = *pmdp;

        native_pmd_clear(pmdp);
        return res;
}

static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
{
        pud_t res = *pudp;

        native_pud_clear(pudp);
        return res;
}

static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
                                     pte_t *ptep, pte_t pte)
{
        native_set_pte(ptep, pte);
}

static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                              pmd_t *pmdp, pmd_t pmd)
{
        set_pmd(pmdp, pmd);
}

static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
                              pud_t *pudp, pud_t pud)
{
        native_set_pud(pudp, pud);
}

/*
 * We only update the dirty/accessed state if we set
 * the dirty bit by hand in the kernel, since the hardware
 * will do the accessed bit for us, and we don't want to
 * race with other CPU's that might be updating the dirty
 * bit at the same time.
 */
struct vm_area_struct;

#define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pte_t *ptep,
                                 pte_t entry, int dirty);

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
                                     unsigned long addr, pte_t *ptep);

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
extern int ptep_clear_flush_young(struct vm_area_struct *vma,
                                  unsigned long address, pte_t *ptep);

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                                       pte_t *ptep)
{
        pte_t pte = native_ptep_get_and_clear(ptep);
        return pte;
}

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long addr, pte_t *ptep,
                                            int full)
{
        pte_t pte;
        if (full) {
                /*
                 * Full address destruction in progress; paravirt does not
                 * care about updates and native needs no locking
                 */
                pte = native_local_ptep_get_and_clear(ptep);
        } else {
                pte = ptep_get_and_clear(mm, addr, ptep);
        }
        return pte;
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
{
        clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
}

#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)

#define mk_pmd(page, pgprot)   pfn_pmd(page_to_pfn(page), (pgprot))

#define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp,
                                 pmd_t entry, int dirty);
extern int pudp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pud_t *pudp,
                                 pud_t entry, int dirty);

#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                     unsigned long addr, pmd_t *pmdp);
extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
                                     unsigned long addr, pud_t *pudp);

#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
                                  unsigned long address, pmd_t *pmdp);


#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
        return pmd_flags(pmd) & _PAGE_RW;
}

#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
                                       pmd_t *pmdp)
{
        return native_pmdp_get_and_clear(pmdp);
}

#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
                                        unsigned long addr, pud_t *pudp)
{
        return native_pudp_get_and_clear(pudp);
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long addr, pmd_t *pmdp)
{
        clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
}

#define pud_write pud_write
static inline int pud_write(pud_t pud)
{
        return pud_flags(pud) & _PAGE_RW;
}

#ifndef pmdp_establish
#define pmdp_establish pmdp_establish
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
                unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
        if (IS_ENABLED(CONFIG_SMP)) {
                return xchg(pmdp, pmd);
        } else {
                pmd_t old = *pmdp;
                WRITE_ONCE(*pmdp, pmd);
                return old;
        }
}
#endif
/*
 * Page table pages are page-aligned.  The lower half of the top
 * level is used for userspace and the top half for the kernel.
 *
 * Returns true for parts of the PGD that map userspace and
 * false for the parts that map the kernel.
 */
static inline bool pgdp_maps_userspace(void *__ptr)
{
        unsigned long ptr = (unsigned long)__ptr;

        return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
}

#define pgd_leaf        pgd_large
static inline int pgd_large(pgd_t pgd) { return 0; }

#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
 * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
 * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
 * the user one is in the last 4k.  To switch between them, you
 * just need to flip the 12th bit in their addresses.
 */
#define PTI_PGTABLE_SWITCH_BIT  PAGE_SHIFT

/*
 * This generates better code than the inline assembly in
 * __set_bit().
 */
static inline void *ptr_set_bit(void *ptr, int bit)
{
        unsigned long __ptr = (unsigned long)ptr;

        __ptr |= BIT(bit);
        return (void *)__ptr;
}
static inline void *ptr_clear_bit(void *ptr, int bit)
{
        unsigned long __ptr = (unsigned long)ptr;

        __ptr &= ~BIT(bit);
        return (void *)__ptr;
}

static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
{
        return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
}

static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
{
        return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
}

static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
{
        return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
}

static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
{
        return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
}
#endif /* CONFIG_PAGE_TABLE_ISOLATION */

/*
 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
 *
 *  dst - pointer to pgd range anywhere on a pgd page
 *  src - ""
 *  count - the number of pgds to copy.
 *
 * dst and src can be on the same page, but the range must not overlap,
 * and must not cross a page boundary.
 */
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
        memcpy(dst, src, count * sizeof(pgd_t));
#ifdef CONFIG_PAGE_TABLE_ISOLATION
        if (!static_cpu_has(X86_FEATURE_PTI))
                return;
        /* Clone the user space pgd as well */
        memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
               count * sizeof(pgd_t));
#endif
}

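/*
 * Editor's illustrative sketch (not part of the upstream header): roughly
 * how pgd_alloc()-style code uses clone_pgd_range() with the
 * KERNEL_PGD_BOUNDARY/KERNEL_PGD_PTRS definitions above to copy the
 * kernel half of the page tables into a freshly allocated PGD page.
 * Hypothetical helper name.
 */
static inline void example_clone_kernel_pgds(pgd_t *new_pgd)
{
        /* copy only the kernel portion; the user half starts out empty */
        clone_pgd_range(new_pgd + KERNEL_PGD_BOUNDARY,
                        swapper_pg_dir + KERNEL_PGD_BOUNDARY,
                        KERNEL_PGD_PTRS);
}
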
#define PTE_SHIFT ilog2(PTRS_PER_PTE)
static inline int page_level_shift(enum pg_level level)
{
        return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
}
static inline unsigned long page_level_size(enum pg_level level)
{
        return 1UL << page_level_shift(level);
}
static inline unsigned long page_level_mask(enum pg_level level)
{
        return ~(page_level_size(level) - 1);
}

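/*
 * Editor's illustrative sketch (not part of the upstream header): with
 * 4 KiB pages PTE_SHIFT is 9, so page_level_shift(PG_LEVEL_2M) is
 * 12 - 9 + 2 * 9 = 21 and page_level_size(PG_LEVEL_2M) is 2 MiB.  The
 * hypothetical helper below rounds an address down to the start of the
 * mapping that covers it at a given level.
 */
static inline unsigned long page_level_example_base(unsigned long address,
                                                    enum pg_level level)
{
        return address & page_level_mask(level);
}
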
/*
 * The x86 doesn't have any external MMU info: the kernel page
 * tables contain all the necessary information.
 */
static inline void update_mmu_cache(struct vm_area_struct *vma,
                unsigned long addr, pte_t *ptep)
{
}
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
                unsigned long addr, pmd_t *pmd)
{
}
static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
                unsigned long addr, pud_t *pud)
{
}

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
        return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
        return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
        return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
        return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
}
#endif
#endif

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
        return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
}

static inline int pte_swp_uffd_wp(pte_t pte)
{
        return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
}

static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
        return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
}

static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
{
        return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP);
}

static inline int pmd_swp_uffd_wp(pmd_t pmd)
{
        return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP;
}

static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
{
        return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

#define PKRU_AD_BIT 0x1
#define PKRU_WD_BIT 0x2
#define PKRU_BITS_PER_PKEY 2

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
extern u32 init_pkru_value;
#else
#define init_pkru_value 0
#endif

static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
{
        int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
        return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits));
}

static inline bool __pkru_allows_write(u32 pkru, u16 pkey)
{
        int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
        /*
         * Access-disable disables writes too so we need to check
         * both bits here.
         */
        return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits));
}

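/*
 * Editor's illustrative sketch (not part of the upstream header): each
 * pkey owns two consecutive PKRU bits, AD at bit 2*pkey and WD at bit
 * 2*pkey + 1, so pkey 1 is governed by bits 2 and 3.  The hypothetical
 * helper below builds the PKRU value that makes a single pkey read-only.
 */
static inline u32 pkru_example_deny_write(u16 pkey)
{
        return PKRU_WD_BIT << (pkey * PKRU_BITS_PER_PKEY);
}
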
static inline u16 pte_flags_pkey(unsigned long pte_flags)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
        /* ifdef to avoid doing 59-bit shift on 32-bit values */
        return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0;
#else
        return 0;
#endif
}

static inline bool __pkru_allows_pkey(u16 pkey, bool write)
{
        u32 pkru = read_pkru();

        if (!__pkru_allows_read(pkru, pkey))
                return false;
        if (write && !__pkru_allows_write(pkru, pkey))
                return false;

        return true;
}

/*
 * 'pteval' can come from a PTE, PMD or PUD.  We only check
 * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
 * same value on all 3 types.
 */
static inline bool __pte_access_permitted(unsigned long pteval, bool write)
{
        unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;

        if (write)
                need_pte_bits |= _PAGE_RW;

        if ((pteval & need_pte_bits) != need_pte_bits)
                return 0;

        return __pkru_allows_pkey(pte_flags_pkey(pteval), write);
}

#define pte_access_permitted pte_access_permitted
static inline bool pte_access_permitted(pte_t pte, bool write)
{
        return __pte_access_permitted(pte_val(pte), write);
}

#define pmd_access_permitted pmd_access_permitted
static inline bool pmd_access_permitted(pmd_t pmd, bool write)
{
        return __pte_access_permitted(pmd_val(pmd), write);
}

#define pud_access_permitted pud_access_permitted
static inline bool pud_access_permitted(pud_t pud, bool write)
{
        return __pte_access_permitted(pud_val(pud), write);
}

#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);

static inline bool arch_has_pfn_modify_check(void)
{
        return boot_cpu_has_bug(X86_BUG_L1TF);
}

#define arch_faults_on_old_pte arch_faults_on_old_pte
static inline bool arch_faults_on_old_pte(void)
{
        return false;
}

#include <asm-generic/pgtable.h>
#endif  /* __ASSEMBLY__ */

#endif /* _ASM_X86_PGTABLE_H */