linux/arch/x86/include/asm/pgtable.h
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _ASM_X86_PGTABLE_H
   3#define _ASM_X86_PGTABLE_H
   4
   5#include <linux/mem_encrypt.h>
   6#include <asm/page.h>
   7#include <asm/pgtable_types.h>
   8
   9/*
  10 * Macro to mark a page protection value as UC-
  11 */
  12#define pgprot_noncached(prot)                                          \
  13        ((boot_cpu_data.x86 > 3)                                        \
  14         ? (__pgprot(pgprot_val(prot) |                                 \
  15                     cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS)))     \
  16         : (prot))
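    /*
     * Illustrative sketch (not part of this header): a driver's mmap
     * handler would typically apply pgprot_noncached() before remapping
     * MMIO pages, along the lines of:
     *
     *      vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
     *      return io_remap_pfn_range(vma, vma->vm_start, pfn,
     *                                vma->vm_end - vma->vm_start,
     *                                vma->vm_page_prot);
     */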
  17
  18/*
  19 * Macros to add or remove encryption attribute
  20 */
  21#define pgprot_encrypted(prot)  __pgprot(__sme_set(pgprot_val(prot)))
  22#define pgprot_decrypted(prot)  __pgprot(__sme_clr(pgprot_val(prot)))
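    /*
     * Illustrative sketch: with SME active, memory that must be shared
     * with a device or hypervisor is mapped with the encryption bit
     * cleared, e.g. an ioremap-style path doing
     *
     *      prot = pgprot_decrypted(prot);
     *
     * while pgprot_encrypted() re-applies it.  Both are no-ops when memory
     * encryption is disabled, since __sme_set()/__sme_clr() only OR in or
     * mask off sme_me_mask.
     */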
  23
  24#ifndef __ASSEMBLY__
  25#include <asm/x86_init.h>
  26#include <asm/fpu/xstate.h>
  27#include <asm/fpu/api.h>
  28#include <asm-generic/pgtable_uffd.h>
  29
  30extern pgd_t early_top_pgt[PTRS_PER_PGD];
  31bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
  32
  33void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
  34void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
  35void ptdump_walk_pgd_level_checkwx(void);
  36
  37#ifdef CONFIG_DEBUG_WX
  38#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
  39#else
  40#define debug_checkwx() do { } while (0)
  41#endif
  42
  43/*
  44 * ZERO_PAGE is a global shared page that is always zero: used
  45 * for zero-mapped memory areas etc..
  46 */
  47extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
  48        __visible;
  49#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
  50
  51extern spinlock_t pgd_lock;
  52extern struct list_head pgd_list;
  53
  54extern struct mm_struct *pgd_page_get_mm(struct page *page);
  55
  56extern pmdval_t early_pmd_flags;
  57
  58#ifdef CONFIG_PARAVIRT
  59#include <asm/paravirt.h>
  60#else  /* !CONFIG_PARAVIRT */
  61#define set_pte(ptep, pte)              native_set_pte(ptep, pte)
  62#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
  63
  64#define set_pte_atomic(ptep, pte)                                       \
  65        native_set_pte_atomic(ptep, pte)
  66
  67#define set_pmd(pmdp, pmd)              native_set_pmd(pmdp, pmd)
  68
  69#ifndef __PAGETABLE_P4D_FOLDED
  70#define set_pgd(pgdp, pgd)              native_set_pgd(pgdp, pgd)
  71#define pgd_clear(pgd)                  (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0)
  72#endif
  73
  74#ifndef set_p4d
  75# define set_p4d(p4dp, p4d)             native_set_p4d(p4dp, p4d)
  76#endif
  77
  78#ifndef __PAGETABLE_PUD_FOLDED
  79#define p4d_clear(p4d)                  native_p4d_clear(p4d)
  80#endif
  81
  82#ifndef set_pud
  83# define set_pud(pudp, pud)             native_set_pud(pudp, pud)
  84#endif
  85
  86#ifndef __PAGETABLE_PUD_FOLDED
  87#define pud_clear(pud)                  native_pud_clear(pud)
  88#endif
  89
  90#define pte_clear(mm, addr, ptep)       native_pte_clear(mm, addr, ptep)
  91#define pmd_clear(pmd)                  native_pmd_clear(pmd)
  92
  93#define pgd_val(x)      native_pgd_val(x)
  94#define __pgd(x)        native_make_pgd(x)
  95
  96#ifndef __PAGETABLE_P4D_FOLDED
  97#define p4d_val(x)      native_p4d_val(x)
  98#define __p4d(x)        native_make_p4d(x)
  99#endif
 100
 101#ifndef __PAGETABLE_PUD_FOLDED
 102#define pud_val(x)      native_pud_val(x)
 103#define __pud(x)        native_make_pud(x)
 104#endif
 105
 106#ifndef __PAGETABLE_PMD_FOLDED
 107#define pmd_val(x)      native_pmd_val(x)
 108#define __pmd(x)        native_make_pmd(x)
 109#endif
 110
 111#define pte_val(x)      native_pte_val(x)
 112#define __pte(x)        native_make_pte(x)
 113
 114#define arch_end_context_switch(prev)   do {} while(0)
 115
 116#endif  /* CONFIG_PARAVIRT */
 117
 118/*
 119 * The following only work if pte_present() is true.
 120 * Undefined behaviour if not..
 121 */
 122static inline int pte_dirty(pte_t pte)
 123{
 124        return pte_flags(pte) & _PAGE_DIRTY;
 125}
 126
 127
 128static inline u32 read_pkru(void)
 129{
 130        if (boot_cpu_has(X86_FEATURE_OSPKE))
 131                return rdpkru();
 132        return 0;
 133}
 134
 135static inline void write_pkru(u32 pkru)
 136{
 137        struct pkru_state *pk;
 138
 139        if (!boot_cpu_has(X86_FEATURE_OSPKE))
 140                return;
 141
 142        pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU);
 143
 144        /*
 145         * The PKRU value in xstate needs to be in sync with the value that is
 146         * written to the CPU. The FPU restore on return to userland would
 147         * otherwise load the previous value again.
 148         */
 149        fpregs_lock();
 150        if (pk)
 151                pk->pkru = pkru;
 152        __write_pkru(pkru);
 153        fpregs_unlock();
 154}
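    /*
     * Illustrative sketch: each protection key owns a two-bit field in
     * PKRU (see PKRU_AD_BIT/PKRU_WD_BIT/PKRU_BITS_PER_PKEY later in this
     * file).  Denying writes through pkey 3, for example, amounts to:
     *
     *      u32 pkru = read_pkru();
     *      pkru |= PKRU_WD_BIT << (3 * PKRU_BITS_PER_PKEY);
     *      write_pkru(pkru);
     *
     * The kernel's real update path for pkey rights is
     * arch_set_user_pkey_access(); the above only shows how the value
     * written here maps onto the per-key bit pairs.
     */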
 155
 156static inline int pte_young(pte_t pte)
 157{
 158        return pte_flags(pte) & _PAGE_ACCESSED;
 159}
 160
 161static inline int pmd_dirty(pmd_t pmd)
 162{
 163        return pmd_flags(pmd) & _PAGE_DIRTY;
 164}
 165
 166static inline int pmd_young(pmd_t pmd)
 167{
 168        return pmd_flags(pmd) & _PAGE_ACCESSED;
 169}
 170
 171static inline int pud_dirty(pud_t pud)
 172{
 173        return pud_flags(pud) & _PAGE_DIRTY;
 174}
 175
 176static inline int pud_young(pud_t pud)
 177{
 178        return pud_flags(pud) & _PAGE_ACCESSED;
 179}
 180
 181static inline int pte_write(pte_t pte)
 182{
 183        return pte_flags(pte) & _PAGE_RW;
 184}
 185
 186static inline int pte_huge(pte_t pte)
 187{
 188        return pte_flags(pte) & _PAGE_PSE;
 189}
 190
 191static inline int pte_global(pte_t pte)
 192{
 193        return pte_flags(pte) & _PAGE_GLOBAL;
 194}
 195
 196static inline int pte_exec(pte_t pte)
 197{
 198        return !(pte_flags(pte) & _PAGE_NX);
 199}
 200
 201static inline int pte_special(pte_t pte)
 202{
 203        return pte_flags(pte) & _PAGE_SPECIAL;
 204}
 205
 206/* Entries that were set to PROT_NONE are inverted */
 207
 208static inline u64 protnone_mask(u64 val);
 209
 210static inline unsigned long pte_pfn(pte_t pte)
 211{
 212        phys_addr_t pfn = pte_val(pte);
 213        pfn ^= protnone_mask(pfn);
 214        return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
 215}
 216
 217static inline unsigned long pmd_pfn(pmd_t pmd)
 218{
 219        phys_addr_t pfn = pmd_val(pmd);
 220        pfn ^= protnone_mask(pfn);
 221        return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
 222}
 223
 224static inline unsigned long pud_pfn(pud_t pud)
 225{
 226        phys_addr_t pfn = pud_val(pud);
 227        pfn ^= protnone_mask(pfn);
 228        return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 229}
 230
 231static inline unsigned long p4d_pfn(p4d_t p4d)
 232{
 233        return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
 234}
 235
 236static inline unsigned long pgd_pfn(pgd_t pgd)
 237{
 238        return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
 239}
 240
 241#define p4d_leaf        p4d_large
 242static inline int p4d_large(p4d_t p4d)
 243{
 244        /* No 512 GiB pages yet */
 245        return 0;
 246}
 247
 248#define pte_page(pte)   pfn_to_page(pte_pfn(pte))
 249
 250#define pmd_leaf        pmd_large
 251static inline int pmd_large(pmd_t pte)
 252{
 253        return pmd_flags(pte) & _PAGE_PSE;
 254}
 255
 256#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 257/* NOTE: when testing for a huge page, consider also pmd_devmap(), or use pmd_large() */
 258static inline int pmd_trans_huge(pmd_t pmd)
 259{
 260        return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
 261}
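    /*
     * Illustrative sketch: callers that want to treat both THP and DAX
     * entries as huge generally test both predicates, roughly
     *
     *      if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
     *              ptl = pmd_lock(vma->vm_mm, pmd);
     *
     * which is (approximately) the pattern wrapped by pmd_trans_huge_lock()
     * in linux/huge_mm.h.
     */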
 262
 263#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 264static inline int pud_trans_huge(pud_t pud)
 265{
 266        return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
 267}
 268#endif
 269
 270#define has_transparent_hugepage has_transparent_hugepage
 271static inline int has_transparent_hugepage(void)
 272{
 273        return boot_cpu_has(X86_FEATURE_PSE);
 274}
 275
 276#ifdef __HAVE_ARCH_PTE_DEVMAP
 277static inline int pmd_devmap(pmd_t pmd)
 278{
 279        return !!(pmd_val(pmd) & _PAGE_DEVMAP);
 280}
 281
 282#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 283static inline int pud_devmap(pud_t pud)
 284{
 285        return !!(pud_val(pud) & _PAGE_DEVMAP);
 286}
 287#else
 288static inline int pud_devmap(pud_t pud)
 289{
 290        return 0;
 291}
 292#endif
 293
 294static inline int pgd_devmap(pgd_t pgd)
 295{
 296        return 0;
 297}
 298#endif
 299#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 300
 301static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
 302{
 303        pteval_t v = native_pte_val(pte);
 304
 305        return native_make_pte(v | set);
 306}
 307
 308static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
 309{
 310        pteval_t v = native_pte_val(pte);
 311
 312        return native_make_pte(v & ~clear);
 313}
 314
 315#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
 316static inline int pte_uffd_wp(pte_t pte)
 317{
 318        return pte_flags(pte) & _PAGE_UFFD_WP;
 319}
 320
 321static inline pte_t pte_mkuffd_wp(pte_t pte)
 322{
 323        return pte_set_flags(pte, _PAGE_UFFD_WP);
 324}
 325
 326static inline pte_t pte_clear_uffd_wp(pte_t pte)
 327{
 328        return pte_clear_flags(pte, _PAGE_UFFD_WP);
 329}
 330#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
 331
 332static inline pte_t pte_mkclean(pte_t pte)
 333{
 334        return pte_clear_flags(pte, _PAGE_DIRTY);
 335}
 336
 337static inline pte_t pte_mkold(pte_t pte)
 338{
 339        return pte_clear_flags(pte, _PAGE_ACCESSED);
 340}
 341
 342static inline pte_t pte_wrprotect(pte_t pte)
 343{
 344        return pte_clear_flags(pte, _PAGE_RW);
 345}
 346
 347static inline pte_t pte_mkexec(pte_t pte)
 348{
 349        return pte_clear_flags(pte, _PAGE_NX);
 350}
 351
 352static inline pte_t pte_mkdirty(pte_t pte)
 353{
 354        return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
 355}
 356
 357static inline pte_t pte_mkyoung(pte_t pte)
 358{
 359        return pte_set_flags(pte, _PAGE_ACCESSED);
 360}
 361
 362static inline pte_t pte_mkwrite(pte_t pte)
 363{
 364        return pte_set_flags(pte, _PAGE_RW);
 365}
 366
 367static inline pte_t pte_mkhuge(pte_t pte)
 368{
 369        return pte_set_flags(pte, _PAGE_PSE);
 370}
 371
 372static inline pte_t pte_clrhuge(pte_t pte)
 373{
 374        return pte_clear_flags(pte, _PAGE_PSE);
 375}
 376
 377static inline pte_t pte_mkglobal(pte_t pte)
 378{
 379        return pte_set_flags(pte, _PAGE_GLOBAL);
 380}
 381
 382static inline pte_t pte_clrglobal(pte_t pte)
 383{
 384        return pte_clear_flags(pte, _PAGE_GLOBAL);
 385}
 386
 387static inline pte_t pte_mkspecial(pte_t pte)
 388{
 389        return pte_set_flags(pte, _PAGE_SPECIAL);
 390}
 391
 392static inline pte_t pte_mkdevmap(pte_t pte)
 393{
 394        return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP);
 395}
 396
 397static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
 398{
 399        pmdval_t v = native_pmd_val(pmd);
 400
 401        return native_make_pmd(v | set);
 402}
 403
 404static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
 405{
 406        pmdval_t v = native_pmd_val(pmd);
 407
 408        return native_make_pmd(v & ~clear);
 409}
 410
 411#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
 412static inline int pmd_uffd_wp(pmd_t pmd)
 413{
 414        return pmd_flags(pmd) & _PAGE_UFFD_WP;
 415}
 416
 417static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
 418{
 419        return pmd_set_flags(pmd, _PAGE_UFFD_WP);
 420}
 421
 422static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
 423{
 424        return pmd_clear_flags(pmd, _PAGE_UFFD_WP);
 425}
 426#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
 427
 428static inline pmd_t pmd_mkold(pmd_t pmd)
 429{
 430        return pmd_clear_flags(pmd, _PAGE_ACCESSED);
 431}
 432
 433static inline pmd_t pmd_mkclean(pmd_t pmd)
 434{
 435        return pmd_clear_flags(pmd, _PAGE_DIRTY);
 436}
 437
 438static inline pmd_t pmd_wrprotect(pmd_t pmd)
 439{
 440        return pmd_clear_flags(pmd, _PAGE_RW);
 441}
 442
 443static inline pmd_t pmd_mkdirty(pmd_t pmd)
 444{
 445        return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
 446}
 447
 448static inline pmd_t pmd_mkdevmap(pmd_t pmd)
 449{
 450        return pmd_set_flags(pmd, _PAGE_DEVMAP);
 451}
 452
 453static inline pmd_t pmd_mkhuge(pmd_t pmd)
 454{
 455        return pmd_set_flags(pmd, _PAGE_PSE);
 456}
 457
 458static inline pmd_t pmd_mkyoung(pmd_t pmd)
 459{
 460        return pmd_set_flags(pmd, _PAGE_ACCESSED);
 461}
 462
 463static inline pmd_t pmd_mkwrite(pmd_t pmd)
 464{
 465        return pmd_set_flags(pmd, _PAGE_RW);
 466}
 467
 468static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
 469{
 470        pudval_t v = native_pud_val(pud);
 471
 472        return native_make_pud(v | set);
 473}
 474
 475static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
 476{
 477        pudval_t v = native_pud_val(pud);
 478
 479        return native_make_pud(v & ~clear);
 480}
 481
 482static inline pud_t pud_mkold(pud_t pud)
 483{
 484        return pud_clear_flags(pud, _PAGE_ACCESSED);
 485}
 486
 487static inline pud_t pud_mkclean(pud_t pud)
 488{
 489        return pud_clear_flags(pud, _PAGE_DIRTY);
 490}
 491
 492static inline pud_t pud_wrprotect(pud_t pud)
 493{
 494        return pud_clear_flags(pud, _PAGE_RW);
 495}
 496
 497static inline pud_t pud_mkdirty(pud_t pud)
 498{
 499        return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
 500}
 501
 502static inline pud_t pud_mkdevmap(pud_t pud)
 503{
 504        return pud_set_flags(pud, _PAGE_DEVMAP);
 505}
 506
 507static inline pud_t pud_mkhuge(pud_t pud)
 508{
 509        return pud_set_flags(pud, _PAGE_PSE);
 510}
 511
 512static inline pud_t pud_mkyoung(pud_t pud)
 513{
 514        return pud_set_flags(pud, _PAGE_ACCESSED);
 515}
 516
 517static inline pud_t pud_mkwrite(pud_t pud)
 518{
 519        return pud_set_flags(pud, _PAGE_RW);
 520}
 521
 522#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 523static inline int pte_soft_dirty(pte_t pte)
 524{
 525        return pte_flags(pte) & _PAGE_SOFT_DIRTY;
 526}
 527
 528static inline int pmd_soft_dirty(pmd_t pmd)
 529{
 530        return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
 531}
 532
 533static inline int pud_soft_dirty(pud_t pud)
 534{
 535        return pud_flags(pud) & _PAGE_SOFT_DIRTY;
 536}
 537
 538static inline pte_t pte_mksoft_dirty(pte_t pte)
 539{
 540        return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
 541}
 542
 543static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 544{
 545        return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 546}
 547
 548static inline pud_t pud_mksoft_dirty(pud_t pud)
 549{
 550        return pud_set_flags(pud, _PAGE_SOFT_DIRTY);
 551}
 552
 553static inline pte_t pte_clear_soft_dirty(pte_t pte)
 554{
 555        return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
 556}
 557
 558static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
 559{
 560        return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
 561}
 562
 563static inline pud_t pud_clear_soft_dirty(pud_t pud)
 564{
 565        return pud_clear_flags(pud, _PAGE_SOFT_DIRTY);
 566}
 567
 568#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 569
 570/*
 571 * Mask out unsupported bits in a present pgprot.  Non-present pgprots
 572 * can use those bits for other purposes, so leave them be.
 573 */
 574static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
 575{
 576        pgprotval_t protval = pgprot_val(pgprot);
 577
 578        if (protval & _PAGE_PRESENT)
 579                protval &= __supported_pte_mask;
 580
 581        return protval;
 582}
 583
 584static inline pgprotval_t check_pgprot(pgprot_t pgprot)
 585{
 586        pgprotval_t massaged_val = massage_pgprot(pgprot);
 587
 588        /* mmdebug.h can not be included here because of dependencies */
 589#ifdef CONFIG_DEBUG_VM
 590        WARN_ONCE(pgprot_val(pgprot) != massaged_val,
 591                  "attempted to set unsupported pgprot: %016llx "
 592                  "bits: %016llx supported: %016llx\n",
 593                  (u64)pgprot_val(pgprot),
 594                  (u64)pgprot_val(pgprot) ^ massaged_val,
 595                  (u64)__supported_pte_mask);
 596#endif
 597
 598        return massaged_val;
 599}
 600
 601static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 602{
 603        phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
 604        pfn ^= protnone_mask(pgprot_val(pgprot));
 605        pfn &= PTE_PFN_MASK;
 606        return __pte(pfn | check_pgprot(pgprot));
 607}
 608
 609static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 610{
 611        phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
 612        pfn ^= protnone_mask(pgprot_val(pgprot));
 613        pfn &= PHYSICAL_PMD_PAGE_MASK;
 614        return __pmd(pfn | check_pgprot(pgprot));
 615}
 616
 617static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
 618{
 619        phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
 620        pfn ^= protnone_mask(pgprot_val(pgprot));
 621        pfn &= PHYSICAL_PUD_PAGE_MASK;
 622        return __pud(pfn | check_pgprot(pgprot));
 623}
 624
 625static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 626{
 627        return pfn_pmd(pmd_pfn(pmd),
 628                      __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
 629}
 630
 631static inline pud_t pud_mknotpresent(pud_t pud)
 632{
 633        return pfn_pud(pud_pfn(pud),
 634              __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
 635}
 636
 637static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
 638
 639static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 640{
 641        pteval_t val = pte_val(pte), oldval = val;
 642
 643        /*
 644         * Chop off the NX bit (if present), and add the NX portion of
 645         * the newprot (if present):
 646         */
 647        val &= _PAGE_CHG_MASK;
 648        val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
 649        val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
 650        return __pte(val);
 651}
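    /*
     * Illustrative sketch: this is roughly what mprotect() does per PTE
     * (see change_pte_range() in mm/mprotect.c).  The PFN and the bits in
     * _PAGE_CHG_MASK survive; only the protection bits are replaced:
     *
     *      pte_t old = *ptep;
     *      pte_t new = pte_modify(old, newprot);
     */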
 652
 653static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 654{
 655        pmdval_t val = pmd_val(pmd), oldval = val;
 656
 657        val &= _HPAGE_CHG_MASK;
 658        val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
 659        val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
 660        return __pmd(val);
 661}
 662
 663/*
 664 * mprotect needs to preserve PAT and encryption bits when updating
 665 * vm_page_prot
 666 */
 667#define pgprot_modify pgprot_modify
 668static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 669{
 670        pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
 671        pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK;
 672        return __pgprot(preservebits | addbits);
 673}
 674
 675#define pte_pgprot(x) __pgprot(pte_flags(x))
 676#define pmd_pgprot(x) __pgprot(pmd_flags(x))
 677#define pud_pgprot(x) __pgprot(pud_flags(x))
 678#define p4d_pgprot(x) __pgprot(p4d_flags(x))
 679
 680#define canon_pgprot(p) __pgprot(massage_pgprot(p))
 681
 682static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
 683{
 684        return canon_pgprot(prot);
 685}
 686
 687static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
 688                                         enum page_cache_mode pcm,
 689                                         enum page_cache_mode new_pcm)
 690{
 691        /*
 692         * PAT type is always WB for untracked ranges, so no need to check.
 693         */
 694        if (x86_platform.is_untracked_pat_range(paddr, paddr + size))
 695                return 1;
 696
 697        /*
 698         * Certain new memtypes are not allowed with certain
 699         * requested memtype:
 700         * - request is uncached, return cannot be write-back
 701         * - request is write-combine, return cannot be write-back
 702         * - request is write-through, return cannot be write-back
 703         * - request is write-through, return cannot be write-combine
 704         */
 705        if ((pcm == _PAGE_CACHE_MODE_UC_MINUS &&
 706             new_pcm == _PAGE_CACHE_MODE_WB) ||
 707            (pcm == _PAGE_CACHE_MODE_WC &&
 708             new_pcm == _PAGE_CACHE_MODE_WB) ||
 709            (pcm == _PAGE_CACHE_MODE_WT &&
 710             new_pcm == _PAGE_CACHE_MODE_WB) ||
 711            (pcm == _PAGE_CACHE_MODE_WT &&
 712             new_pcm == _PAGE_CACHE_MODE_WC)) {
 713                return 0;
 714        }
 715
 716        return 1;
 717}
 718
 719pmd_t *populate_extra_pmd(unsigned long vaddr);
 720pte_t *populate_extra_pte(unsigned long vaddr);
 721
 722#ifdef CONFIG_PAGE_TABLE_ISOLATION
 723pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd);
 724
 725/*
 726 * Take a PGD location (pgdp) and a pgd value that needs to be set there.
 727 * Populates the user and returns the resulting PGD that must be set in
 728 * the kernel copy of the page tables.
 729 */
 730static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
 731{
 732        if (!static_cpu_has(X86_FEATURE_PTI))
 733                return pgd;
 734        return __pti_set_user_pgtbl(pgdp, pgd);
 735}
 736#else   /* CONFIG_PAGE_TABLE_ISOLATION */
 737static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
 738{
 739        return pgd;
 740}
 741#endif  /* CONFIG_PAGE_TABLE_ISOLATION */
 742
 743#endif  /* __ASSEMBLY__ */
 744
 745
 746#ifdef CONFIG_X86_32
 747# include <asm/pgtable_32.h>
 748#else
 749# include <asm/pgtable_64.h>
 750#endif
 751
 752#ifndef __ASSEMBLY__
 753#include <linux/mm_types.h>
 754#include <linux/mmdebug.h>
 755#include <linux/log2.h>
 756#include <asm/fixmap.h>
 757
 758static inline int pte_none(pte_t pte)
 759{
 760        return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK));
 761}
 762
 763#define __HAVE_ARCH_PTE_SAME
 764static inline int pte_same(pte_t a, pte_t b)
 765{
 766        return a.pte == b.pte;
 767}
 768
 769static inline int pte_present(pte_t a)
 770{
 771        return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 772}
 773
 774#ifdef __HAVE_ARCH_PTE_DEVMAP
 775static inline int pte_devmap(pte_t a)
 776{
 777        return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP;
 778}
 779#endif
 780
 781#define pte_accessible pte_accessible
 782static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
 783{
 784        if (pte_flags(a) & _PAGE_PRESENT)
 785                return true;
 786
 787        if ((pte_flags(a) & _PAGE_PROTNONE) &&
 788                        mm_tlb_flush_pending(mm))
 789                return true;
 790
 791        return false;
 792}
 793
 794static inline int pmd_present(pmd_t pmd)
 795{
 796        /*
 797         * Checking for _PAGE_PSE is needed too because
 798         * split_huge_page will temporarily clear the present bit (but
 799         * the _PAGE_PSE flag will remain set at all times while the
 800         * _PAGE_PRESENT bit is clear).
 801         */
 802        return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
 803}
 804
 805#ifdef CONFIG_NUMA_BALANCING
 806/*
 807 * These work without NUMA balancing but the kernel does not care. See the
 808 * comment in include/asm-generic/pgtable.h
 809 */
 810static inline int pte_protnone(pte_t pte)
 811{
 812        return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT))
 813                == _PAGE_PROTNONE;
 814}
 815
 816static inline int pmd_protnone(pmd_t pmd)
 817{
 818        return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT))
 819                == _PAGE_PROTNONE;
 820}
 821#endif /* CONFIG_NUMA_BALANCING */
 822
 823static inline int pmd_none(pmd_t pmd)
 824{
 825        /* Only check low word on 32-bit platforms, since it might be
 826           out of sync with upper half. */
 827        unsigned long val = native_pmd_val(pmd);
 828        return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0;
 829}
 830
 831static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 832{
 833        return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd));
 834}
 835
 836/*
 837 * Currently stuck as a macro due to indirect forward reference to
 838 * linux/mmzone.h's __section_mem_map_addr() definition:
 839 */
 840#define pmd_page(pmd)   pfn_to_page(pmd_pfn(pmd))
 841
 842/*
 843 * the pmd page can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
 844 *
 845 * this function returns the index of the entry in the pmd page which would
 846 * control the given virtual address
 847 */
 848static inline unsigned long pmd_index(unsigned long address)
 849{
 850        return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
 851}
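    /*
     * Worked example (4KiB pages, PMD_SHIFT == 21): each PMD entry covers
     * 2MiB, so pmd_index() extracts address bits 21..29, e.g.
     * pmd_index(0x654321) == (0x654321 >> 21) & 511 == 3.
     */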
 852
 853/*
 854 * Conversion functions: convert a page and protection to a page entry,
 855 * and a page entry and page directory to the page they refer to.
 856 *
 857 * (Currently stuck as a macro because of indirect forward reference
 858 * to linux/mm.h:page_to_nid())
 859 */
 860#define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
 861
 862/*
 863 * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
 864 *
 865 * this function returns the index of the entry in the pte page which would
 866 * control the given virtual address
 867 */
 868static inline unsigned long pte_index(unsigned long address)
 869{
 870        return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
 871}
 872
 873static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
 874{
 875        return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
 876}
 877
 878static inline int pmd_bad(pmd_t pmd)
 879{
 880        return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
 881}
 882
 883static inline unsigned long pages_to_mb(unsigned long npg)
 884{
 885        return npg >> (20 - PAGE_SHIFT);
 886}
 887
 888#if CONFIG_PGTABLE_LEVELS > 2
 889static inline int pud_none(pud_t pud)
 890{
 891        return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
 892}
 893
 894static inline int pud_present(pud_t pud)
 895{
 896        return pud_flags(pud) & _PAGE_PRESENT;
 897}
 898
 899static inline unsigned long pud_page_vaddr(pud_t pud)
 900{
 901        return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
 902}
 903
 904/*
 905 * Currently stuck as a macro due to indirect forward reference to
 906 * linux/mmzone.h's __section_mem_map_addr() definition:
 907 */
 908#define pud_page(pud)   pfn_to_page(pud_pfn(pud))
 909
 910/* Find an entry in the second-level page table.. */
 911static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
 912{
 913        return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
 914}
 915
 916#define pud_leaf        pud_large
 917static inline int pud_large(pud_t pud)
 918{
 919        return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
 920                (_PAGE_PSE | _PAGE_PRESENT);
 921}
 922
 923static inline int pud_bad(pud_t pud)
 924{
 925        return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
 926}
 927#else
 928#define pud_leaf        pud_large
 929static inline int pud_large(pud_t pud)
 930{
 931        return 0;
 932}
 933#endif  /* CONFIG_PGTABLE_LEVELS > 2 */
 934
 935static inline unsigned long pud_index(unsigned long address)
 936{
 937        return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
 938}
 939
 940#if CONFIG_PGTABLE_LEVELS > 3
 941static inline int p4d_none(p4d_t p4d)
 942{
 943        return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
 944}
 945
 946static inline int p4d_present(p4d_t p4d)
 947{
 948        return p4d_flags(p4d) & _PAGE_PRESENT;
 949}
 950
 951static inline unsigned long p4d_page_vaddr(p4d_t p4d)
 952{
 953        return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
 954}
 955
 956/*
 957 * Currently stuck as a macro due to indirect forward reference to
 958 * linux/mmzone.h's __section_mem_map_addr() definition:
 959 */
 960#define p4d_page(p4d)   pfn_to_page(p4d_pfn(p4d))
 961
 962/* Find an entry in the third-level page table.. */
 963static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 964{
 965        return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
 966}
 967
 968static inline int p4d_bad(p4d_t p4d)
 969{
 970        unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
 971
 972        if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
 973                ignore_flags |= _PAGE_NX;
 974
 975        return (p4d_flags(p4d) & ~ignore_flags) != 0;
 976}
 977#endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 978
 979static inline unsigned long p4d_index(unsigned long address)
 980{
 981        return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
 982}
 983
 984#if CONFIG_PGTABLE_LEVELS > 4
 985static inline int pgd_present(pgd_t pgd)
 986{
 987        if (!pgtable_l5_enabled())
 988                return 1;
 989        return pgd_flags(pgd) & _PAGE_PRESENT;
 990}
 991
 992static inline unsigned long pgd_page_vaddr(pgd_t pgd)
 993{
 994        return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK);
 995}
 996
 997/*
 998 * Currently stuck as a macro due to indirect forward reference to
 999 * linux/mmzone.h's __section_mem_map_addr() definition:
1000 */
1001#define pgd_page(pgd)   pfn_to_page(pgd_pfn(pgd))
1002
1003/* to find an entry in a page-table-directory. */
1004static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
1005{
1006        if (!pgtable_l5_enabled())
1007                return (p4d_t *)pgd;
1008        return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
1009}
1010
1011static inline int pgd_bad(pgd_t pgd)
1012{
1013        unsigned long ignore_flags = _PAGE_USER;
1014
1015        if (!pgtable_l5_enabled())
1016                return 0;
1017
1018        if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
1019                ignore_flags |= _PAGE_NX;
1020
1021        return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
1022}
1023
1024static inline int pgd_none(pgd_t pgd)
1025{
1026        if (!pgtable_l5_enabled())
1027                return 0;
1028        /*
1029         * There is no need to do a workaround for the KNL stray
1030         * A/D bit erratum here.  PGDs only point to page tables
1031         * except on 32-bit non-PAE which is not supported on
1032         * KNL.
1033         */
1034        return !native_pgd_val(pgd);
1035}
1036#endif  /* CONFIG_PGTABLE_LEVELS > 4 */
1037
1038#endif  /* __ASSEMBLY__ */
1039
1040/*
1041 * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
1042 *
1043 * this macro returns the index of the entry in the pgd page which would
1044 * control the given virtual address
1045 */
1046#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
1047
1048/*
1049 * pgd_offset() returns a (pgd_t *)
1050 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
1051 */
1052#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
1053/*
1054 * a shortcut to get a pgd_t in a given mm
1055 */
1056#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
1057/*
1058 * a shortcut which implies the use of the kernel's pgd, instead
1059 * of a process's
1060 */
1061#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
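    /*
     * Illustrative sketch of a full software walk of the kernel page
     * tables using the helpers in this file (a real walk must also check
     * the *_none()/*_bad() predicates and stop early at large mappings):
     *
     *      pgd_t *pgd = pgd_offset_k(addr);
     *      p4d_t *p4d = p4d_offset(pgd, addr);
     *      pud_t *pud = pud_offset(p4d, addr);
     *      pmd_t *pmd = pmd_offset(pud, addr);
     *      pte_t *pte = pte_offset_kernel(pmd, addr);
     */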
1062
1063
1064#define KERNEL_PGD_BOUNDARY     pgd_index(PAGE_OFFSET)
1065#define KERNEL_PGD_PTRS         (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
1066
1067#ifndef __ASSEMBLY__
1068
1069extern int direct_gbpages;
1070void init_mem_mapping(void);
1071void early_alloc_pgt_buf(void);
1072extern void memblock_find_dma_reserve(void);
1073
1074#ifdef CONFIG_X86_64
1075/* Realmode trampoline initialization. */
1076extern pgd_t trampoline_pgd_entry;
1077static inline void __meminit init_trampoline_default(void)
1078{
1079        /* Default trampoline pgd value */
1080        trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
1081}
1082
1083void __init poking_init(void);
1084
1085unsigned long init_memory_mapping(unsigned long start,
1086                                  unsigned long end, pgprot_t prot);
1087
1088# ifdef CONFIG_RANDOMIZE_MEMORY
1089void __meminit init_trampoline(void);
1090# else
1091#  define init_trampoline init_trampoline_default
1092# endif
1093#else
1094static inline void init_trampoline(void) { }
1095#endif
1096
1097/* local pte updates need not use xchg for locking */
1098static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
1099{
1100        pte_t res = *ptep;
1101
1102        /* Pure native function needs no input for mm, addr */
1103        native_pte_clear(NULL, 0, ptep);
1104        return res;
1105}
1106
1107static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
1108{
1109        pmd_t res = *pmdp;
1110
1111        native_pmd_clear(pmdp);
1112        return res;
1113}
1114
1115static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
1116{
1117        pud_t res = *pudp;
1118
1119        native_pud_clear(pudp);
1120        return res;
1121}
1122
1123static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
1124                                     pte_t *ptep , pte_t pte)
1125{
1126        native_set_pte(ptep, pte);
1127}
1128
1129static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
1130                              pmd_t *pmdp, pmd_t pmd)
1131{
1132        set_pmd(pmdp, pmd);
1133}
1134
1135static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
1136                              pud_t *pudp, pud_t pud)
1137{
1138        native_set_pud(pudp, pud);
1139}
1140
1141/*
1142 * We only update the dirty/accessed state if we set
1143 * the dirty bit by hand in the kernel, since the hardware
1144 * will do the accessed bit for us, and we don't want to
1145 * race with other CPUs that might be updating the dirty
1146 * bit at the same time.
1147 */
1148struct vm_area_struct;
1149
1150#define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
1151extern int ptep_set_access_flags(struct vm_area_struct *vma,
1152                                 unsigned long address, pte_t *ptep,
1153                                 pte_t entry, int dirty);
1154
1155#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
1156extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
1157                                     unsigned long addr, pte_t *ptep);
1158
1159#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
1160extern int ptep_clear_flush_young(struct vm_area_struct *vma,
1161                                  unsigned long address, pte_t *ptep);
1162
1163#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
1164static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
1165                                       pte_t *ptep)
1166{
1167        pte_t pte = native_ptep_get_and_clear(ptep);
1168        return pte;
1169}
1170
1171#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
1172static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
1173                                            unsigned long addr, pte_t *ptep,
1174                                            int full)
1175{
1176        pte_t pte;
1177        if (full) {
1178                /*
1179                 * Full address destruction in progress; paravirt does not
1180                 * care about updates and native needs no locking
1181                 */
1182                pte = native_local_ptep_get_and_clear(ptep);
1183        } else {
1184                pte = ptep_get_and_clear(mm, addr, ptep);
1185        }
1186        return pte;
1187}
1188
1189#define __HAVE_ARCH_PTEP_SET_WRPROTECT
1190static inline void ptep_set_wrprotect(struct mm_struct *mm,
1191                                      unsigned long addr, pte_t *ptep)
1192{
1193        clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
1194}
1195
1196#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
1197
1198#define mk_pmd(page, pgprot)   pfn_pmd(page_to_pfn(page), (pgprot))
1199
1200#define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
1201extern int pmdp_set_access_flags(struct vm_area_struct *vma,
1202                                 unsigned long address, pmd_t *pmdp,
1203                                 pmd_t entry, int dirty);
1204extern int pudp_set_access_flags(struct vm_area_struct *vma,
1205                                 unsigned long address, pud_t *pudp,
1206                                 pud_t entry, int dirty);
1207
1208#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
1209extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
1210                                     unsigned long addr, pmd_t *pmdp);
1211extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
1212                                     unsigned long addr, pud_t *pudp);
1213
1214#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
1215extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
1216                                  unsigned long address, pmd_t *pmdp);
1217
1218
1219#define pmd_write pmd_write
1220static inline int pmd_write(pmd_t pmd)
1221{
1222        return pmd_flags(pmd) & _PAGE_RW;
1223}
1224
1225#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
1226static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
1227                                       pmd_t *pmdp)
1228{
1229        return native_pmdp_get_and_clear(pmdp);
1230}
1231
1232#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
1233static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
1234                                        unsigned long addr, pud_t *pudp)
1235{
1236        return native_pudp_get_and_clear(pudp);
1237}
1238
1239#define __HAVE_ARCH_PMDP_SET_WRPROTECT
1240static inline void pmdp_set_wrprotect(struct mm_struct *mm,
1241                                      unsigned long addr, pmd_t *pmdp)
1242{
1243        clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
1244}
1245
1246#define pud_write pud_write
1247static inline int pud_write(pud_t pud)
1248{
1249        return pud_flags(pud) & _PAGE_RW;
1250}
1251
1252#ifndef pmdp_establish
1253#define pmdp_establish pmdp_establish
1254static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
1255                unsigned long address, pmd_t *pmdp, pmd_t pmd)
1256{
1257        if (IS_ENABLED(CONFIG_SMP)) {
1258                return xchg(pmdp, pmd);
1259        } else {
1260                pmd_t old = *pmdp;
1261                *pmdp = pmd;
1262                return old;
1263        }
1264}
1265#endif
1266/*
1267 * Page table pages are page-aligned.  The lower half of the top
1268 * level is used for userspace and the top half for the kernel.
1269 *
1270 * Returns true for parts of the PGD that map userspace and
1271 * false for the parts that map the kernel.
1272 */
1273static inline bool pgdp_maps_userspace(void *__ptr)
1274{
1275        unsigned long ptr = (unsigned long)__ptr;
1276
1277        return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
1278}
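    /*
     * Worked example (assuming the usual 4KiB PGD page, 8-byte pgd_t and
     * PGD_KERNEL_START == 256 from pgtable_64_types.h): entries at page
     * offsets 0x000-0x7f8 (indices 0-255) map userspace, entries at
     * 0x800-0xff8 map the kernel, so the test above is simply "offset is
     * in the lower half of the page".
     */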
1279
1280#define pgd_leaf        pgd_large
1281static inline int pgd_large(pgd_t pgd) { return 0; }
1282
1283#ifdef CONFIG_PAGE_TABLE_ISOLATION
1284/*
1285 * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
1286 * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
1287 * the user one is in the last 4k.  To switch between them, you
1288 * just need to flip the 12th bit in their addresses.
1289 */
1290#define PTI_PGTABLE_SWITCH_BIT  PAGE_SHIFT
1291
1292/*
1293 * This generates better code than the inline assembly in
1294 * __set_bit().
1295 */
1296static inline void *ptr_set_bit(void *ptr, int bit)
1297{
1298        unsigned long __ptr = (unsigned long)ptr;
1299
1300        __ptr |= BIT(bit);
1301        return (void *)__ptr;
1302}
1303static inline void *ptr_clear_bit(void *ptr, int bit)
1304{
1305        unsigned long __ptr = (unsigned long)ptr;
1306
1307        __ptr &= ~BIT(bit);
1308        return (void *)__ptr;
1309}
1310
1311static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
1312{
1313        return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
1314}
1315
1316static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
1317{
1318        return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
1319}
1320
1321static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
1322{
1323        return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
1324}
1325
1326static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
1327{
1328        return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
1329}
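    /*
     * Worked example: PTI_PGTABLE_SWITCH_BIT is PAGE_SHIFT (12), so the
     * two halves of the order-1 allocation differ only in bit 12 of their
     * address.  A kernel PGD page at, say, 0x...c000 has its user
     * counterpart at 0x...d000; the helpers above just set or clear that
     * bit.
     */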
1330#endif /* CONFIG_PAGE_TABLE_ISOLATION */
1331
1332/*
1333 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
1334 *
1335 *  dst - pointer to pgd range anywhere on a pgd page
1336 *  src - ""
1337 *  count - the number of pgds to copy.
1338 *
1339 * dst and src can be on the same page, but the range must not overlap,
1340 * and must not cross a page boundary.
1341 */
1342static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
1343{
1344        memcpy(dst, src, count * sizeof(pgd_t));
1345#ifdef CONFIG_PAGE_TABLE_ISOLATION
1346        if (!static_cpu_has(X86_FEATURE_PTI))
1347                return;
1348        /* Clone the user space pgd as well */
1349        memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
1350               count * sizeof(pgd_t));
1351#endif
1352}
1353
1354#define PTE_SHIFT ilog2(PTRS_PER_PTE)
1355static inline int page_level_shift(enum pg_level level)
1356{
1357        return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
1358}
1359static inline unsigned long page_level_size(enum pg_level level)
1360{
1361        return 1UL << page_level_shift(level);
1362}
1363static inline unsigned long page_level_mask(enum pg_level level)
1364{
1365        return ~(page_level_size(level) - 1);
1366}
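    /*
     * Worked example (PAGE_SHIFT == 12, PTE_SHIFT == 9): PG_LEVEL_4K gives
     * a shift of 12 (4KiB), PG_LEVEL_2M gives 21 (2MiB), PG_LEVEL_1G gives
     * 30 (1GiB), so page_level_mask(PG_LEVEL_2M) == ~0x1fffffUL.
     */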
1367
1368/*
1369 * The x86 doesn't have any external MMU info: the kernel page
1370 * tables contain all the necessary information.
1371 */
1372static inline void update_mmu_cache(struct vm_area_struct *vma,
1373                unsigned long addr, pte_t *ptep)
1374{
1375}
1376static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
1377                unsigned long addr, pmd_t *pmd)
1378{
1379}
1380static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
1381                unsigned long addr, pud_t *pud)
1382{
1383}
1384
1385#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
1386static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
1387{
1388        return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
1389}
1390
1391static inline int pte_swp_soft_dirty(pte_t pte)
1392{
1393        return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
1394}
1395
1396static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
1397{
1398        return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
1399}
1400
1401#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1402static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
1403{
1404        return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
1405}
1406
1407static inline int pmd_swp_soft_dirty(pmd_t pmd)
1408{
1409        return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY;
1410}
1411
1412static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
1413{
1414        return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
1415}
1416#endif
1417#endif
1418
1419#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
1420static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
1421{
1422        return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
1423}
1424
1425static inline int pte_swp_uffd_wp(pte_t pte)
1426{
1427        return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
1428}
1429
1430static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
1431{
1432        return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
1433}
1434
1435static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
1436{
1437        return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP);
1438}
1439
1440static inline int pmd_swp_uffd_wp(pmd_t pmd)
1441{
1442        return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP;
1443}
1444
1445static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
1446{
1447        return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP);
1448}
1449#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
1450
1451#define PKRU_AD_BIT 0x1
1452#define PKRU_WD_BIT 0x2
1453#define PKRU_BITS_PER_PKEY 2
1454
1455#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
1456extern u32 init_pkru_value;
1457#else
1458#define init_pkru_value 0
1459#endif
1460
1461static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
1462{
1463        int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
1464        return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits));
1465}
1466
1467static inline bool __pkru_allows_write(u32 pkru, u16 pkey)
1468{
1469        int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
1470        /*
1471         * Access-disable disables writes too so we need to check
1472         * both bits here.
1473         */
1474        return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits));
1475}
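    /*
     * Worked example: key N owns PKRU bits 2N (access-disable) and 2N+1
     * (write-disable).  For pkey 1, __pkru_allows_read() tests bit 2 and
     * __pkru_allows_write() tests bits 2 and 3, so PKRU == 0x8 makes
     * pkey 1 readable but not writable.
     */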
1476
1477static inline u16 pte_flags_pkey(unsigned long pte_flags)
1478{
1479#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
1480        /* ifdef to avoid doing 59-bit shift on 32-bit values */
1481        return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0;
1482#else
1483        return 0;
1484#endif
1485}
1486
1487static inline bool __pkru_allows_pkey(u16 pkey, bool write)
1488{
1489        u32 pkru = read_pkru();
1490
1491        if (!__pkru_allows_read(pkru, pkey))
1492                return false;
1493        if (write && !__pkru_allows_write(pkru, pkey))
1494                return false;
1495
1496        return true;
1497}
1498
1499/*
1500 * 'pteval' can come from a PTE, PMD or PUD.  We only check
1501 * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
1502 * same value on all 3 types.
1503 */
1504static inline bool __pte_access_permitted(unsigned long pteval, bool write)
1505{
1506        unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
1507
1508        if (write)
1509                need_pte_bits |= _PAGE_RW;
1510
1511        if ((pteval & need_pte_bits) != need_pte_bits)
1512                return 0;
1513
1514        return __pkru_allows_pkey(pte_flags_pkey(pteval), write);
1515}
1516
1517#define pte_access_permitted pte_access_permitted
1518static inline bool pte_access_permitted(pte_t pte, bool write)
1519{
1520        return __pte_access_permitted(pte_val(pte), write);
1521}
1522
1523#define pmd_access_permitted pmd_access_permitted
1524static inline bool pmd_access_permitted(pmd_t pmd, bool write)
1525{
1526        return __pte_access_permitted(pmd_val(pmd), write);
1527}
1528
1529#define pud_access_permitted pud_access_permitted
1530static inline bool pud_access_permitted(pud_t pud, bool write)
1531{
1532        return __pte_access_permitted(pud_val(pud), write);
1533}
1534
1535#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
1536extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
1537
1538static inline bool arch_has_pfn_modify_check(void)
1539{
1540        return boot_cpu_has_bug(X86_BUG_L1TF);
1541}
1542
1543#include <asm-generic/pgtable.h>
1544#endif  /* __ASSEMBLY__ */
1545
1546#endif /* _ASM_X86_PGTABLE_H */
1547