linux/arch/x86/kvm/mmu/mmu_internal.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_MMU_INTERNAL_H
#define __KVM_X86_MMU_INTERNAL_H

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <asm/kvm_host.h>

#undef MMU_DEBUG

#ifdef MMU_DEBUG
extern bool dbg;

#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0)
#define MMU_WARN_ON(x) WARN_ON(x)
#else
#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)
#define MMU_WARN_ON(x) do { } while (0)
#endif

/*
 * Unlike regular MMU roots, PAE "roots", a.k.a. PDPTEs/PDPTRs, have a PRESENT
 * bit, and thus are guaranteed to be non-zero when valid.  And, when a guest
 * PDPTR is !PRESENT, its corresponding PAE root cannot be set to INVALID_PAGE,
 * as the CPU would treat that as a PRESENT PDPTR with reserved bits set.  Use
 * '0' instead of INVALID_PAGE to indicate an invalid PAE root.
 */
#define INVALID_PAE_ROOT	0
#define IS_VALID_PAE_ROOT(x)	(!!(x))
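
/*
 * Illustrative sketch, not part of the upstream header: a hypothetical
 * helper showing how the four PAE roots of a shadow MMU could be reset.
 * The pae_root[4] parameter and the example_ name are assumptions for the
 * example; the point is that validity is tested with IS_VALID_PAE_ROOT()
 * and an unused slot is marked with INVALID_PAE_ROOT ('0'), not with
 * INVALID_PAGE.
 */
#if 0	/* example only */
static void example_reset_pae_roots(hpa_t pae_root[4])
{
	int i;

	for (i = 0; i < 4; i++) {
		if (!IS_VALID_PAE_ROOT(pae_root[i]))
			continue;

		/* ... drop the reference on the valid root here ... */
		pae_root[i] = INVALID_PAE_ROOT;
	}
}
#endif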

struct kvm_mmu_page {
	/*
	 * Note, "link" through "spt" fit in a single 64 byte cache line on
	 * 64-bit kernels, keep it that way unless there's a reason not to.
	 */
	struct list_head link;
	struct hlist_node hash_link;

	bool tdp_mmu_page;
	bool unsync;
	u8 mmu_valid_gen;
	bool lpage_disallowed; /* Can't be replaced by an equiv large page */

	/*
	 * The following two entries are used to key the shadow page in the
	 * hash table.
	 */
	union kvm_mmu_page_role role;
	gfn_t gfn;

	u64 *spt;
	/* hold the gfn of each spte inside spt */
	gfn_t *gfns;
	/* Currently serving as active root */
	union {
		int root_count;
		refcount_t tdp_mmu_root_count;
	};
	unsigned int unsync_children;
	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
	DECLARE_BITMAP(unsync_child_bitmap, 512);

	struct list_head lpage_disallowed_link;
#ifdef CONFIG_X86_32
	/*
	 * Used out of the mmu-lock to avoid reading spte values while an
	 * update is in progress; see the comments in __get_spte_lockless().
	 */
	int clear_spte_count;
#endif

	/* Number of writes since the last time traversal visited this page.  */
	atomic_t write_flooding_count;

#ifdef CONFIG_X86_64
	/* Used for freeing the page asynchronously if it is a TDP MMU page. */
	struct rcu_head rcu_head;
#endif
};

extern struct kmem_cache *mmu_page_header_cache;

static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page)
{
	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);

	return (struct kvm_mmu_page *)page_private(page);
}

static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
{
	return to_shadow_page(__pa(sptep));
}

static inline int kvm_mmu_role_as_id(union kvm_mmu_page_role role)
{
	return role.smm ? 1 : 0;
}

static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
{
	return kvm_mmu_role_as_id(sp->role);
}
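
/*
 * Illustrative sketch, not part of the upstream header: the helpers above
 * chain together.  Given a pointer to a shadow PTE, __pa() plus
 * to_shadow_page() recovers the struct kvm_mmu_page that owns it (the
 * pointer is stashed in the backing struct page's private field when the
 * shadow page is allocated), and the role-based helpers then yield the
 * address space id.  The example_ name is an assumption for the example.
 */
#if 0	/* example only */
static int example_sptep_to_as_id(u64 *sptep)
{
	struct kvm_mmu_page *sp = sptep_to_sp(sptep);

	return kvm_mmu_page_as_id(sp);
}
#endif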

static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
{
	/*
	 * When using the EPT page-modification log, the GPAs in the CPU dirty
	 * log would come from L2 rather than L1.  Therefore, we need to rely
	 * on write protection to record dirty pages, which bypasses PML, since
	 * writes now result in a vmexit.  Note, the check on CPU dirty logging
	 * being enabled is mandatory as the bits used to denote WP-only SPTEs
	 * are reserved for NPT w/ PAE (32-bit KVM).
	 */
	return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
	       kvm_x86_ops.cpu_dirty_log_size;
}

extern int nx_huge_pages;
static inline bool is_nx_huge_page_enabled(void)
{
	return READ_ONCE(nx_huge_pages);
}

int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync);

void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
				    struct kvm_memory_slot *slot, u64 gfn,
				    int min_level);
void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
					u64 start_gfn, u64 pages);
unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);

/*
 * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
 *
 * RET_PF_RETRY: let CPU fault again on the address.
 * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
 * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
 * RET_PF_FIXED: The faulting entry has been fixed.
 * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
 *
 * Any names added to this enum should be exported to userspace for use in
 * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h
 */
enum {
	RET_PF_RETRY = 0,
	RET_PF_EMULATE,
	RET_PF_INVALID,
	RET_PF_FIXED,
	RET_PF_SPURIOUS,
};
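
/*
 * Illustrative sketch, not part of the upstream header: a hypothetical
 * caller dispatching on the RET_PF_* values.  RET_PF_RETRY, RET_PF_FIXED
 * and RET_PF_SPURIOUS resume the guest, RET_PF_EMULATE hands the faulting
 * instruction to the emulator (e.g. via x86_emulate_instruction()), and
 * RET_PF_INVALID is expected to be consumed inside the fault path itself.
 * The example_ name is an assumption for the example.
 */
#if 0	/* example only */
static int example_handle_fault_result(struct kvm_vcpu *vcpu, int ret)
{
	switch (ret) {
	case RET_PF_RETRY:
	case RET_PF_FIXED:
	case RET_PF_SPURIOUS:
		return 1;		/* re-enter the guest */
	case RET_PF_EMULATE:
		/* hand the instruction to the emulator here */
		return 0;
	case RET_PF_INVALID:
	default:
		WARN_ON_ONCE(1);	/* should not escape the fault path */
		return -EIO;
	}
}
#endif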

/* Bits which may be returned by set_spte() */
#define SET_SPTE_WRITE_PROTECTED_PT	BIT(0)
#define SET_SPTE_NEED_REMOTE_TLB_FLUSH	BIT(1)
#define SET_SPTE_SPURIOUS		BIT(2)
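
/*
 * Illustrative sketch, not part of the upstream header: set_spte() returns
 * an OR of the SET_SPTE_* bits rather than a single value, so callers test
 * individual flags.  The example_ name is an assumption for the example;
 * kvm_flush_remote_tlbs_with_address() is declared above.
 */
#if 0	/* example only */
static void example_consume_set_spte_ret(struct kvm *kvm, gfn_t gfn, int set_spte_ret)
{
	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
}
#endif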

int kvm_mmu_max_mapping_level(struct kvm *kvm,
			      const struct kvm_memory_slot *slot, gfn_t gfn,
			      kvm_pfn_t pfn, int max_level);
int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
			    int max_level, kvm_pfn_t *pfnp,
			    bool huge_page_disallowed, int *req_level);
void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
				kvm_pfn_t *pfnp, int *goal_levelp);

void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);

void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);

#endif /* __KVM_X86_MMU_INTERNAL_H */