linux/drivers/gpu/drm/i915/gvt/gtt.c
   1/*
   2 * GTT virtualization
   3 *
   4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice (including the next
  14 * paragraph) shall be included in all copies or substantial portions of the
  15 * Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 * SOFTWARE.
  24 *
  25 * Authors:
  26 *    Zhi Wang <zhi.a.wang@intel.com>
  27 *    Zhenyu Wang <zhenyuw@linux.intel.com>
  28 *    Xiao Zheng <xiao.zheng@intel.com>
  29 *
  30 * Contributors:
  31 *    Min He <min.he@intel.com>
  32 *    Bing Niu <bing.niu@intel.com>
  33 *
  34 */
  35
  36#include "i915_drv.h"
  37#include "gvt.h"
  38#include "i915_pvinfo.h"
  39#include "trace.h"
  40
  41#if defined(VERBOSE_DEBUG)
  42#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
  43#else
  44#define gvt_vdbg_mm(fmt, args...)
  45#endif
  46
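/*
 * Knobs for the out-of-sync (OOS) page table optimization implemented
 * below: when enabled, a guest PPGTT page may temporarily run without
 * write protection and be re-synchronized from an OOS snapshot later,
 * which reduces the number of trapped guest writes. preallocated_oos_pages
 * is the number of snapshot pages set aside for that purpose.
 */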
  47static bool enable_out_of_sync = false;
  48static int preallocated_oos_pages = 8192;
  49
  50/*
  51 * Validate a guest graphics memory address and the size of the
  52 * related range against the vGPU's aperture and hidden GM spaces.
  53 */
  54bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
  55{
  56        if (size == 0)
  57                return vgpu_gmadr_is_valid(vgpu, addr);
  58
  59        if (vgpu_gmadr_is_aperture(vgpu, addr) &&
  60            vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
  61                return true;
  62        else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
  63                 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
  64                return true;
  65
  66        gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
  67                     addr, size);
  68        return false;
  69}
  70
  71/* translate a guest gmadr to host gmadr */
  72int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
  73{
  74        struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
  75
  76        if (drm_WARN(&i915->drm, !vgpu_gmadr_is_valid(vgpu, g_addr),
  77                     "invalid guest gmadr %llx\n", g_addr))
  78                return -EACCES;
  79
  80        if (vgpu_gmadr_is_aperture(vgpu, g_addr))
  81                *h_addr = vgpu_aperture_gmadr_base(vgpu)
  82                          + (g_addr - vgpu_aperture_offset(vgpu));
  83        else
  84                *h_addr = vgpu_hidden_gmadr_base(vgpu)
  85                          + (g_addr - vgpu_hidden_offset(vgpu));
  86        return 0;
  87}
  88
  89/* translate a host gmadr to guest gmadr */
  90int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
  91{
  92        struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
  93
  94        if (drm_WARN(&i915->drm, !gvt_gmadr_is_valid(vgpu->gvt, h_addr),
  95                     "invalid host gmadr %llx\n", h_addr))
  96                return -EACCES;
  97
  98        if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
  99                *g_addr = vgpu_aperture_gmadr_base(vgpu)
 100                        + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
 101        else
 102                *g_addr = vgpu_hidden_gmadr_base(vgpu)
 103                        + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
 104        return 0;
 105}
 106
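/*
 * Index variants of the translations above: a GGTT entry index is
 * converted by translating the corresponding graphics memory address
 * (index << I915_GTT_PAGE_SHIFT) and shifting back.
 */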
 107int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
 108                             unsigned long *h_index)
 109{
 110        u64 h_addr;
 111        int ret;
 112
 113        ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
 114                                       &h_addr);
 115        if (ret)
 116                return ret;
 117
 118        *h_index = h_addr >> I915_GTT_PAGE_SHIFT;
 119        return 0;
 120}
 121
 122int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
 123                             unsigned long *g_index)
 124{
 125        u64 g_addr;
 126        int ret;
 127
 128        ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
 129                                       &g_addr);
 130        if (ret)
 131                return ret;
 132
 133        *g_index = g_addr >> I915_GTT_PAGE_SHIFT;
 134        return 0;
 135}
 136
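/*
 * Helpers to classify a GTT_TYPE_* value: whether it denotes a page
 * table entry, a page table, a PTE-level page table, or a root pointer.
 */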
 137#define gtt_type_is_entry(type) \
 138        (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
 139         && type != GTT_TYPE_PPGTT_PTE_ENTRY \
 140         && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
 141
 142#define gtt_type_is_pt(type) \
 143        (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
 144
 145#define gtt_type_is_pte_pt(type) \
 146        (type == GTT_TYPE_PPGTT_PTE_PT)
 147
 148#define gtt_type_is_root_pointer(type) \
 149        (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
 150
 151#define gtt_init_entry(e, t, p, v) do { \
 152        (e)->type = t; \
 153        (e)->pdev = p; \
 154        memcpy(&(e)->val64, &v, sizeof(v)); \
 155} while (0)
 156
 157/*
 158 * Mappings between GTT_TYPE* enumerations.
 159 * For a given type, the table below provides:
 160 * - the type of the next level page table
 161 * - the type of entry inside this level of page table
 162 * - the type of the entry when the PSE bit is set
 163 *
 164 * If the given type does not carry a particular piece of information,
 165 * GTT_TYPE_INVALID is returned. For example, an L4 root entry has no
 166 * PSE bit, so asking for its PSE type returns GTT_TYPE_INVALID; a PTE
 167 * page table has no next level, so asking for its next level page
 168 * table type returns GTT_TYPE_INVALID as well. This is useful when
 169 * traversing a page table, since GTT_TYPE_INVALID marks where the
 170 * walk must stop.
 171 */
 172
 173struct gtt_type_table_entry {
 174        int entry_type;
 175        int pt_type;
 176        int next_pt_type;
 177        int pse_entry_type;
 178};
 179
 180#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
 181        [type] = { \
 182                .entry_type = e_type, \
 183                .pt_type = cpt_type, \
 184                .next_pt_type = npt_type, \
 185                .pse_entry_type = pse_type, \
 186        }
 187
 188static struct gtt_type_table_entry gtt_type_table[] = {
 189        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
 190                        GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
 191                        GTT_TYPE_INVALID,
 192                        GTT_TYPE_PPGTT_PML4_PT,
 193                        GTT_TYPE_INVALID),
 194        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
 195                        GTT_TYPE_PPGTT_PML4_ENTRY,
 196                        GTT_TYPE_PPGTT_PML4_PT,
 197                        GTT_TYPE_PPGTT_PDP_PT,
 198                        GTT_TYPE_INVALID),
 199        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
 200                        GTT_TYPE_PPGTT_PML4_ENTRY,
 201                        GTT_TYPE_PPGTT_PML4_PT,
 202                        GTT_TYPE_PPGTT_PDP_PT,
 203                        GTT_TYPE_INVALID),
 204        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
 205                        GTT_TYPE_PPGTT_PDP_ENTRY,
 206                        GTT_TYPE_PPGTT_PDP_PT,
 207                        GTT_TYPE_PPGTT_PDE_PT,
 208                        GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 209        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
 210                        GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
 211                        GTT_TYPE_INVALID,
 212                        GTT_TYPE_PPGTT_PDE_PT,
 213                        GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 214        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
 215                        GTT_TYPE_PPGTT_PDP_ENTRY,
 216                        GTT_TYPE_PPGTT_PDP_PT,
 217                        GTT_TYPE_PPGTT_PDE_PT,
 218                        GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 219        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
 220                        GTT_TYPE_PPGTT_PDE_ENTRY,
 221                        GTT_TYPE_PPGTT_PDE_PT,
 222                        GTT_TYPE_PPGTT_PTE_PT,
 223                        GTT_TYPE_PPGTT_PTE_2M_ENTRY),
 224        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
 225                        GTT_TYPE_PPGTT_PDE_ENTRY,
 226                        GTT_TYPE_PPGTT_PDE_PT,
 227                        GTT_TYPE_PPGTT_PTE_PT,
 228                        GTT_TYPE_PPGTT_PTE_2M_ENTRY),
 229        /* We take IPS bit as 'PSE' for PTE level. */
 230        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
 231                        GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 232                        GTT_TYPE_PPGTT_PTE_PT,
 233                        GTT_TYPE_INVALID,
 234                        GTT_TYPE_PPGTT_PTE_64K_ENTRY),
 235        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 236                        GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 237                        GTT_TYPE_PPGTT_PTE_PT,
 238                        GTT_TYPE_INVALID,
 239                        GTT_TYPE_PPGTT_PTE_64K_ENTRY),
 240        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
 241                        GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 242                        GTT_TYPE_PPGTT_PTE_PT,
 243                        GTT_TYPE_INVALID,
 244                        GTT_TYPE_PPGTT_PTE_64K_ENTRY),
 245        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
 246                        GTT_TYPE_PPGTT_PDE_ENTRY,
 247                        GTT_TYPE_PPGTT_PDE_PT,
 248                        GTT_TYPE_INVALID,
 249                        GTT_TYPE_PPGTT_PTE_2M_ENTRY),
 250        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
 251                        GTT_TYPE_PPGTT_PDP_ENTRY,
 252                        GTT_TYPE_PPGTT_PDP_PT,
 253                        GTT_TYPE_INVALID,
 254                        GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 255        GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
 256                        GTT_TYPE_GGTT_PTE,
 257                        GTT_TYPE_INVALID,
 258                        GTT_TYPE_INVALID,
 259                        GTT_TYPE_INVALID),
 260};
 261
 262static inline int get_next_pt_type(int type)
 263{
 264        return gtt_type_table[type].next_pt_type;
 265}
 266
 267static inline int get_pt_type(int type)
 268{
 269        return gtt_type_table[type].pt_type;
 270}
 271
 272static inline int get_entry_type(int type)
 273{
 274        return gtt_type_table[type].entry_type;
 275}
 276
 277static inline int get_pse_type(int type)
 278{
 279        return gtt_type_table[type].pse_entry_type;
 280}
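
/*
 * Illustrative walk using the table above: starting from an L4 root,
 * get_next_pt_type(GTT_TYPE_PPGTT_ROOT_L4_ENTRY) is GTT_TYPE_PPGTT_PML4_PT,
 * get_next_pt_type(GTT_TYPE_PPGTT_PML4_PT) is GTT_TYPE_PPGTT_PDP_PT, then
 * GTT_TYPE_PPGTT_PDE_PT and GTT_TYPE_PPGTT_PTE_PT, until
 * get_next_pt_type(GTT_TYPE_PPGTT_PTE_PT) returns GTT_TYPE_INVALID and the
 * walk stops.
 */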
 281
 282static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
 283{
 284        void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
 285
 286        return readq(addr);
 287}
 288
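/*
 * Make pending GGTT PTE writes visible to the GPU by writing the
 * GFX_FLSH_CNTL flush control register; mmio_hw_access_pre()/_post()
 * bracket the raw hardware access.
 */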
 289static void ggtt_invalidate(struct intel_gt *gt)
 290{
 291        mmio_hw_access_pre(gt);
 292        intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 293        mmio_hw_access_post(gt);
 294}
 295
 296static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
 297{
 298        void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
 299
 300        writeq(pte, addr);
 301}
 302
 303static inline int gtt_get_entry64(void *pt,
 304                struct intel_gvt_gtt_entry *e,
 305                unsigned long index, bool hypervisor_access, unsigned long gpa,
 306                struct intel_vgpu *vgpu)
 307{
 308        const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
 309        int ret;
 310
 311        if (WARN_ON(info->gtt_entry_size != 8))
 312                return -EINVAL;
 313
 314        if (hypervisor_access) {
 315                ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
 316                                (index << info->gtt_entry_size_shift),
 317                                &e->val64, 8);
 318                if (WARN_ON(ret))
 319                        return ret;
 320        } else if (!pt) {
 321                e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
 322        } else {
 323                e->val64 = *((u64 *)pt + index);
 324        }
 325        return 0;
 326}
 327
 328static inline int gtt_set_entry64(void *pt,
 329                struct intel_gvt_gtt_entry *e,
 330                unsigned long index, bool hypervisor_access, unsigned long gpa,
 331                struct intel_vgpu *vgpu)
 332{
 333        const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
 334        int ret;
 335
 336        if (WARN_ON(info->gtt_entry_size != 8))
 337                return -EINVAL;
 338
 339        if (hypervisor_access) {
 340                ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
 341                                (index << info->gtt_entry_size_shift),
 342                                &e->val64, 8);
 343                if (WARN_ON(ret))
 344                        return ret;
 345        } else if (!pt) {
 346                write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
 347        } else {
 348                *((u64 *)pt + index) = e->val64;
 349        }
 350        return 0;
 351}
 352
 353#define GTT_HAW 46
 354
 355#define ADDR_1G_MASK    GENMASK_ULL(GTT_HAW - 1, 30)
 356#define ADDR_2M_MASK    GENMASK_ULL(GTT_HAW - 1, 21)
 357#define ADDR_64K_MASK   GENMASK_ULL(GTT_HAW - 1, 16)
 358#define ADDR_4K_MASK    GENMASK_ULL(GTT_HAW - 1, 12)
 359
 360#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
 361#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* split 64K gtt entry */
 362
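/* A 64K page spans 16 consecutive 4K PTE slots in a page table. */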
 363#define GTT_64K_PTE_STRIDE 16
 364
 365static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
 366{
 367        unsigned long pfn;
 368
 369        if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
 370                pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
 371        else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
 372                pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
 373        else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
 374                pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
 375        else
 376                pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
 377        return pfn;
 378}
 379
 380static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
 381{
 382        if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
 383                e->val64 &= ~ADDR_1G_MASK;
 384                pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
 385        } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
 386                e->val64 &= ~ADDR_2M_MASK;
 387                pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
 388        } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
 389                e->val64 &= ~ADDR_64K_MASK;
 390                pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
 391        } else {
 392                e->val64 &= ~ADDR_4K_MASK;
 393                pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
 394        }
 395
 396        e->val64 |= (pfn << PAGE_SHIFT);
 397}
 398
 399static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
 400{
 401        return !!(e->val64 & _PAGE_PSE);
 402}
 403
 404static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
 405{
 406        if (gen8_gtt_test_pse(e)) {
 407                switch (e->type) {
 408                case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
 409                        e->val64 &= ~_PAGE_PSE;
 410                        e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
 411                        break;
 412                case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
 413                        e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
 414                        e->val64 &= ~_PAGE_PSE;
 415                        break;
 416                default:
 417                        WARN_ON(1);
 418                }
 419        }
 420}
 421
 422static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
 423{
 424        if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
 425                return false;
 426
 427        return !!(e->val64 & GEN8_PDE_IPS_64K);
 428}
 429
 430static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
 431{
 432        if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
 433                return;
 434
 435        e->val64 &= ~GEN8_PDE_IPS_64K;
 436}
 437
 438static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
 439{
 440        /*
 441         * i915 writes the PDP root pointer registers without setting the
 442         * present bit, and that still works, so root pointer entries are
 443         * treated specially: any non-zero value counts as present.
 444         */
 445        if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
 446                        || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
 447                return (e->val64 != 0);
 448        else
 449                return (e->val64 & _PAGE_PRESENT);
 450}
 451
 452static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
 453{
 454        e->val64 &= ~_PAGE_PRESENT;
 455}
 456
 457static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
 458{
 459        e->val64 |= _PAGE_PRESENT;
 460}
 461
 462static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
 463{
 464        return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
 465}
 466
 467static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
 468{
 469        e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
 470}
 471
 472static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
 473{
 474        e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
 475}
 476
 477/*
 478 * Per-platform GMA routines.
 479 */
 480static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
 481{
 482        unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);
 483
 484        trace_gma_index(__func__, gma, x);
 485        return x;
 486}
 487
 488#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
 489static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
 490{ \
 491        unsigned long x = (exp); \
 492        trace_gma_index(__func__, gma, x); \
 493        return x; \
 494}
 495
 496DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
 497DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
 498DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
 499DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
 500DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
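
/*
 * For reference, a gen8 4-level PPGTT graphics memory address decomposes
 * as: PML4 index in bits 47:39, PDP index in 38:30, PDE index in 29:21,
 * PTE index in 20:12 and the page offset in 11:0; the legacy 3-level
 * layout only uses the two low PDP bits (31:30), as encoded above.
 */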
 501
 502static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
 503        .get_entry = gtt_get_entry64,
 504        .set_entry = gtt_set_entry64,
 505        .clear_present = gtt_entry_clear_present,
 506        .set_present = gtt_entry_set_present,
 507        .test_present = gen8_gtt_test_present,
 508        .test_pse = gen8_gtt_test_pse,
 509        .clear_pse = gen8_gtt_clear_pse,
 510        .clear_ips = gen8_gtt_clear_ips,
 511        .test_ips = gen8_gtt_test_ips,
 512        .clear_64k_splited = gen8_gtt_clear_64k_splited,
 513        .set_64k_splited = gen8_gtt_set_64k_splited,
 514        .test_64k_splited = gen8_gtt_test_64k_splited,
 515        .get_pfn = gen8_gtt_get_pfn,
 516        .set_pfn = gen8_gtt_set_pfn,
 517};
 518
 519static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
 520        .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
 521        .gma_to_pte_index = gen8_gma_to_pte_index,
 522        .gma_to_pde_index = gen8_gma_to_pde_index,
 523        .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
 524        .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
 525        .gma_to_pml4_index = gen8_gma_to_pml4_index,
 526};
 527
 528/* Update the entry type according to the PSE and IPS bits. */
 529static void update_entry_type_for_real(struct intel_gvt_gtt_pte_ops *pte_ops,
 530        struct intel_gvt_gtt_entry *entry, bool ips)
 531{
 532        switch (entry->type) {
 533        case GTT_TYPE_PPGTT_PDE_ENTRY:
 534        case GTT_TYPE_PPGTT_PDP_ENTRY:
 535                if (pte_ops->test_pse(entry))
 536                        entry->type = get_pse_type(entry->type);
 537                break;
 538        case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
 539                if (ips)
 540                        entry->type = get_pse_type(entry->type);
 541                break;
 542        default:
 543                GEM_BUG_ON(!gtt_type_is_entry(entry->type));
 544        }
 545
 546        GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
 547}
 548
 549/*
 550 * MM helpers.
 551 */
 552static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
 553                struct intel_gvt_gtt_entry *entry, unsigned long index,
 554                bool guest)
 555{
 556        struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 557
 558        GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);
 559
 560        entry->type = mm->ppgtt_mm.root_entry_type;
 561        pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
 562                           mm->ppgtt_mm.shadow_pdps,
 563                           entry, index, false, 0, mm->vgpu);
 564        update_entry_type_for_real(pte_ops, entry, false);
 565}
 566
 567static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
 568                struct intel_gvt_gtt_entry *entry, unsigned long index)
 569{
 570        _ppgtt_get_root_entry(mm, entry, index, true);
 571}
 572
 573static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
 574                struct intel_gvt_gtt_entry *entry, unsigned long index)
 575{
 576        _ppgtt_get_root_entry(mm, entry, index, false);
 577}
 578
 579static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
 580                struct intel_gvt_gtt_entry *entry, unsigned long index,
 581                bool guest)
 582{
 583        struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 584
 585        pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
 586                           mm->ppgtt_mm.shadow_pdps,
 587                           entry, index, false, 0, mm->vgpu);
 588}
 589
 590static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
 591                struct intel_gvt_gtt_entry *entry, unsigned long index)
 592{
 593        _ppgtt_set_root_entry(mm, entry, index, false);
 594}
 595
 596static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
 597                struct intel_gvt_gtt_entry *entry, unsigned long index)
 598{
 599        struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 600
 601        GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
 602
 603        entry->type = GTT_TYPE_GGTT_PTE;
 604        pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
 605                           false, 0, mm->vgpu);
 606}
 607
 608static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
 609                struct intel_gvt_gtt_entry *entry, unsigned long index)
 610{
 611        struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 612
 613        GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
 614
 615        pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
 616                           false, 0, mm->vgpu);
 617}
 618
 619static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
 620                struct intel_gvt_gtt_entry *entry, unsigned long index)
 621{
 622        struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 623
 624        GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
 625
 626        pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
 627}
 628
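/*
 * Write a GGTT entry to hardware and also cache it in the vGPU's
 * host_ggtt_aperture/host_ggtt_hidden arrays, presumably so the host
 * view can be restored later (e.g. across suspend/resume or reset).
 */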
 629static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
 630                struct intel_gvt_gtt_entry *entry, unsigned long index)
 631{
 632        struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
 633        unsigned long offset = index;
 634
 635        GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
 636
 637        if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
 638                offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
 639                mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
 640        } else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
 641                offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
 642                mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
 643        }
 644
 645        pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
 646}
 647
 648/*
 649 * PPGTT shadow page table helpers.
 650 */
 651static inline int ppgtt_spt_get_entry(
 652                struct intel_vgpu_ppgtt_spt *spt,
 653                void *page_table, int type,
 654                struct intel_gvt_gtt_entry *e, unsigned long index,
 655                bool guest)
 656{
 657        struct intel_gvt *gvt = spt->vgpu->gvt;
 658        struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 659        int ret;
 660
 661        e->type = get_entry_type(type);
 662
 663        if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
 664                return -EINVAL;
 665
 666        ret = ops->get_entry(page_table, e, index, guest,
 667                        spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
 668                        spt->vgpu);
 669        if (ret)
 670                return ret;
 671
 672        update_entry_type_for_real(ops, e, guest ?
 673                                   spt->guest_page.pde_ips : false);
 674
 675        gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
 676                    type, e->type, index, e->val64);
 677        return 0;
 678}
 679
 680static inline int ppgtt_spt_set_entry(
 681                struct intel_vgpu_ppgtt_spt *spt,
 682                void *page_table, int type,
 683                struct intel_gvt_gtt_entry *e, unsigned long index,
 684                bool guest)
 685{
 686        struct intel_gvt *gvt = spt->vgpu->gvt;
 687        struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 688
 689        if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
 690                return -EINVAL;
 691
 692        gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
 693                    type, e->type, index, e->val64);
 694
 695        return ops->set_entry(page_table, e, index, guest,
 696                        spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
 697                        spt->vgpu);
 698}
 699
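/*
 * Guest entries are read/written through the hypervisor from the guest
 * page at guest_page.gfn, while shadow entries are accessed directly via
 * the host kernel mapping of the shadow page.
 */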
 700#define ppgtt_get_guest_entry(spt, e, index) \
 701        ppgtt_spt_get_entry(spt, NULL, \
 702                spt->guest_page.type, e, index, true)
 703
 704#define ppgtt_set_guest_entry(spt, e, index) \
 705        ppgtt_spt_set_entry(spt, NULL, \
 706                spt->guest_page.type, e, index, true)
 707
 708#define ppgtt_get_shadow_entry(spt, e, index) \
 709        ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
 710                spt->shadow_page.type, e, index, false)
 711
 712#define ppgtt_set_shadow_entry(spt, e, index) \
 713        ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
 714                spt->shadow_page.type, e, index, false)
 715
 716static void *alloc_spt(gfp_t gfp_mask)
 717{
 718        struct intel_vgpu_ppgtt_spt *spt;
 719
 720        spt = kzalloc(sizeof(*spt), gfp_mask);
 721        if (!spt)
 722                return NULL;
 723
 724        spt->shadow_page.page = alloc_page(gfp_mask);
 725        if (!spt->shadow_page.page) {
 726                kfree(spt);
 727                return NULL;
 728        }
 729        return spt;
 730}
 731
 732static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
 733{
 734        __free_page(spt->shadow_page.page);
 735        kfree(spt);
 736}
 737
 738static int detach_oos_page(struct intel_vgpu *vgpu,
 739                struct intel_vgpu_oos_page *oos_page);
 740
 741static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
 742{
 743        struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev;
 744
 745        trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
 746
 747        dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
 748                       PCI_DMA_BIDIRECTIONAL);
 749
 750        radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
 751
 752        if (spt->guest_page.gfn) {
 753                if (spt->guest_page.oos_page)
 754                        detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
 755
 756                intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
 757        }
 758
 759        list_del_init(&spt->post_shadow_list);
 760        free_spt(spt);
 761}
 762
 763static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
 764{
 765        struct intel_vgpu_ppgtt_spt *spt, *spn;
 766        struct radix_tree_iter iter;
 767        LIST_HEAD(all_spt);
 768        void __rcu **slot;
 769
 770        rcu_read_lock();
 771        radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
 772                spt = radix_tree_deref_slot(slot);
 773                list_move(&spt->post_shadow_list, &all_spt);
 774        }
 775        rcu_read_unlock();
 776
 777        list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
 778                ppgtt_free_spt(spt);
 779}
 780
 781static int ppgtt_handle_guest_write_page_table_bytes(
 782                struct intel_vgpu_ppgtt_spt *spt,
 783                u64 pa, void *p_data, int bytes);
 784
 785static int ppgtt_write_protection_handler(
 786                struct intel_vgpu_page_track *page_track,
 787                u64 gpa, void *data, int bytes)
 788{
 789        struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;
 790        int ret;
 791
 792        if (bytes != 4 && bytes != 8)
 793                return -EINVAL;
 794
 795        ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
 796        if (ret)
 797                return ret;
 798
 799        return 0;
 800}
 801
 802/* Find a spt by guest gfn. */
 803static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
 804                struct intel_vgpu *vgpu, unsigned long gfn)
 805{
 806        struct intel_vgpu_page_track *track;
 807
 808        track = intel_vgpu_find_page_track(vgpu, gfn);
 809        if (track && track->handler == ppgtt_write_protection_handler)
 810                return track->priv_data;
 811
 812        return NULL;
 813}
 814
 815/* Find the spt by shadow page mfn. */
 816static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
 817                struct intel_vgpu *vgpu, unsigned long mfn)
 818{
 819        return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
 820}
 821
 822static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
 823
 824/* Allocate shadow page table without guest page. */
 825static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
 826                struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
 827{
 828        struct device *kdev = vgpu->gvt->gt->i915->drm.dev;
 829        struct intel_vgpu_ppgtt_spt *spt = NULL;
 830        dma_addr_t daddr;
 831        int ret;
 832
 833retry:
 834        spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
 835        if (!spt) {
 836                if (reclaim_one_ppgtt_mm(vgpu->gvt))
 837                        goto retry;
 838
 839                gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
 840                return ERR_PTR(-ENOMEM);
 841        }
 842
 843        spt->vgpu = vgpu;
 844        atomic_set(&spt->refcount, 1);
 845        INIT_LIST_HEAD(&spt->post_shadow_list);
 846
 847        /*
 848         * Init shadow_page.
 849         */
 850        spt->shadow_page.type = type;
 851        daddr = dma_map_page(kdev, spt->shadow_page.page,
 852                             0, 4096, PCI_DMA_BIDIRECTIONAL);
 853        if (dma_mapping_error(kdev, daddr)) {
 854                gvt_vgpu_err("fail to map dma addr\n");
 855                ret = -EINVAL;
 856                goto err_free_spt;
 857        }
 858        spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
 859        spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
 860
 861        ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
 862        if (ret)
 863                goto err_unmap_dma;
 864
 865        return spt;
 866
 867err_unmap_dma:
 868        dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 869err_free_spt:
 870        free_spt(spt);
 871        return ERR_PTR(ret);
 872}
 873
 874/* Allocate shadow page table associated with specific gfn. */
 875static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
 876                struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
 877                unsigned long gfn, bool guest_pde_ips)
 878{
 879        struct intel_vgpu_ppgtt_spt *spt;
 880        int ret;
 881
 882        spt = ppgtt_alloc_spt(vgpu, type);
 883        if (IS_ERR(spt))
 884                return spt;
 885
 886        /*
 887         * Init guest_page.
 888         */
 889        ret = intel_vgpu_register_page_track(vgpu, gfn,
 890                        ppgtt_write_protection_handler, spt);
 891        if (ret) {
 892                ppgtt_free_spt(spt);
 893                return ERR_PTR(ret);
 894        }
 895
 896        spt->guest_page.type = type;
 897        spt->guest_page.gfn = gfn;
 898        spt->guest_page.pde_ips = guest_pde_ips;
 899
 900        trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
 901
 902        return spt;
 903}
 904
 905#define pt_entry_size_shift(spt) \
 906        ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
 907
 908#define pt_entries(spt) \
 909        (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
 910
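/*
 * Iterators over a page table page. When 64K pages are in use (pde_ips),
 * entries are visited at a stride of GTT_64K_PTE_STRIDE; the "present"
 * variants additionally skip entries whose present bit is clear.
 */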
 911#define for_each_present_guest_entry(spt, e, i) \
 912        for (i = 0; i < pt_entries(spt); \
 913             i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
 914                if (!ppgtt_get_guest_entry(spt, e, i) && \
 915                    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
 916
 917#define for_each_present_shadow_entry(spt, e, i) \
 918        for (i = 0; i < pt_entries(spt); \
 919             i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
 920                if (!ppgtt_get_shadow_entry(spt, e, i) && \
 921                    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
 922
 923#define for_each_shadow_entry(spt, e, i) \
 924        for (i = 0; i < pt_entries(spt); \
 925             i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
 926                if (!ppgtt_get_shadow_entry(spt, e, i))
 927
 928static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
 929{
 930        int v = atomic_read(&spt->refcount);
 931
 932        trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
 933        atomic_inc(&spt->refcount);
 934}
 935
 936static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
 937{
 938        int v = atomic_read(&spt->refcount);
 939
 940        trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
 941        return atomic_dec_return(&spt->refcount);
 942}
 943
 944static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
 945
 946static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
 947                struct intel_gvt_gtt_entry *e)
 948{
 949        struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
 950        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 951        struct intel_vgpu_ppgtt_spt *s;
 952        enum intel_gvt_gtt_type cur_pt_type;
 953
 954        GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
 955
 956        if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
 957                && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
 958                cur_pt_type = get_next_pt_type(e->type);
 959
 960                if (!gtt_type_is_pt(cur_pt_type) ||
 961                                !gtt_type_is_pt(cur_pt_type + 1)) {
 962                        drm_WARN(&i915->drm, 1,
 963                                 "Invalid page table type, cur_pt_type is: %d\n",
 964                                 cur_pt_type);
 965                        return -EINVAL;
 966                }
 967
 968                cur_pt_type += 1;
 969
 970                if (ops->get_pfn(e) ==
 971                        vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
 972                        return 0;
 973        }
 974        s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
 975        if (!s) {
 976                gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
 977                                ops->get_pfn(e));
 978                return -ENXIO;
 979        }
 980        return ppgtt_invalidate_spt(s);
 981}
 982
 983static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
 984                struct intel_gvt_gtt_entry *entry)
 985{
 986        struct intel_vgpu *vgpu = spt->vgpu;
 987        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 988        unsigned long pfn;
 989        int type;
 990
 991        pfn = ops->get_pfn(entry);
 992        type = spt->shadow_page.type;
 993
 994        /* Uninitialized spte or unshadowed spte. */
 995        if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
 996                return;
 997
 998        intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
 999}
1000
1001static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
1002{
1003        struct intel_vgpu *vgpu = spt->vgpu;
1004        struct intel_gvt_gtt_entry e;
1005        unsigned long index;
1006        int ret;
1007
1008        trace_spt_change(spt->vgpu->id, "die", spt,
1009                        spt->guest_page.gfn, spt->shadow_page.type);
1010
1011        if (ppgtt_put_spt(spt) > 0)
1012                return 0;
1013
1014        for_each_present_shadow_entry(spt, &e, index) {
1015                switch (e.type) {
1016                case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
1017                        gvt_vdbg_mm("invalidate 4K entry\n");
1018                        ppgtt_invalidate_pte(spt, &e);
1019                        break;
1020                case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
1021                        /* We don't set up 64K shadow entries so far. */
1022                        WARN(1, "suspicious 64K gtt entry\n");
1023                        continue;
1024                case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
1025                        gvt_vdbg_mm("invalidate 2M entry\n");
1026                        continue;
1027                case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
1028                        WARN(1, "GVT doesn't support 1GB page\n");
1029                        continue;
1030                case GTT_TYPE_PPGTT_PML4_ENTRY:
1031                case GTT_TYPE_PPGTT_PDP_ENTRY:
1032                case GTT_TYPE_PPGTT_PDE_ENTRY:
1033                        gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
1034                        ret = ppgtt_invalidate_spt_by_shadow_entry(
1035                                        spt->vgpu, &e);
1036                        if (ret)
1037                                goto fail;
1038                        break;
1039                default:
1040                        GEM_BUG_ON(1);
1041                }
1042        }
1043
1044        trace_spt_change(spt->vgpu->id, "release", spt,
1045                         spt->guest_page.gfn, spt->shadow_page.type);
1046        ppgtt_free_spt(spt);
1047        return 0;
1048fail:
1049        gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
1050                        spt, e.val64, e.type);
1051        return ret;
1052}
1053
1054static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
1055{
1056        struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
1057
1058        if (GRAPHICS_VER(dev_priv) == 9) {
1059                u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
1060                        GAMW_ECO_ENABLE_64K_IPS_FIELD;
1061
1062                return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
1063        } else if (GRAPHICS_VER(dev_priv) >= 11) {
1064                /* 64K paging only controlled by IPS bit in PTE now. */
1065                return true;
1066        } else
1067                return false;
1068}
1069
1070static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
1071
1072static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
1073                struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
1074{
1075        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1076        struct intel_vgpu_ppgtt_spt *spt = NULL;
1077        bool ips = false;
1078        int ret;
1079
1080        GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
1081
1082        if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1083                ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
1084
1085        spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
1086        if (spt) {
1087                ppgtt_get_spt(spt);
1088
1089                if (ips != spt->guest_page.pde_ips) {
1090                        spt->guest_page.pde_ips = ips;
1091
1092                        gvt_dbg_mm("reshadow PDE since ips changed\n");
1093                        clear_page(spt->shadow_page.vaddr);
1094                        ret = ppgtt_populate_spt(spt);
1095                        if (ret) {
1096                                ppgtt_put_spt(spt);
1097                                goto err;
1098                        }
1099                }
1100        } else {
1101                int type = get_next_pt_type(we->type);
1102
1103                if (!gtt_type_is_pt(type)) {
1104                        ret = -EINVAL;
1105                        goto err;
1106                }
1107
1108                spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
1109                if (IS_ERR(spt)) {
1110                        ret = PTR_ERR(spt);
1111                        goto err;
1112                }
1113
1114                ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
1115                if (ret)
1116                        goto err_free_spt;
1117
1118                ret = ppgtt_populate_spt(spt);
1119                if (ret)
1120                        goto err_free_spt;
1121
1122                trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
1123                                 spt->shadow_page.type);
1124        }
1125        return spt;
1126
1127err_free_spt:
1128        ppgtt_free_spt(spt);
1129        spt = NULL;
1130err:
1131        gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1132                     spt, we->val64, we->type);
1133        return ERR_PTR(ret);
1134}
1135
1136static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
1137                struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
1138{
1139        struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
1140
1141        se->type = ge->type;
1142        se->val64 = ge->val64;
1143
1144        /* We always split 64K pages, so clear the IPS bit in the shadow PDE. */
1145        if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1146                ops->clear_ips(se);
1147
1148        ops->set_pfn(se, s->shadow_page.mfn);
1149}
1150
1151/**
1152 * is_2MB_gtt_possible - check whether 2MB huge GTT shadowing is possible
1153 * @vgpu: target vgpu
1154 * @entry: target pfn's gtt entry
1155 *
1156 * Return: 1 if 2MB huge gtt shadowing is possible, 0 if the preconditions
1157 * are not met, negative error code on failure.
1158 */
1159static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
1160        struct intel_gvt_gtt_entry *entry)
1161{
1162        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1163        unsigned long pfn;
1164
1165        if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
1166                return 0;
1167
1168        pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, ops->get_pfn(entry));
1169        if (pfn == INTEL_GVT_INVALID_ADDR)
1170                return -EINVAL;
1171
1172        return PageTransHuge(pfn_to_page(pfn));
1173}
1174
1175static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
1176        struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1177        struct intel_gvt_gtt_entry *se)
1178{
1179        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1180        struct intel_vgpu_ppgtt_spt *sub_spt;
1181        struct intel_gvt_gtt_entry sub_se;
1182        unsigned long start_gfn;
1183        dma_addr_t dma_addr;
1184        unsigned long sub_index;
1185        int ret;
1186
1187        gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
1188
1189        start_gfn = ops->get_pfn(se);
1190
1191        sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
1192        if (IS_ERR(sub_spt))
1193                return PTR_ERR(sub_spt);
1194
1195        for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
1196                ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
1197                                start_gfn + sub_index, PAGE_SIZE, &dma_addr);
1198                if (ret) {
1199                        ppgtt_invalidate_spt(spt);
1200                        return ret;
1201                }
1202                sub_se.val64 = se->val64;
1203
1204                /* Copy the PAT field from PDE. */
1205                sub_se.val64 &= ~_PAGE_PAT;
1206                sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
1207
1208                ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
1209                ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
1210        }
1211
1212        /* Clear dirty field. */
1213        se->val64 &= ~_PAGE_DIRTY;
1214
1215        ops->clear_pse(se);
1216        ops->clear_ips(se);
1217        ops->set_pfn(se, sub_spt->shadow_page.mfn);
1218        ppgtt_set_shadow_entry(spt, se, index);
1219        return 0;
1220}
1221
1222static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
1223        struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1224        struct intel_gvt_gtt_entry *se)
1225{
1226        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1227        struct intel_gvt_gtt_entry entry = *se;
1228        unsigned long start_gfn;
1229        dma_addr_t dma_addr;
1230        int i, ret;
1231
1232        gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
1233
1234        GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
1235
1236        start_gfn = ops->get_pfn(se);
1237
1238        entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
1239        ops->set_64k_splited(&entry);
1240
1241        for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1242                ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
1243                                        start_gfn + i, PAGE_SIZE, &dma_addr);
1244                if (ret)
1245                        return ret;
1246
1247                ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
1248                ppgtt_set_shadow_entry(spt, &entry, index + i);
1249        }
1250        return 0;
1251}
1252
1253static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
1254        struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1255        struct intel_gvt_gtt_entry *ge)
1256{
1257        struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
1258        struct intel_gvt_gtt_entry se = *ge;
1259        unsigned long gfn, page_size = PAGE_SIZE;
1260        dma_addr_t dma_addr;
1261        int ret;
1262
1263        if (!pte_ops->test_present(ge))
1264                return 0;
1265
1266        gfn = pte_ops->get_pfn(ge);
1267
1268        switch (ge->type) {
1269        case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
1270                gvt_vdbg_mm("shadow 4K gtt entry\n");
1271                break;
1272        case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
1273                gvt_vdbg_mm("shadow 64K gtt entry\n");
1274                /*
1275                 * The layout of a 64K page is special: the page size is
1276                 * controlled by the upper PDE. To keep it simple, we
1277                 * always split a 64K page into 4K pages in the shadow PT.
1278                 */
1279                return split_64KB_gtt_entry(vgpu, spt, index, &se);
1280        case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
1281                gvt_vdbg_mm("shadow 2M gtt entry\n");
1282                ret = is_2MB_gtt_possible(vgpu, ge);
1283                if (ret == 0)
1284                        return split_2MB_gtt_entry(vgpu, spt, index, &se);
1285                else if (ret < 0)
1286                        return ret;
1287                page_size = I915_GTT_PAGE_SIZE_2M;
1288                break;
1289        case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
1290                gvt_vgpu_err("GVT doesn't support 1GB entry\n");
1291                return -EINVAL;
1292        default:
1293                GEM_BUG_ON(1);
1294        }
1295
1296        /* direct shadow */
1297        ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size,
1298                                                      &dma_addr);
1299        if (ret)
1300                return -ENXIO;
1301
1302        pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
1303        ppgtt_set_shadow_entry(spt, &se, index);
1304        return 0;
1305}
1306
1307static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
1308{
1309        struct intel_vgpu *vgpu = spt->vgpu;
1310        struct intel_gvt *gvt = vgpu->gvt;
1311        struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1312        struct intel_vgpu_ppgtt_spt *s;
1313        struct intel_gvt_gtt_entry se, ge;
1314        unsigned long gfn, i;
1315        int ret;
1316
1317        trace_spt_change(spt->vgpu->id, "born", spt,
1318                         spt->guest_page.gfn, spt->shadow_page.type);
1319
1320        for_each_present_guest_entry(spt, &ge, i) {
1321                if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
1322                        s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1323                        if (IS_ERR(s)) {
1324                                ret = PTR_ERR(s);
1325                                goto fail;
1326                        }
1327                        ppgtt_get_shadow_entry(spt, &se, i);
1328                        ppgtt_generate_shadow_entry(&se, s, &ge);
1329                        ppgtt_set_shadow_entry(spt, &se, i);
1330                } else {
1331                        gfn = ops->get_pfn(&ge);
1332                        if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
1333                                ops->set_pfn(&se, gvt->gtt.scratch_mfn);
1334                                ppgtt_set_shadow_entry(spt, &se, i);
1335                                continue;
1336                        }
1337
1338                        ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
1339                        if (ret)
1340                                goto fail;
1341                }
1342        }
1343        return 0;
1344fail:
1345        gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1346                        spt, ge.val64, ge.type);
1347        return ret;
1348}
1349
1350static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
1351                struct intel_gvt_gtt_entry *se, unsigned long index)
1352{
1353        struct intel_vgpu *vgpu = spt->vgpu;
1354        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1355        int ret;
1356
1357        trace_spt_guest_change(spt->vgpu->id, "remove", spt,
1358                               spt->shadow_page.type, se->val64, index);
1359
1360        gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
1361                    se->type, index, se->val64);
1362
1363        if (!ops->test_present(se))
1364                return 0;
1365
1366        if (ops->get_pfn(se) ==
1367            vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
1368                return 0;
1369
1370        if (gtt_type_is_pt(get_next_pt_type(se->type))) {
1371                struct intel_vgpu_ppgtt_spt *s =
1372                        intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
1373                if (!s) {
1374                        gvt_vgpu_err("fail to find guest page\n");
1375                        ret = -ENXIO;
1376                        goto fail;
1377                }
1378                ret = ppgtt_invalidate_spt(s);
1379                if (ret)
1380                        goto fail;
1381        } else {
1382                /* We don't set up 64K shadow entries so far. */
1383                WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
1384                     "suspicious 64K entry\n");
1385                ppgtt_invalidate_pte(spt, se);
1386        }
1387
1388        return 0;
1389fail:
1390        gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1391                        spt, se->val64, se->type);
1392        return ret;
1393}
1394
1395static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
1396                struct intel_gvt_gtt_entry *we, unsigned long index)
1397{
1398        struct intel_vgpu *vgpu = spt->vgpu;
1399        struct intel_gvt_gtt_entry m;
1400        struct intel_vgpu_ppgtt_spt *s;
1401        int ret;
1402
1403        trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
1404                               we->val64, index);
1405
1406        gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
1407                    we->type, index, we->val64);
1408
1409        if (gtt_type_is_pt(get_next_pt_type(we->type))) {
1410                s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
1411                if (IS_ERR(s)) {
1412                        ret = PTR_ERR(s);
1413                        goto fail;
1414                }
1415                ppgtt_get_shadow_entry(spt, &m, index);
1416                ppgtt_generate_shadow_entry(&m, s, we);
1417                ppgtt_set_shadow_entry(spt, &m, index);
1418        } else {
1419                ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
1420                if (ret)
1421                        goto fail;
1422        }
1423        return 0;
1424fail:
1425        gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
1426                spt, we->val64, we->type);
1427        return ret;
1428}
1429
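/*
 * Out-of-sync (OOS) support: an oos_page holds a snapshot of the guest
 * page table taken when write protection was dropped. sync_oos_page()
 * compares that snapshot with current guest memory, re-shadows only the
 * entries that changed, and refreshes the snapshot; the caller re-enables
 * write protection before syncing.
 */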
1430static int sync_oos_page(struct intel_vgpu *vgpu,
1431                struct intel_vgpu_oos_page *oos_page)
1432{
1433        const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1434        struct intel_gvt *gvt = vgpu->gvt;
1435        struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1436        struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1437        struct intel_gvt_gtt_entry old, new;
1438        int index;
1439        int ret;
1440
1441        trace_oos_change(vgpu->id, "sync", oos_page->id,
1442                         spt, spt->guest_page.type);
1443
1444        old.type = new.type = get_entry_type(spt->guest_page.type);
1445        old.val64 = new.val64 = 0;
1446
1447        for (index = 0; index < (I915_GTT_PAGE_SIZE >>
1448                                info->gtt_entry_size_shift); index++) {
1449                ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
1450                ops->get_entry(NULL, &new, index, true,
1451                               spt->guest_page.gfn << PAGE_SHIFT, vgpu);
1452
1453                if (old.val64 == new.val64
1454                        && !test_and_clear_bit(index, spt->post_shadow_bitmap))
1455                        continue;
1456
1457                trace_oos_sync(vgpu->id, oos_page->id,
1458                                spt, spt->guest_page.type,
1459                                new.val64, index);
1460
1461                ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
1462                if (ret)
1463                        return ret;
1464
1465                ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
1466        }
1467
1468        spt->guest_page.write_cnt = 0;
1469        list_del_init(&spt->post_shadow_list);
1470        return 0;
1471}
1472
1473static int detach_oos_page(struct intel_vgpu *vgpu,
1474                struct intel_vgpu_oos_page *oos_page)
1475{
1476        struct intel_gvt *gvt = vgpu->gvt;
1477        struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1478
1479        trace_oos_change(vgpu->id, "detach", oos_page->id,
1480                         spt, spt->guest_page.type);
1481
1482        spt->guest_page.write_cnt = 0;
1483        spt->guest_page.oos_page = NULL;
1484        oos_page->spt = NULL;
1485
1486        list_del_init(&oos_page->vm_list);
1487        list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
1488
1489        return 0;
1490}
1491
1492static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
1493                struct intel_vgpu_ppgtt_spt *spt)
1494{
1495        struct intel_gvt *gvt = spt->vgpu->gvt;
1496        int ret;
1497
1498        ret = intel_gvt_hypervisor_read_gpa(spt->vgpu,
1499                        spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
1500                        oos_page->mem, I915_GTT_PAGE_SIZE);
1501        if (ret)
1502                return ret;
1503
1504        oos_page->spt = spt;
1505        spt->guest_page.oos_page = oos_page;
1506
1507        list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
1508
1509        trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
1510                         spt, spt->guest_page.type);
1511        return 0;
1512}
1513
1514static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
1515{
1516        struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1517        int ret;
1518
1519        ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
1520        if (ret)
1521                return ret;
1522
1523        trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
1524                         spt, spt->guest_page.type);
1525
1526        list_del_init(&oos_page->vm_list);
1527        return sync_oos_page(spt->vgpu, oos_page);
1528}
1529
1530static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
1531{
1532        struct intel_gvt *gvt = spt->vgpu->gvt;
1533        struct intel_gvt_gtt *gtt = &gvt->gtt;
1534        struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1535        int ret;
1536
1537        WARN(oos_page, "shadow PPGTT page already has an oos page\n");
1538
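            /*
             * If no free oos page is available, recycle the oldest one on
             * the in-use list: sync it back to its shadow page table and
             * detach it from its current spt before attaching it here.
             */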
1539        if (list_empty(&gtt->oos_page_free_list_head)) {
1540                oos_page = container_of(gtt->oos_page_use_list_head.next,
1541                        struct intel_vgpu_oos_page, list);
1542                ret = ppgtt_set_guest_page_sync(oos_page->spt);
1543                if (ret)
1544                        return ret;
1545                ret = detach_oos_page(spt->vgpu, oos_page);
1546                if (ret)
1547                        return ret;
1548        } else
1549                oos_page = container_of(gtt->oos_page_free_list_head.next,
1550                        struct intel_vgpu_oos_page, list);
1551        return attach_oos_page(oos_page, spt);
1552}
1553
1554static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
1555{
1556        struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1557
1558        if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
1559                return -EINVAL;
1560
1561        trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
1562                         spt, spt->guest_page.type);
1563
1564        list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
1565        return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
1566}
1567
1568/**
1569 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
1570 * @vgpu: a vGPU
1571 *
1572 * This function is called before submitting a guest workload to the host,
1573 * to sync all the out-of-sync shadow page tables of the vGPU.
1574 *
1575 * Returns:
1576 * Zero on success, negative error code if failed.
1577 */
1578int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
1579{
1580        struct list_head *pos, *n;
1581        struct intel_vgpu_oos_page *oos_page;
1582        int ret;
1583
1584        if (!enable_out_of_sync)
1585                return 0;
1586
1587        list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
1588                oos_page = container_of(pos,
1589                                struct intel_vgpu_oos_page, vm_list);
1590                ret = ppgtt_set_guest_page_sync(oos_page->spt);
1591                if (ret)
1592                        return ret;
1593        }
1594        return 0;
1595}
1596
1597/*
1598 * The heart of PPGTT shadow page table.
1599 */
1600static int ppgtt_handle_guest_write_page_table(
1601                struct intel_vgpu_ppgtt_spt *spt,
1602                struct intel_gvt_gtt_entry *we, unsigned long index)
1603{
1604        struct intel_vgpu *vgpu = spt->vgpu;
1605        int type = spt->shadow_page.type;
1606        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1607        struct intel_gvt_gtt_entry old_se;
1608        int new_present;
1609        int i, ret;
1610
1611        new_present = ops->test_present(we);
1612
1613        /*
1614         * Add the new entry first and then remove the old one, which
1615         * guarantees the ppgtt table stays valid during the window between
1616         * addition and removal.
1617         */
1618        ppgtt_get_shadow_entry(spt, &old_se, index);
1619
1620        if (new_present) {
1621                ret = ppgtt_handle_guest_entry_add(spt, we, index);
1622                if (ret)
1623                        goto fail;
1624        }
1625
1626        ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
1627        if (ret)
1628                goto fail;
1629
1630        if (!new_present) {
1631                /* For 64KB split entries, we need to clear them all. */
1632                if (ops->test_64k_splited(&old_se) &&
1633                    !(index % GTT_64K_PTE_STRIDE)) {
1634                        gvt_vdbg_mm("remove split 64K shadow entries\n");
1635                        for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1636                                ops->clear_64k_splited(&old_se);
1637                                ops->set_pfn(&old_se,
1638                                        vgpu->gtt.scratch_pt[type].page_mfn);
1639                                ppgtt_set_shadow_entry(spt, &old_se, index + i);
1640                        }
1641                } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
1642                           old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
1643                        ops->clear_pse(&old_se);
1644                        ops->set_pfn(&old_se,
1645                                     vgpu->gtt.scratch_pt[type].page_mfn);
1646                        ppgtt_set_shadow_entry(spt, &old_se, index);
1647                } else {
1648                        ops->set_pfn(&old_se,
1649                                     vgpu->gtt.scratch_pt[type].page_mfn);
1650                        ppgtt_set_shadow_entry(spt, &old_se, index);
1651                }
1652        }
1653
1654        return 0;
1655fail:
1656        gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
1657                        spt, we->val64, we->type);
1658        return ret;
1659}
1660
1661
1662
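    /*
     * A guest page table page is a candidate for out-of-sync tracking only
     * when the feature is enabled, the page is a leaf PTE page table and it
     * has been written at least twice.
     */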
1663static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
1664{
1665        return enable_out_of_sync
1666                && gtt_type_is_pte_pt(spt->guest_page.type)
1667                && spt->guest_page.write_cnt >= 2;
1668}
1669
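    /*
     * Record a deferred ("post shadow") update: mark the written index in the
     * spt's bitmap and queue the spt on the per-vGPU post_shadow list, so that
     * intel_vgpu_flush_post_shadow() re-shadows it before the next workload
     * is submitted.
     */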
1670static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
1671                unsigned long index)
1672{
1673        set_bit(index, spt->post_shadow_bitmap);
1674        if (!list_empty(&spt->post_shadow_list))
1675                return;
1676
1677        list_add_tail(&spt->post_shadow_list,
1678                        &spt->vgpu->gtt.post_shadow_list_head);
1679}
1680
1681/**
1682 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1683 * @vgpu: a vGPU
1684 *
1685 * This function is called before submitting a guest workload to host,
1686 * to flush all the post shadows for a vGPU.
1687 *
1688 * Returns:
1689 * Zero on success, negative error code if failed.
1690 */
1691int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1692{
1693        struct list_head *pos, *n;
1694        struct intel_vgpu_ppgtt_spt *spt;
1695        struct intel_gvt_gtt_entry ge;
1696        unsigned long index;
1697        int ret;
1698
1699        list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1700                spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1701                                post_shadow_list);
1702
1703                for_each_set_bit(index, spt->post_shadow_bitmap,
1704                                GTT_ENTRY_NUM_IN_ONE_PAGE) {
1705                        ppgtt_get_guest_entry(spt, &ge, index);
1706
1707                        ret = ppgtt_handle_guest_write_page_table(spt,
1708                                                        &ge, index);
1709                        if (ret)
1710                                return ret;
1711                        clear_bit(index, spt->post_shadow_bitmap);
1712                }
1713                list_del_init(&spt->post_shadow_list);
1714        }
1715        return 0;
1716}
1717
1718static int ppgtt_handle_guest_write_page_table_bytes(
1719                struct intel_vgpu_ppgtt_spt *spt,
1720                u64 pa, void *p_data, int bytes)
1721{
1722        struct intel_vgpu *vgpu = spt->vgpu;
1723        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1724        const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1725        struct intel_gvt_gtt_entry we, se;
1726        unsigned long index;
1727        int ret;
1728
1729        index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1730
1731        ppgtt_get_guest_entry(spt, &we, index);
1732
1733        /*
1734         * For a page table that uses 64K gtt entries, only PTE#0, PTE#16,
1735         * PTE#32, ... PTE#496 are used. Updates to the unused PTEs should
1736         * be ignored.
1737         */
1738        if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
1739            (index % GTT_64K_PTE_STRIDE)) {
1740                gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
1741                            index);
1742                return 0;
1743        }
1744
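            /*
             * A full-sized write updates the shadow entry immediately; a
             * partial write only invalidates the old shadow entry (pointing
             * it at the scratch page) and defers re-shadowing to the post
             * shadow path, once the guest has written the complete entry.
             */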
1745        if (bytes == info->gtt_entry_size) {
1746                ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
1747                if (ret)
1748                        return ret;
1749        } else {
1750                if (!test_bit(index, spt->post_shadow_bitmap)) {
1751                        int type = spt->shadow_page.type;
1752
1753                        ppgtt_get_shadow_entry(spt, &se, index);
1754                        ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
1755                        if (ret)
1756                                return ret;
1757                        ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
1758                        ppgtt_set_shadow_entry(spt, &se, index);
1759                }
1760                ppgtt_set_post_shadow(spt, index);
1761        }
1762
1763        if (!enable_out_of_sync)
1764                return 0;
1765
1766        spt->guest_page.write_cnt++;
1767
1768        if (spt->guest_page.oos_page)
1769                ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
1770                                false, 0, vgpu);
1771
1772        if (can_do_out_of_sync(spt)) {
1773                if (!spt->guest_page.oos_page)
1774                        ppgtt_allocate_oos_page(spt);
1775
1776                ret = ppgtt_set_guest_page_oos(spt);
1777                if (ret < 0)
1778                        return ret;
1779        }
1780        return 0;
1781}
1782
1783static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
1784{
1785        struct intel_vgpu *vgpu = mm->vgpu;
1786        struct intel_gvt *gvt = vgpu->gvt;
1787        struct intel_gvt_gtt *gtt = &gvt->gtt;
1788        struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1789        struct intel_gvt_gtt_entry se;
1790        int index;
1791
1792        if (!mm->ppgtt_mm.shadowed)
1793                return;
1794
1795        for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
1796                ppgtt_get_shadow_root_entry(mm, &se, index);
1797
1798                if (!ops->test_present(&se))
1799                        continue;
1800
1801                ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
1802                se.val64 = 0;
1803                ppgtt_set_shadow_root_entry(mm, &se, index);
1804
1805                trace_spt_guest_change(vgpu->id, "destroy root pointer",
1806                                       NULL, se.type, se.val64, index);
1807        }
1808
1809        mm->ppgtt_mm.shadowed = false;
1810}
1811
1812
1813static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
1814{
1815        struct intel_vgpu *vgpu = mm->vgpu;
1816        struct intel_gvt *gvt = vgpu->gvt;
1817        struct intel_gvt_gtt *gtt = &gvt->gtt;
1818        struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1819        struct intel_vgpu_ppgtt_spt *spt;
1820        struct intel_gvt_gtt_entry ge, se;
1821        int index, ret;
1822
1823        if (mm->ppgtt_mm.shadowed)
1824                return 0;
1825
1826        mm->ppgtt_mm.shadowed = true;
1827
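            /*
             * Walk every guest root entry; for each present one, build or
             * reuse the corresponding shadow page table tree and write its
             * address into the matching shadow root entry.
             */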
1828        for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
1829                ppgtt_get_guest_root_entry(mm, &ge, index);
1830
1831                if (!ops->test_present(&ge))
1832                        continue;
1833
1834                trace_spt_guest_change(vgpu->id, __func__, NULL,
1835                                       ge.type, ge.val64, index);
1836
1837                spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1838                if (IS_ERR(spt)) {
1839                        gvt_vgpu_err("fail to populate guest root pointer\n");
1840                        ret = PTR_ERR(spt);
1841                        goto fail;
1842                }
1843                ppgtt_generate_shadow_entry(&se, spt, &ge);
1844                ppgtt_set_shadow_root_entry(mm, &se, index);
1845
1846                trace_spt_guest_change(vgpu->id, "populate root pointer",
1847                                       NULL, se.type, se.val64, index);
1848        }
1849
1850        return 0;
1851fail:
1852        invalidate_ppgtt_mm(mm);
1853        return ret;
1854}
1855
1856static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1857{
1858        struct intel_vgpu_mm *mm;
1859
1860        mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1861        if (!mm)
1862                return NULL;
1863
1864        mm->vgpu = vgpu;
1865        kref_init(&mm->ref);
1866        atomic_set(&mm->pincount, 0);
1867
1868        return mm;
1869}
1870
1871static void vgpu_free_mm(struct intel_vgpu_mm *mm)
1872{
1873        kfree(mm);
1874}
1875
1876/**
1877 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
1878 * @vgpu: a vGPU
1879 * @root_entry_type: ppgtt root entry type
1880 * @pdps: guest pdps.
1881 *
1882 * This function is used to create a ppgtt mm object for a vGPU.
1883 *
1884 * Returns:
1885 * The pointer to the created mm object on success, ERR_PTR() on failure.
1886 */
1887struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1888                enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
1889{
1890        struct intel_gvt *gvt = vgpu->gvt;
1891        struct intel_vgpu_mm *mm;
1892        int ret;
1893
1894        mm = vgpu_alloc_mm(vgpu);
1895        if (!mm)
1896                return ERR_PTR(-ENOMEM);
1897
1898        mm->type = INTEL_GVT_MM_PPGTT;
1899
1900        GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1901                   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1902        mm->ppgtt_mm.root_entry_type = root_entry_type;
1903
1904        INIT_LIST_HEAD(&mm->ppgtt_mm.list);
1905        INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
1906        INIT_LIST_HEAD(&mm->ppgtt_mm.link);
1907
1908        if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1909                mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1910        else
1911                memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1912                       sizeof(mm->ppgtt_mm.guest_pdps));
1913
1914        ret = shadow_ppgtt_mm(mm);
1915        if (ret) {
1916                gvt_vgpu_err("failed to shadow ppgtt mm\n");
1917                vgpu_free_mm(mm);
1918                return ERR_PTR(ret);
1919        }
1920
1921        list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1922
1923        mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1924        list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
1925        mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1926
1927        return mm;
1928}
1929
1930static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1931{
1932        struct intel_vgpu_mm *mm;
1933        unsigned long nr_entries;
1934
1935        mm = vgpu_alloc_mm(vgpu);
1936        if (!mm)
1937                return ERR_PTR(-ENOMEM);
1938
1939        mm->type = INTEL_GVT_MM_GGTT;
1940
1941        nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1942        mm->ggtt_mm.virtual_ggtt =
1943                vzalloc(array_size(nr_entries,
1944                                   vgpu->gvt->device_info.gtt_entry_size));
1945        if (!mm->ggtt_mm.virtual_ggtt) {
1946                vgpu_free_mm(mm);
1947                return ERR_PTR(-ENOMEM);
1948        }
1949
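            /*
             * host_ggtt_aperture/host_ggtt_hidden keep a copy of the host
             * GGTT PTEs for this vGPU's aperture and hidden GM ranges, so
             * that intel_gvt_restore_ggtt() can re-program them at resume.
             */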
1950        mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1951        if (!mm->ggtt_mm.host_ggtt_aperture) {
1952                vfree(mm->ggtt_mm.virtual_ggtt);
1953                vgpu_free_mm(mm);
1954                return ERR_PTR(-ENOMEM);
1955        }
1956
1957        mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1958        if (!mm->ggtt_mm.host_ggtt_hidden) {
1959                vfree(mm->ggtt_mm.host_ggtt_aperture);
1960                vfree(mm->ggtt_mm.virtual_ggtt);
1961                vgpu_free_mm(mm);
1962                return ERR_PTR(-ENOMEM);
1963        }
1964
1965        return mm;
1966}
1967
1968/**
1969 * _intel_vgpu_mm_release - destroy a mm object
1970 * @mm_ref: a kref object
1971 *
1972 * This function is used to destroy a mm object for vGPU
1973 *
1974 */
1975void _intel_vgpu_mm_release(struct kref *mm_ref)
1976{
1977        struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1978
1979        if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1980                gvt_err("vgpu mm pin count bug detected\n");
1981
1982        if (mm->type == INTEL_GVT_MM_PPGTT) {
1983                list_del(&mm->ppgtt_mm.list);
1984
1985                mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1986                list_del(&mm->ppgtt_mm.lru_list);
1987                mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1988
1989                invalidate_ppgtt_mm(mm);
1990        } else {
1991                vfree(mm->ggtt_mm.virtual_ggtt);
1992                vfree(mm->ggtt_mm.host_ggtt_aperture);
1993                vfree(mm->ggtt_mm.host_ggtt_hidden);
1994        }
1995
1996        vgpu_free_mm(mm);
1997}
1998
1999/**
2000 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
2001 * @mm: a vGPU mm object
2002 *
2003 * This function is called when a user no longer wants to use a vGPU mm object.
2004 */
2005void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
2006{
2007        atomic_dec_if_positive(&mm->pincount);
2008}
2009
2010/**
2011 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
2012 * @mm: target vgpu mm
2013 *
2014 * This function is called when a user wants to use a vGPU mm object. If this
2015 * mm object hasn't been shadowed yet, the shadow will be populated at this
2016 * time.
2017 *
2018 * Returns:
2019 * Zero on success, negative error code if failed.
2020 */
2021int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
2022{
2023        int ret;
2024
2025        atomic_inc(&mm->pincount);
2026
2027        if (mm->type == INTEL_GVT_MM_PPGTT) {
2028                ret = shadow_ppgtt_mm(mm);
2029                if (ret)
2030                        return ret;
2031
2032                mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2033                list_move_tail(&mm->ppgtt_mm.lru_list,
2034                               &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
2035                mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2036        }
2037
2038        return 0;
2039}
2040
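    /*
     * Reclaim the shadow page tables of one unpinned PPGTT mm, starting from
     * the least recently used end of the global LRU list. Returns 1 if an mm
     * was invalidated, 0 if nothing could be reclaimed.
     */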
2041static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
2042{
2043        struct intel_vgpu_mm *mm;
2044        struct list_head *pos, *n;
2045
2046        mutex_lock(&gvt->gtt.ppgtt_mm_lock);
2047
2048        list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
2049                mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
2050
2051                if (atomic_read(&mm->pincount))
2052                        continue;
2053
2054                list_del_init(&mm->ppgtt_mm.lru_list);
2055                mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2056                invalidate_ppgtt_mm(mm);
2057                return 1;
2058        }
2059        mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2060        return 0;
2061}
2062
2063/*
2064 * GMA translation APIs.
2065 */
2066static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
2067                struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
2068{
2069        struct intel_vgpu *vgpu = mm->vgpu;
2070        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2071        struct intel_vgpu_ppgtt_spt *s;
2072
2073        s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
2074        if (!s)
2075                return -ENXIO;
2076
2077        if (!guest)
2078                ppgtt_get_shadow_entry(s, e, index);
2079        else
2080                ppgtt_get_guest_entry(s, e, index);
2081        return 0;
2082}
2083
2084/**
2085 * intel_vgpu_gma_to_gpa - translate a gma to GPA
2086 * @mm: mm object. could be a PPGTT or GGTT mm object
2087 * @gma: graphics memory address in this mm object
2088 *
2089 * This function is used to translate a graphics memory address in a specific
2090 * graphics memory space to a guest physical address.
2091 *
2092 * Returns:
2093 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
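     *
     * A typical caller (hypothetical sketch) checks the result before use:
     *
     *      gpa = intel_vgpu_gma_to_gpa(mm, gma);
     *      if (gpa == INTEL_GVT_INVALID_ADDR)
     *              return -EFAULT;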
2094 */
2095unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
2096{
2097        struct intel_vgpu *vgpu = mm->vgpu;
2098        struct intel_gvt *gvt = vgpu->gvt;
2099        struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
2100        struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
2101        unsigned long gpa = INTEL_GVT_INVALID_ADDR;
2102        unsigned long gma_index[4];
2103        struct intel_gvt_gtt_entry e;
2104        int i, levels = 0;
2105        int ret;
2106
2107        GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
2108                   mm->type != INTEL_GVT_MM_PPGTT);
2109
2110        if (mm->type == INTEL_GVT_MM_GGTT) {
2111                if (!vgpu_gmadr_is_valid(vgpu, gma))
2112                        goto err;
2113
2114                ggtt_get_guest_entry(mm, &e,
2115                        gma_ops->gma_to_ggtt_pte_index(gma));
2116
2117                gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
2118                        + (gma & ~I915_GTT_PAGE_MASK);
2119
2120                trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
2121        } else {
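                    /*
                     * For a PPGTT address, split the GMA into per-level table
                     * indexes according to the root entry type, then walk the
                     * shadow page tables level by level below.
                     */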
2122                switch (mm->ppgtt_mm.root_entry_type) {
2123                case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2124                        ppgtt_get_shadow_root_entry(mm, &e, 0);
2125
2126                        gma_index[0] = gma_ops->gma_to_pml4_index(gma);
2127                        gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
2128                        gma_index[2] = gma_ops->gma_to_pde_index(gma);
2129                        gma_index[3] = gma_ops->gma_to_pte_index(gma);
2130                        levels = 4;
2131                        break;
2132                case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2133                        ppgtt_get_shadow_root_entry(mm, &e,
2134                                        gma_ops->gma_to_l3_pdp_index(gma));
2135
2136                        gma_index[0] = gma_ops->gma_to_pde_index(gma);
2137                        gma_index[1] = gma_ops->gma_to_pte_index(gma);
2138                        levels = 2;
2139                        break;
2140                default:
2141                        GEM_BUG_ON(1);
2142                }
2143
2144                /* walk the shadow page table and get gpa from guest entry */
2145                for (i = 0; i < levels; i++) {
2146                        ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
2147                                (i == levels - 1));
2148                        if (ret)
2149                                goto err;
2150
2151                        if (!pte_ops->test_present(&e)) {
2152                                gvt_dbg_core("GMA 0x%lx is not present\n", gma);
2153                                goto err;
2154                        }
2155                }
2156
2157                gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
2158                                        (gma & ~I915_GTT_PAGE_MASK);
2159                trace_gma_translate(vgpu->id, "ppgtt", 0,
2160                                    mm->ppgtt_mm.root_entry_type, gma, gpa);
2161        }
2162
2163        return gpa;
2164err:
2165        gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
2166        return INTEL_GVT_INVALID_ADDR;
2167}
2168
2169static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
2170        unsigned int off, void *p_data, unsigned int bytes)
2171{
2172        struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2173        const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2174        unsigned long index = off >> info->gtt_entry_size_shift;
2175        unsigned long gma;
2176        struct intel_gvt_gtt_entry e;
2177
2178        if (bytes != 4 && bytes != 8)
2179                return -EINVAL;
2180
2181        gma = index << I915_GTT_PAGE_SHIFT;
2182        if (!intel_gvt_ggtt_validate_range(vgpu,
2183                                           gma, 1 << I915_GTT_PAGE_SHIFT)) {
2184                gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
2185                memset(p_data, 0, bytes);
2186                return 0;
2187        }
2188
2189        ggtt_get_guest_entry(ggtt_mm, &e, index);
2190        memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
2191                        bytes);
2192        return 0;
2193}
2194
2195/**
2196 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
2197 * @vgpu: a vGPU
2198 * @off: register offset
2199 * @p_data: data will be returned to guest
2200 * @bytes: data length
2201 *
2202 * This function is used to emulate the GTT MMIO register read
2203 *
2204 * Returns:
2205 * Zero on success, error code if failed.
2206 */
2207int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
2208        void *p_data, unsigned int bytes)
2209{
2210        const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2211        int ret;
2212
2213        if (bytes != 4 && bytes != 8)
2214                return -EINVAL;
2215
2216        off -= info->gtt_start_offset;
2217        ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
2218        return ret;
2219}
2220
2221static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
2222                struct intel_gvt_gtt_entry *entry)
2223{
2224        struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2225        unsigned long pfn;
2226
2227        pfn = pte_ops->get_pfn(entry);
2228        if (pfn != vgpu->gvt->gtt.scratch_mfn)
2229                intel_gvt_hypervisor_dma_unmap_guest_page(vgpu,
2230                                                pfn << PAGE_SHIFT);
2231}
2232
2233static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
2234        void *p_data, unsigned int bytes)
2235{
2236        struct intel_gvt *gvt = vgpu->gvt;
2237        const struct intel_gvt_device_info *info = &gvt->device_info;
2238        struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2239        struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
2240        unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
2241        unsigned long gma, gfn;
2242        struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2243        struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2244        dma_addr_t dma_addr;
2245        int ret;
2246        struct intel_gvt_partial_pte *partial_pte, *pos, *n;
2247        bool partial_update = false;
2248
2249        if (bytes != 4 && bytes != 8)
2250                return -EINVAL;
2251
2252        gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
2253
2254        /* the VM may configure the whole GM space when ballooning is used */
2255        if (!vgpu_gmadr_is_valid(vgpu, gma))
2256                return 0;
2257
2258        e.type = GTT_TYPE_GGTT_PTE;
2259        memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
2260                        bytes);
2261
2262        /* If the ggtt entry size is 8 bytes and it is split into two 4-byte
2263         * writes, save the first 4 bytes in a list and update the virtual
2264         * PTE. Only update the shadow PTE when the second 4 bytes arrive.
2265         */
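            /*
             * For example, assuming an 8-byte GGTT entry: a guest that writes
             * the low dword at offset 0x100 and the high dword at offset
             * 0x104 touches the same entry index 0x20 (0x100 >> 3); the first
             * 4-byte write is parked on partial_pte_list and only the
             * combined 64-bit value is shadowed when the second half arrives.
             */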
2266        if (bytes < info->gtt_entry_size) {
2267                bool found = false;
2268
2269                list_for_each_entry_safe(pos, n,
2270                                &ggtt_mm->ggtt_mm.partial_pte_list, list) {
2271                        if (g_gtt_index == pos->offset >>
2272                                        info->gtt_entry_size_shift) {
2273                                if (off != pos->offset) {
2274                                        /* the second partial part */
2275                                        int last_off = pos->offset &
2276                                                (info->gtt_entry_size - 1);
2277
2278                                        memcpy((void *)&e.val64 + last_off,
2279                                                (void *)&pos->data + last_off,
2280                                                bytes);
2281
2282                                        list_del(&pos->list);
2283                                        kfree(pos);
2284                                        found = true;
2285                                        break;
2286                                }
2287
2288                                /* update of the first partial part */
2289                                pos->data = e.val64;
2290                                ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2291                                return 0;
2292                        }
2293                }
2294
2295                if (!found) {
2296                        /* the first partial part */
2297                        partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
2298                        if (!partial_pte)
2299                                return -ENOMEM;
2300                        partial_pte->offset = off;
2301                        partial_pte->data = e.val64;
2302                        list_add_tail(&partial_pte->list,
2303                                &ggtt_mm->ggtt_mm.partial_pte_list);
2304                        partial_update = true;
2305                }
2306        }
2307
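            /*
             * Once a complete, present guest entry is available, pin and
             * dma-map the guest page and point the shadow (host) entry at the
             * resulting dma address; otherwise point it at the scratch page
             * so stray accesses are harmless.
             */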
2308        if (!partial_update && (ops->test_present(&e))) {
2309                gfn = ops->get_pfn(&e);
2310                m.val64 = e.val64;
2311                m.type = e.type;
2312
2313                /* one PTE update may be issued in multiple writes and the
2314                 * first write may not construct a valid gfn
2315                 */
2316                if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
2317                        ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2318                        goto out;
2319                }
2320
2321                ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
2322                                                        PAGE_SIZE, &dma_addr);
2323                if (ret) {
2324                        gvt_vgpu_err("fail to populate guest ggtt entry\n");
2325                        /* The guest driver may read/write the entry while it is
2326                         * only partially updated; in this situation the p2m mapping
2327                         * can fail, so set the shadow entry to point to a scratch page.
2328                         */
2329                        ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2330                } else
2331                        ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
2332        } else {
2333                ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2334                ops->clear_present(&m);
2335        }
2336
2337out:
2338        ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2339
2340        ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
2341        ggtt_invalidate_pte(vgpu, &e);
2342
2343        ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
2344        ggtt_invalidate(gvt->gt);
2345        return 0;
2346}
2347
2348/**
2349 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
2350 * @vgpu: a vGPU
2351 * @off: register offset
2352 * @p_data: data from guest write
2353 * @bytes: data length
2354 *
2355 * This function is used to emulate the GTT MMIO register write
2356 *
2357 * Returns:
2358 * Zero on success, error code if failed.
2359 */
2360int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
2361                unsigned int off, void *p_data, unsigned int bytes)
2362{
2363        const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2364        int ret;
2365        struct intel_vgpu_submission *s = &vgpu->submission;
2366        struct intel_engine_cs *engine;
2367        int i;
2368
2369        if (bytes != 4 && bytes != 8)
2370                return -EINVAL;
2371
2372        off -= info->gtt_start_offset;
2373        ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
2374
2375        /* If the ggtt of the last submitted context is written,
2376         * that context probably got unpinned.
2377         * Set the last shadowed ctx to invalid.
2378         */
2379        for_each_engine(engine, vgpu->gvt->gt, i) {
2380                if (!s->last_ctx[i].valid)
2381                        continue;
2382
2383                if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
2384                        s->last_ctx[i].valid = false;
2385        }
2386        return ret;
2387}
2388
2389static int alloc_scratch_pages(struct intel_vgpu *vgpu,
2390                enum intel_gvt_gtt_type type)
2391{
2392        struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
2393        struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2394        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2395        int page_entry_num = I915_GTT_PAGE_SIZE >>
2396                                vgpu->gvt->device_info.gtt_entry_size_shift;
2397        void *scratch_pt;
2398        int i;
2399        struct device *dev = vgpu->gvt->gt->i915->drm.dev;
2400        dma_addr_t daddr;
2401
2402        if (drm_WARN_ON(&i915->drm,
2403                        type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
2404                return -EINVAL;
2405
2406        scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
2407        if (!scratch_pt) {
2408                gvt_vgpu_err("fail to allocate scratch page\n");
2409                return -ENOMEM;
2410        }
2411
2412        daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
2413                        4096, PCI_DMA_BIDIRECTIONAL);
2414        if (dma_mapping_error(dev, daddr)) {
2415                gvt_vgpu_err("fail to dmamap scratch_pt\n");
2416                __free_page(virt_to_page(scratch_pt));
2417                return -ENOMEM;
2418        }
2419        gtt->scratch_pt[type].page_mfn =
2420                (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2421        gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
2422        gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
2423                        vgpu->id, type, gtt->scratch_pt[type].page_mfn);
2424
2425        /* Build the tree by fully filling the scratch pt with entries that
2426         * point to the next-level scratch pt or scratch page. scratch_pt[type]
2427         * indicates the scratch pt/scratch page used by the 'type' page table.
2428         * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
2429         * GTT_TYPE_PPGTT_PDE_PT level pt, which means this scratch_pt itself
2430         * is of type GTT_TYPE_PPGTT_PTE_PT and is fully filled with the
2431         * scratch page mfn.
2432         */
2433        if (type > GTT_TYPE_PPGTT_PTE_PT) {
2434                struct intel_gvt_gtt_entry se;
2435
2436                memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
2437                se.type = get_entry_type(type - 1);
2438                ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
2439
2440                /* The entry parameters such as present/writeable/cache type
2441                 * are set the same as in i915's scratch page tree.
2442                 */
2443                se.val64 |= _PAGE_PRESENT | _PAGE_RW;
2444                if (type == GTT_TYPE_PPGTT_PDE_PT)
2445                        se.val64 |= PPAT_CACHED;
2446
2447                for (i = 0; i < page_entry_num; i++)
2448                        ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
2449        }
2450
2451        return 0;
2452}
2453
2454static int release_scratch_page_tree(struct intel_vgpu *vgpu)
2455{
2456        int i;
2457        struct device *dev = vgpu->gvt->gt->i915->drm.dev;
2458        dma_addr_t daddr;
2459
2460        for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2461                if (vgpu->gtt.scratch_pt[i].page != NULL) {
2462                        daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
2463                                        I915_GTT_PAGE_SHIFT);
2464                        dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2465                        __free_page(vgpu->gtt.scratch_pt[i].page);
2466                        vgpu->gtt.scratch_pt[i].page = NULL;
2467                        vgpu->gtt.scratch_pt[i].page_mfn = 0;
2468                }
2469        }
2470
2471        return 0;
2472}
2473
2474static int create_scratch_page_tree(struct intel_vgpu *vgpu)
2475{
2476        int i, ret;
2477
2478        for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2479                ret = alloc_scratch_pages(vgpu, i);
2480                if (ret)
2481                        goto err;
2482        }
2483
2484        return 0;
2485
2486err:
2487        release_scratch_page_tree(vgpu);
2488        return ret;
2489}
2490
2491/**
2492 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
2493 * @vgpu: a vGPU
2494 *
2495 * This function is used to initialize per-vGPU graphics memory virtualization
2496 * components.
2497 *
2498 * Returns:
2499 * Zero on success, error code if failed.
2500 */
2501int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
2502{
2503        struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2504
2505        INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
2506
2507        INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
2508        INIT_LIST_HEAD(&gtt->oos_page_list_head);
2509        INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2510
2511        gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2512        if (IS_ERR(gtt->ggtt_mm)) {
2513                gvt_vgpu_err("fail to create mm for ggtt.\n");
2514                return PTR_ERR(gtt->ggtt_mm);
2515        }
2516
2517        intel_vgpu_reset_ggtt(vgpu, false);
2518
2519        INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
2520
2521        return create_scratch_page_tree(vgpu);
2522}
2523
2524void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
2525{
2526        struct list_head *pos, *n;
2527        struct intel_vgpu_mm *mm;
2528
2529        list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2530                mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2531                intel_vgpu_destroy_mm(mm);
2532        }
2533
2534        if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
2535                gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2536
2537        if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
2538                gvt_err("Why do we still have unfreed spt?\n");
2539                ppgtt_free_all_spt(vgpu);
2540        }
2541}
2542
2543static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2544{
2545        struct intel_gvt_partial_pte *pos, *next;
2546
2547        list_for_each_entry_safe(pos, next,
2548                                 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
2549                                 list) {
2550                gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
2551                        pos->offset, pos->data);
2552                kfree(pos);
2553        }
2554        intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
2555        vgpu->gtt.ggtt_mm = NULL;
2556}
2557
2558/**
2559 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2560 * @vgpu: a vGPU
2561 *
2562 * This function is used to clean up per-vGPU graphics memory virtualization
2563 * components.
2567 */
2568void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2569{
2570        intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2571        intel_vgpu_destroy_ggtt_mm(vgpu);
2572        release_scratch_page_tree(vgpu);
2573}
2574
2575static void clean_spt_oos(struct intel_gvt *gvt)
2576{
2577        struct intel_gvt_gtt *gtt = &gvt->gtt;
2578        struct list_head *pos, *n;
2579        struct intel_vgpu_oos_page *oos_page;
2580
2581        WARN(!list_empty(&gtt->oos_page_use_list_head),
2582                "someone is still using oos page\n");
2583
2584        list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2585                oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2586                list_del(&oos_page->list);
2587                free_page((unsigned long)oos_page->mem);
2588                kfree(oos_page);
2589        }
2590}
2591
2592static int setup_spt_oos(struct intel_gvt *gvt)
2593{
2594        struct intel_gvt_gtt *gtt = &gvt->gtt;
2595        struct intel_vgpu_oos_page *oos_page;
2596        int i;
2597        int ret;
2598
2599        INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2600        INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2601
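            /*
             * Preallocate the oos page pool; each oos page holds a one-page
             * snapshot of a guest page table page that sync_oos_page() later
             * diffs against the current guest contents.
             */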
2602        for (i = 0; i < preallocated_oos_pages; i++) {
2603                oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2604                if (!oos_page) {
2605                        ret = -ENOMEM;
2606                        goto fail;
2607                }
2608                oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
2609                if (!oos_page->mem) {
2610                        ret = -ENOMEM;
2611                        kfree(oos_page);
2612                        goto fail;
2613                }
2614
2615                INIT_LIST_HEAD(&oos_page->list);
2616                INIT_LIST_HEAD(&oos_page->vm_list);
2617                oos_page->id = i;
2618                list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2619        }
2620
2621        gvt_dbg_mm("%d oos pages preallocated\n", i);
2622
2623        return 0;
2624fail:
2625        clean_spt_oos(gvt);
2626        return ret;
2627}
2628
2629/**
2630 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2631 * @vgpu: a vGPU
2632 * @pdps: pdp root array
2633 *
2634 * This function is used to find a PPGTT mm object from mm object pool
2635 *
2636 * Returns:
2637 * pointer to mm object on success, NULL if failed.
2638 */
2639struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2640                u64 pdps[])
2641{
2642        struct intel_vgpu_mm *mm;
2643        struct list_head *pos;
2644
2645        list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2646                mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2647
2648                switch (mm->ppgtt_mm.root_entry_type) {
2649                case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2650                        if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
2651                                return mm;
2652                        break;
2653                case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2654                        if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2655                                    sizeof(mm->ppgtt_mm.guest_pdps)))
2656                                return mm;
2657                        break;
2658                default:
2659                        GEM_BUG_ON(1);
2660                }
2661        }
2662        return NULL;
2663}
2664
2665/**
2666 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
2667 * @vgpu: a vGPU
2668 * @root_entry_type: ppgtt root entry type
2669 * @pdps: guest pdps
2670 *
2671 * This function is used to find or create a PPGTT mm object from a guest.
2672 *
2673 * Returns:
2674 * The pointer to the mm object on success, ERR_PTR() on failure.
2675 */
2676struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
2677                enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
2678{
2679        struct intel_vgpu_mm *mm;
2680
2681        mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2682        if (mm) {
2683                intel_vgpu_mm_get(mm);
2684        } else {
2685                mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
2686                if (IS_ERR(mm))
2687                        gvt_vgpu_err("fail to create mm\n");
2688        }
2689        return mm;
2690}
2691
2692/**
2693 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
2694 * @vgpu: a vGPU
2695 * @pdps: guest pdps
2696 *
2697 * This function is used to find a PPGTT mm object from a guest and destroy it.
2698 *
2699 * Returns:
2700 * Zero on success, negative error code if failed.
2701 */
2702int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
2703{
2704        struct intel_vgpu_mm *mm;
2705
2706        mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2707        if (!mm) {
2708                gvt_vgpu_err("fail to find ppgtt instance.\n");
2709                return -EINVAL;
2710        }
2711        intel_vgpu_mm_put(mm);
2712        return 0;
2713}
2714
2715/**
2716 * intel_gvt_init_gtt - initialize mm components of a GVT device
2717 * @gvt: GVT device
2718 *
2719 * This function is called at the initialization stage, to initialize
2720 * the mm components of a GVT device.
2721 *
2722 * Returns:
2723 * zero on success, negative error code if failed.
2724 */
2725int intel_gvt_init_gtt(struct intel_gvt *gvt)
2726{
2727        int ret;
2728        void *page;
2729        struct device *dev = gvt->gt->i915->drm.dev;
2730        dma_addr_t daddr;
2731
2732        gvt_dbg_core("init gtt\n");
2733
2734        gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2735        gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
2736
2737        page = (void *)get_zeroed_page(GFP_KERNEL);
2738        if (!page) {
2739                gvt_err("fail to allocate scratch ggtt page\n");
2740                return -ENOMEM;
2741        }
2742
2743        daddr = dma_map_page(dev, virt_to_page(page), 0,
2744                        4096, PCI_DMA_BIDIRECTIONAL);
2745        if (dma_mapping_error(dev, daddr)) {
2746                gvt_err("fail to dmamap scratch ggtt page\n");
2747                __free_page(virt_to_page(page));
2748                return -ENOMEM;
2749        }
2750
2751        gvt->gtt.scratch_page = virt_to_page(page);
2752        gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2753
2754        if (enable_out_of_sync) {
2755                ret = setup_spt_oos(gvt);
2756                if (ret) {
2757                        gvt_err("fail to initialize SPT oos\n");
2758                        dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2759                        __free_page(gvt->gtt.scratch_page);
2760                        return ret;
2761                }
2762        }
2763        INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
2764        mutex_init(&gvt->gtt.ppgtt_mm_lock);
2765        return 0;
2766}
2767
2768/**
2769 * intel_gvt_clean_gtt - clean up mm components of a GVT device
2770 * @gvt: GVT device
2771 *
2772 * This function is called at the driver unloading stage, to clean up
2773 * the mm components of a GVT device.
2774 *
2775 */
2776void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2777{
2778        struct device *dev = gvt->gt->i915->drm.dev;
2779        dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
2780                                        I915_GTT_PAGE_SHIFT);
2781
2782        dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2783
2784        __free_page(gvt->gtt.scratch_page);
2785
2786        if (enable_out_of_sync)
2787                clean_spt_oos(gvt);
2788}
2789
2790/**
2791 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
2792 * @vgpu: a vGPU
2793 *
2794 * This function is called to invalidate all PPGTT instances of a vGPU.
2795 *
2796 */
2797void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
2798{
2799        struct list_head *pos, *n;
2800        struct intel_vgpu_mm *mm;
2801
2802        list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2803                mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2804                if (mm->type == INTEL_GVT_MM_PPGTT) {
2805                        mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2806                        list_del_init(&mm->ppgtt_mm.lru_list);
2807                        mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2808                        if (mm->ppgtt_mm.shadowed)
2809                                invalidate_ppgtt_mm(mm);
2810                }
2811        }
2812}
2813
2814/**
2815 * intel_vgpu_reset_ggtt - reset all GGTT entries
2816 * @vgpu: a vGPU
2817 * @invalidate_old: invalidate old entries
2818 *
2819 * This function is called at the vGPU create stage
2820 * to reset all the GGTT entries.
2821 *
2822 */
2823void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
2824{
2825        struct intel_gvt *gvt = vgpu->gvt;
2826        struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2827        struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
2828        struct intel_gvt_gtt_entry old_entry;
2829        u32 index;
2830        u32 num_entries;
2831
2832        pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2833        pte_ops->set_present(&entry);
2834
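            /*
             * Point every GGTT entry in both the aperture and hidden ranges
             * at the scratch page, optionally unmapping the pages referenced
             * by the old host entries first.
             */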
2835        index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2836        num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2837        while (num_entries--) {
2838                if (invalidate_old) {
2839                        ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2840                        ggtt_invalidate_pte(vgpu, &old_entry);
2841                }
2842                ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2843        }
2844
2845        index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2846        num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2847        while (num_entries--) {
2848                if (invalidate_old) {
2849                        ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2850                        ggtt_invalidate_pte(vgpu, &old_entry);
2851                }
2852                ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2853        }
2854
2855        ggtt_invalidate(gvt->gt);
2856}
2857
2858/**
2859 * intel_vgpu_reset_gtt - reset all GTT related status
2860 * @vgpu: a vGPU
2861 *
2862 * This function is called from the vfio core to reset all
2863 * GTT related status, including GGTT, PPGTT, scratch page.
2864 *
2865 */
2866void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
2867{
2868        /* Shadow pages are only created when there is no page
2869         * table tracking data, so remove page tracking data after
2870         * removing the shadow pages.
2871         */
2872        intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2873        intel_vgpu_reset_ggtt(vgpu, true);
2874}
2875
2876/**
2877 * intel_gvt_restore_ggtt - restore all vGPUs' ggtt entries
2878 * @gvt: intel gvt device
2879 *
2880 * This function is called at driver resume stage to restore
2881 * GGTT entries of every vGPU.
2882 *
2883 */
2884void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
2885{
2886        struct intel_vgpu *vgpu;
2887        struct intel_vgpu_mm *mm;
2888        int id;
2889        gen8_pte_t pte;
2890        u32 idx, num_low, num_hi, offset;
2891
2892        /* Restore dirty host ggtt for all vGPUs */
2893        idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
2894                mm = vgpu->gtt.ggtt_mm;
2895
2896                num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2897                offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2898                for (idx = 0; idx < num_low; idx++) {
2899                        pte = mm->ggtt_mm.host_ggtt_aperture[idx];
2900                        if (pte & _PAGE_PRESENT)
2901                                write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2902                }
2903
2904                num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2905                offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2906                for (idx = 0; idx < num_hi; idx++) {
2907                        pte = mm->ggtt_mm.host_ggtt_hidden[idx];
2908                        if (pte & _PAGE_PRESENT)
2909                                write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2910                }
2911        }
2912}
2913