linux/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_lmem.h"

#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"

static u64 gen8_pde_encode(const dma_addr_t addr,
                           const enum i915_cache_level level)
{
        u64 pde = addr | _PAGE_PRESENT | _PAGE_RW;

        if (level != I915_CACHE_NONE)
                pde |= PPAT_CACHED_PDE;
        else
                pde |= PPAT_UNCACHED;

        return pde;
}

static u64 gen8_pte_encode(dma_addr_t addr,
                           enum i915_cache_level level,
                           u32 flags)
{
        gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;

        if (unlikely(flags & PTE_READ_ONLY))
                pte &= ~_PAGE_RW;

        if (flags & PTE_LM)
                pte |= GEN12_PPGTT_PTE_LM;

        switch (level) {
        case I915_CACHE_NONE:
                pte |= PPAT_UNCACHED;
                break;
        case I915_CACHE_WT:
                pte |= PPAT_DISPLAY_ELLC;
                break;
        default:
                pte |= PPAT_CACHED;
                break;
        }

        return pte;
}

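/*
 * Worked example (editor's illustration, not part of the upstream file):
 * for a hypothetical 4K system-memory page at DMA address 0x1234000 mapped
 * LLC-cached with no extra flags, gen8_pte_encode() above simply ORs the
 * access and PPAT bits into the address:
 *
 *      pte = gen8_pte_encode(0x1234000, I915_CACHE_LLC, 0);
 *          == 0x1234000 | _PAGE_PRESENT | _PAGE_RW | PPAT_CACHED
 *
 * Passing PTE_READ_ONLY would clear _PAGE_RW again, and PTE_LM would add
 * GEN12_PPGTT_PTE_LM for pages placed in local memory.
 */
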
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
        struct drm_i915_private *i915 = ppgtt->vm.i915;
        struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
        enum vgt_g2v_type msg;
        int i;

        if (create)
                atomic_inc(px_used(ppgtt->pd)); /* never remove */
        else
                atomic_dec(px_used(ppgtt->pd));

        mutex_lock(&i915->vgpu.lock);

        if (i915_vm_is_4lvl(&ppgtt->vm)) {
                const u64 daddr = px_dma(ppgtt->pd);

                intel_uncore_write(uncore,
                                   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
                intel_uncore_write(uncore,
                                   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

                msg = create ?
                        VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
                        VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
        } else {
                for (i = 0; i < GEN8_3LVL_PDPES; i++) {
                        const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

                        intel_uncore_write(uncore,
                                           vgtif_reg(pdp[i].lo),
                                           lower_32_bits(daddr));
                        intel_uncore_write(uncore,
                                           vgtif_reg(pdp[i].hi),
                                           upper_32_bits(daddr));
                }

                msg = create ?
                        VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
                        VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
        }

        /* g2v_notify atomically (via hv trap) consumes the message packet. */
        intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);

        mutex_unlock(&i915->vgpu.lock);
}

/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

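/*
 * Worked example (editor's illustration, not part of the upstream file):
 * with 4K pages GEN8_PDES == 512, so each level consumes 9 bits of the GTT
 * offset above the 12-bit page offset, and i915_pde_index() masks each
 * result with the 9-bit I915_PDE_MASK. For an offset of 0x1_0020_3000 on a
 * 4-level vm:
 *
 *      __gen8_pte_index(addr, 0) == (addr >> 12) & 0x1ff == 0x003   PTE
 *      __gen8_pte_index(addr, 1) == (addr >> 21) & 0x1ff == 0x001   PDE
 *      __gen8_pte_index(addr, 2) == (addr >> 30) & 0x1ff == 0x004   PDPE
 *      __gen8_pte_index(addr, 3) == (addr >> 39) & 0x1ff == 0x000   PML4E
 */
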
#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)

static unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
        const int shift = gen8_pd_shift(lvl);
        const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

        GEM_BUG_ON(start >= end);
        end += ~mask >> gen8_pd_shift(1);

        *idx = i915_pde_index(start, shift);
        if ((start ^ end) & mask)
                return GEN8_PDES - *idx;
        else
                return i915_pde_index(end, shift) - *idx;
}

static bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
        const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

        GEM_BUG_ON(start >= end);
        return (start ^ end) & mask && (start & ~mask) == 0;
}

static unsigned int gen8_pt_count(u64 start, u64 end)
{
        GEM_BUG_ON(start >= end);
        if ((start ^ end) >> gen8_pd_shift(1))
                return GEN8_PDES - (start & (GEN8_PDES - 1));
        else
                return end - start;
}

static unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
        unsigned int shift = __gen8_pte_shift(vm->top);

        return (vm->total + (1ull << shift) - 1) >> shift;
}

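/*
 * Worked example (editor's illustration, not part of the upstream file):
 * for a full 4-level vm (vm->top == 3, vm->total == 1ull << 48) the top
 * shift is 12 + 3 * 9 == 39, so gen8_pd_top_count() == 512 PML4 entries.
 * For a legacy 32b, 3-level vm (vm->top == 2, vm->total == 1ull << 32) the
 * shift is 30 and the count is 4, matching GEN8_3LVL_PDPES.
 */
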
static struct i915_page_directory *
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
        struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);

        if (vm->top == 2)
                return ppgtt->pd;
        else
                return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}

static struct i915_page_directory *
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
        return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
}

static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
                                 struct i915_page_directory *pd,
                                 int count, int lvl)
{
        if (lvl) {
                void **pde = pd->entry;

                do {
                        if (!*pde)
                                continue;

                        __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
                } while (pde++, --count);
        }

        free_px(vm, &pd->pt, lvl);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

        if (intel_vgpu_active(vm->i915))
                gen8_ppgtt_notify_vgt(ppgtt, false);

        __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
        free_scratch(vm);
}

static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
                              struct i915_page_directory * const pd,
                              u64 start, const u64 end, int lvl)
{
        const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
        unsigned int idx, len;

        GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

        len = gen8_pd_range(start, end, lvl--, &idx);
        DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
            __func__, vm, lvl + 1, start, end,
            idx, len, atomic_read(px_used(pd)));
        GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

        do {
                struct i915_page_table *pt = pd->entry[idx];

                if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
                    gen8_pd_contains(start, end, lvl)) {
                        DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
                            __func__, vm, lvl + 1, idx, start, end);
                        clear_pd_entry(pd, idx, scratch);
                        __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
                        start += (u64)I915_PDES << gen8_pd_shift(lvl);
                        continue;
                }

                if (lvl) {
                        start = __gen8_ppgtt_clear(vm, as_pd(pt),
                                                   start, end, lvl);
                } else {
                        unsigned int count;
                        u64 *vaddr;

                        count = gen8_pt_count(start, end);
                        DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
                            __func__, vm, lvl, start, end,
                            gen8_pd_index(start, 0), count,
                            atomic_read(&pt->used));
                        GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

                        vaddr = kmap_atomic_px(pt);
                        memset64(vaddr + gen8_pd_index(start, 0),
                                 vm->scratch[0]->encode,
                                 count);
                        kunmap_atomic(vaddr);

                        atomic_sub(count, &pt->used);
                        start += count;
                }

                if (release_pd_entry(pd, idx, pt, scratch))
                        free_px(vm, pt, lvl);
        } while (idx++, --len);

        return start;
}

static void gen8_ppgtt_clear(struct i915_address_space *vm,
                             u64 start, u64 length)
{
        GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
        GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
        GEM_BUG_ON(range_overflows(start, length, vm->total));

        start >>= GEN8_PTE_SHIFT;
        length >>= GEN8_PTE_SHIFT;
        GEM_BUG_ON(length == 0);

        __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
                           start, start + length, vm->top);
}

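/*
 * Worked example (editor's illustration, not part of the upstream file):
 * gen8_ppgtt_clear() works in page-sized units, so clearing 64K of GTT at
 * offset 0x10000 becomes
 *
 *      __gen8_ppgtt_clear(vm, pd, 0x10, 0x20, vm->top);
 *
 * i.e. PTE indices [0x10, 0x20), which the lvl == 0 branch points back at
 * vm->scratch[0]->encode.
 */
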
static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
                               struct i915_vm_pt_stash *stash,
                               struct i915_page_directory * const pd,
                               u64 * const start, const u64 end, int lvl)
{
        unsigned int idx, len;

        GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

        len = gen8_pd_range(*start, end, lvl--, &idx);
        DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
            __func__, vm, lvl + 1, *start, end,
            idx, len, atomic_read(px_used(pd)));
        GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

        spin_lock(&pd->lock);
        GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
        do {
                struct i915_page_table *pt = pd->entry[idx];

                if (!pt) {
                        spin_unlock(&pd->lock);

                        DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
                            __func__, vm, lvl + 1, idx);

                        pt = stash->pt[!!lvl];
                        __i915_gem_object_pin_pages(pt->base);
                        i915_gem_object_make_unshrinkable(pt->base);

                        if (lvl ||
                            gen8_pt_count(*start, end) < I915_PDES ||
                            intel_vgpu_active(vm->i915))
                                fill_px(pt, vm->scratch[lvl]->encode);

                        spin_lock(&pd->lock);
                        if (likely(!pd->entry[idx])) {
                                stash->pt[!!lvl] = pt->stash;
                                atomic_set(&pt->used, 0);
                                set_pd_entry(pd, idx, pt);
                        } else {
                                pt = pd->entry[idx];
                        }
                }

                if (lvl) {
                        atomic_inc(&pt->used);
                        spin_unlock(&pd->lock);

                        __gen8_ppgtt_alloc(vm, stash,
                                           as_pd(pt), start, end, lvl);

                        spin_lock(&pd->lock);
                        atomic_dec(&pt->used);
                        GEM_BUG_ON(!atomic_read(&pt->used));
                } else {
                        unsigned int count = gen8_pt_count(*start, end);

                        DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
                            __func__, vm, lvl, *start, end,
                            gen8_pd_index(*start, 0), count,
                            atomic_read(&pt->used));

                        atomic_add(count, &pt->used);
                        /* All other pdes may be simultaneously removed */
                        GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
                        *start += count;
                }
        } while (idx++, --len);
        spin_unlock(&pd->lock);
}

static void gen8_ppgtt_alloc(struct i915_address_space *vm,
                             struct i915_vm_pt_stash *stash,
                             u64 start, u64 length)
{
        GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
        GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
        GEM_BUG_ON(range_overflows(start, length, vm->total));

        start >>= GEN8_PTE_SHIFT;
        length >>= GEN8_PTE_SHIFT;
        GEM_BUG_ON(length == 0);

        __gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
                           &start, start + length, vm->top);
}

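/*
 * Editor's sketch (not part of the upstream file): gen8_ppgtt_alloc() only
 * consumes page tables that the caller has already reserved in the stash,
 * so a binding path of this era looks roughly like the following (helper
 * names assumed from intel_gtt.h; error handling omitted):
 *
 *      struct i915_vm_pt_stash stash = {};
 *
 *      i915_vm_alloc_pt_stash(vm, &stash, vma->size);
 *      i915_vm_pin_pt_stash(vm, &stash);
 *      vm->allocate_va_range(vm, &stash, vma->node.start, vma->size);
 *      i915_vm_free_pt_stash(vm, &stash);
 */
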
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
                      struct i915_page_directory *pdp,
                      struct sgt_dma *iter,
                      u64 idx,
                      enum i915_cache_level cache_level,
                      u32 flags)
{
        struct i915_page_directory *pd;
        const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
        gen8_pte_t *vaddr;

        pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
        vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
        do {
                GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
                vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;

                iter->dma += I915_GTT_PAGE_SIZE;
                if (iter->dma >= iter->max) {
                        iter->sg = __sg_next(iter->sg);
                        if (!iter->sg || sg_dma_len(iter->sg) == 0) {
                                idx = 0;
                                break;
                        }

                        iter->dma = sg_dma_address(iter->sg);
                        iter->max = iter->dma + sg_dma_len(iter->sg);
                }

                if (gen8_pd_index(++idx, 0) == 0) {
                        if (gen8_pd_index(idx, 1) == 0) {
                                /* Limited by sg length for 3lvl */
                                if (gen8_pd_index(idx, 2) == 0)
                                        break;

                                pd = pdp->entry[gen8_pd_index(idx, 2)];
                        }

                        clflush_cache_range(vaddr, PAGE_SIZE);
                        kunmap_atomic(vaddr);
                        vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
                }
        } while (1);
        clflush_cache_range(vaddr, PAGE_SIZE);
        kunmap_atomic(vaddr);

        return idx;
}

static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
                                   struct sgt_dma *iter,
                                   enum i915_cache_level cache_level,
                                   u32 flags)
{
        const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
        unsigned int rem = sg_dma_len(iter->sg);
        u64 start = vma->node.start;

        GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));

        do {
                struct i915_page_directory * const pdp =
                        gen8_pdp_for_page_address(vma->vm, start);
                struct i915_page_directory * const pd =
                        i915_pd_entry(pdp, __gen8_pte_index(start, 2));
                gen8_pte_t encode = pte_encode;
                unsigned int maybe_64K = -1;
                unsigned int page_size;
                gen8_pte_t *vaddr;
                u16 index;

                if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
                    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
                    rem >= I915_GTT_PAGE_SIZE_2M &&
                    !__gen8_pte_index(start, 0)) {
                        index = __gen8_pte_index(start, 1);
                        encode |= GEN8_PDE_PS_2M;
                        page_size = I915_GTT_PAGE_SIZE_2M;

                        vaddr = kmap_atomic_px(pd);
                } else {
                        struct i915_page_table *pt =
                                i915_pt_entry(pd, __gen8_pte_index(start, 1));

                        index = __gen8_pte_index(start, 0);
                        page_size = I915_GTT_PAGE_SIZE;

                        if (!index &&
                            vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
                            IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
                            (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
                             rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
                                maybe_64K = __gen8_pte_index(start, 1);

                        vaddr = kmap_atomic_px(pt);
                }

                do {
                        GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
                        vaddr[index++] = encode | iter->dma;

                        start += page_size;
                        iter->dma += page_size;
                        rem -= page_size;
                        if (iter->dma >= iter->max) {
                                iter->sg = __sg_next(iter->sg);
                                if (!iter->sg)
                                        break;

                                rem = sg_dma_len(iter->sg);
                                if (!rem)
                                        break;

                                iter->dma = sg_dma_address(iter->sg);
                                iter->max = iter->dma + rem;

                                if (maybe_64K != -1 && index < I915_PDES &&
                                    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
                                      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
                                       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
                                        maybe_64K = -1;

                                if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
                                        break;
                        }
                } while (rem >= page_size && index < I915_PDES);

                clflush_cache_range(vaddr, PAGE_SIZE);
                kunmap_atomic(vaddr);

                /*
                 * Is it safe to mark the 2M block as 64K? -- Either we have
                 * filled the whole page-table with 64K entries, or we have
                 * filled part of it, reached the end of the sg table, and
                 * have enough padding.
                 */
                if (maybe_64K != -1 &&
                    (index == I915_PDES ||
                     (i915_vm_has_scratch_64K(vma->vm) &&
                      !iter->sg && IS_ALIGNED(vma->node.start +
                                              vma->node.size,
                                              I915_GTT_PAGE_SIZE_2M)))) {
                        vaddr = kmap_atomic_px(pd);
                        vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
                        kunmap_atomic(vaddr);
                        page_size = I915_GTT_PAGE_SIZE_64K;

                        /*
                         * We write all 4K page entries, even when using 64K
                         * pages. In order to verify that the HW isn't cheating
                         * by using the 4K PTE instead of the 64K PTE, we want
                         * to remove all the surplus entries. If the HW skipped
                         * the 64K PTE, it will read/write into the scratch page
                         * instead - which we detect as missing results during
                         * selftests.
                         */
                        if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
                                u16 i;

                                encode = vma->vm->scratch[0]->encode;
                                vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));

                                for (i = 1; i < index; i += 16)
                                        memset64(vaddr + i, encode, 15);

                                kunmap_atomic(vaddr);
                        }
                }

                vma->page_sizes.gtt |= page_size;
        } while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert(struct i915_address_space *vm,
                              struct i915_vma *vma,
                              enum i915_cache_level cache_level,
                              u32 flags)
{
        struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
        struct sgt_dma iter = sgt_dma(vma);

        if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
                gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
        } else {
                u64 idx = vma->node.start >> GEN8_PTE_SHIFT;

                do {
                        struct i915_page_directory * const pdp =
                                gen8_pdp_for_page_index(vm, idx);

                        idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
                                                    cache_level, flags);
                } while (idx);

                vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
        }
}

static int gen8_init_scratch(struct i915_address_space *vm)
{
        u32 pte_flags;
        int ret;
        int i;

        /*
         * If everybody agrees not to write into the scratch page,
         * we can reuse it for all vm, keeping contexts and processes separate.
         */
        if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
                struct i915_address_space *clone = vm->gt->vm;

                GEM_BUG_ON(!clone->has_read_only);

                vm->scratch_order = clone->scratch_order;
                for (i = 0; i <= vm->top; i++)
                        vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);

                return 0;
        }

        ret = setup_scratch_page(vm);
        if (ret)
                return ret;

        pte_flags = vm->has_read_only;
        if (i915_gem_object_is_lmem(vm->scratch[0]))
                pte_flags |= PTE_LM;

        vm->scratch[0]->encode =
                gen8_pte_encode(px_dma(vm->scratch[0]),
                                I915_CACHE_LLC, pte_flags);

        for (i = 1; i <= vm->top; i++) {
                struct drm_i915_gem_object *obj;

                obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
                if (IS_ERR(obj))
                        goto free_scratch;

                ret = pin_pt_dma(vm, obj);
                if (ret) {
                        i915_gem_object_put(obj);
                        goto free_scratch;
                }

                fill_px(obj, vm->scratch[i - 1]->encode);
                obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC);

                vm->scratch[i] = obj;
        }

        return 0;

free_scratch:
        while (i--)
                i915_gem_object_put(vm->scratch[i]);
        return -ENOMEM;
}

static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
        struct i915_address_space *vm = &ppgtt->vm;
        struct i915_page_directory *pd = ppgtt->pd;
        unsigned int idx;

        GEM_BUG_ON(vm->top != 2);
        GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);

        for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
                struct i915_page_directory *pde;
                int err;

                pde = alloc_pd(vm);
                if (IS_ERR(pde))
                        return PTR_ERR(pde);

                err = pin_pt_dma(vm, pde->pt.base);
                if (err) {
                        free_pd(vm, pde);
                        return err;
                }

                fill_px(pde, vm->scratch[1]->encode);
                set_pd_entry(pd, idx, pde);
                atomic_inc(px_used(pde)); /* keep pinned */
        }
        wmb();

        return 0;
}

static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
        const unsigned int count = gen8_pd_top_count(vm);
        struct i915_page_directory *pd;
        int err;

        GEM_BUG_ON(count > I915_PDES);

        pd = __alloc_pd(count);
        if (unlikely(!pd))
                return ERR_PTR(-ENOMEM);

        pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
        if (IS_ERR(pd->pt.base)) {
                err = PTR_ERR(pd->pt.base);
                pd->pt.base = NULL;
                goto err_pd;
        }

        err = pin_pt_dma(vm, pd->pt.base);
        if (err)
                goto err_pd;

        fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
        atomic_inc(px_used(pd)); /* mark as pinned */
        return pd;

err_pd:
        free_pd(vm, pd);
        return ERR_PTR(err);
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP
 * registers, with a net effect resembling a 2-level page table in normal x86
 * terms. Each PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of
 * legacy 32b address space.
 */
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
{
        struct i915_ppgtt *ppgtt;
        int err;

        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
        if (!ppgtt)
                return ERR_PTR(-ENOMEM);

        ppgtt_init(ppgtt, gt);
        ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
        ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));

        /*
         * From bdw, there is hw support for read-only pages in the PPGTT.
         *
         * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
         * for now.
         *
         * Gen12 has inherited the same read-only fault issue from gen11.
         */
        ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);

        ppgtt->vm.alloc_pt_dma = alloc_pt_dma;

        err = gen8_init_scratch(&ppgtt->vm);
        if (err)
                goto err_free;

        ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
        if (IS_ERR(ppgtt->pd)) {
                err = PTR_ERR(ppgtt->pd);
                goto err_free_scratch;
        }

        if (!i915_vm_is_4lvl(&ppgtt->vm)) {
                err = gen8_preallocate_top_level_pdp(ppgtt);
                if (err)
                        goto err_free_pd;
        }

        ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
        ppgtt->vm.insert_entries = gen8_ppgtt_insert;
        ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
        ppgtt->vm.clear_range = gen8_ppgtt_clear;

        ppgtt->vm.pte_encode = gen8_pte_encode;

        if (intel_vgpu_active(gt->i915))
                gen8_ppgtt_notify_vgt(ppgtt, true);

        ppgtt->vm.cleanup = gen8_ppgtt_cleanup;

        return ppgtt;

err_free_pd:
        __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
                             gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
err_free_scratch:
        free_scratch(&ppgtt->vm);
err_free:
        kfree(ppgtt);
        return ERR_PTR(err);
}

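/*
 * Example usage (editor's sketch, not part of the upstream file): the
 * generic ppgtt constructor picks this backend for gen8+ and does roughly
 *
 *      struct i915_ppgtt *ppgtt = gen8_ppgtt_create(gt);
 *
 *      if (IS_ERR(ppgtt))
 *              return ERR_CAST(ppgtt);
 *
 * with the vm torn down later through ppgtt->vm.cleanup, i.e.
 * gen8_ppgtt_cleanup() above.
 */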