linux/drivers/gpu/drm/i915/gt/intel_ggtt.c
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2020 Intel Corporation
   4 */
   5
   6#include <linux/stop_machine.h>
   7
   8#include <asm/set_memory.h>
   9#include <asm/smp.h>
  10
  11#include <drm/i915_drm.h>
  12
  13#include "gem/i915_gem_lmem.h"
  14
  15#include "intel_gt.h"
  16#include "i915_drv.h"
  17#include "i915_scatterlist.h"
  18#include "i915_vgpu.h"
  19
  20#include "intel_gtt.h"
  21#include "gen8_ppgtt.h"
  22
  23static int
  24i915_get_ggtt_vma_pages(struct i915_vma *vma);
  25
  26static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
  27                                   unsigned long color,
  28                                   u64 *start,
  29                                   u64 *end)
  30{
  31        if (i915_node_color_differs(node, color))
  32                *start += I915_GTT_PAGE_SIZE;
  33
  34        /*
  35         * Also leave a space between the unallocated reserved node after the
  36         * GTT and any objects within the GTT, i.e. we use the color adjustment
  37         * to insert a guard page to prevent prefetches crossing over the
  38         * GTT boundary.
  39         */
  40        node = list_next_entry(node, node_list);
  41        if (node->color != color)
  42                *end -= I915_GTT_PAGE_SIZE;
  43}
  44
  45static int ggtt_init_hw(struct i915_ggtt *ggtt)
  46{
  47        struct drm_i915_private *i915 = ggtt->vm.i915;
  48
  49        i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
  50
  51        ggtt->vm.is_ggtt = true;
  52
  53        /* Only VLV supports read-only GGTT mappings */
  54        ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
  55
  56        if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
  57                ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
  58
  59        if (ggtt->mappable_end) {
  60                if (!io_mapping_init_wc(&ggtt->iomap,
  61                                        ggtt->gmadr.start,
  62                                        ggtt->mappable_end)) {
  63                        ggtt->vm.cleanup(&ggtt->vm);
  64                        return -EIO;
  65                }
  66
  67                ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
  68                                              ggtt->mappable_end);
  69        }
  70
  71        intel_ggtt_init_fences(ggtt);
  72
  73        return 0;
  74}
  75
  76/**
  77 * i915_ggtt_init_hw - Initialize GGTT hardware
  78 * @i915: i915 device
  79 */
  80int i915_ggtt_init_hw(struct drm_i915_private *i915)
  81{
  82        int ret;
  83
  84        /*
  85         * Note that we use page colouring to enforce a guard page at the
  86         * end of the address space. This is required as the CS may prefetch
  87         * beyond the end of the batch buffer, across the page boundary,
  88         * and beyond the end of the GTT if we do not provide a guard.
  89         */
  90        ret = ggtt_init_hw(&i915->ggtt);
  91        if (ret)
  92                return ret;
  93
  94        return 0;
  95}
  96
  97/*
  98 * Certain Gen5 chipsets require idling the GPU before
  99 * unmapping anything from the GTT when VT-d is enabled.
 100 */
 101static bool needs_idle_maps(struct drm_i915_private *i915)
 102{
 103        /*
 104         * Query intel_iommu to see if we need the workaround. Presumably that
 105         * was loaded first.
 106         */
 107        if (!intel_vtd_active())
 108                return false;
 109
 110        if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
 111                return true;
 112
 113        if (GRAPHICS_VER(i915) == 12)
 114                return true; /* XXX DMAR fault reason 7 */
 115
 116        return false;
 117}
 118
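     /*
      * On suspend, evict any vma that holds a GGTT node but is not actually
      * bound into the global GTT (i.e. is only bound through the aliasing
      * ppgtt), then point every PTE at the scratch page. Pinned and globally
      * bound vmas keep their nodes and are rewritten by i915_ggtt_resume().
      */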
 119void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 120{
 121        struct i915_vma *vma, *vn;
 122        int open;
 123
 124        mutex_lock(&ggtt->vm.mutex);
 125
 126        /* Skip rewriting PTE on VMA unbind. */
 127        open = atomic_xchg(&ggtt->vm.open, 0);
 128
 129        list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 130                GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 131                i915_vma_wait_for_bind(vma);
 132
 133                if (i915_vma_is_pinned(vma))
 134                        continue;
 135
 136                if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
 137                        __i915_vma_evict(vma);
 138                        drm_mm_remove_node(&vma->node);
 139                }
 140        }
 141
 142        ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
 143        ggtt->invalidate(ggtt);
 144        atomic_set(&ggtt->vm.open, open);
 145
 146        mutex_unlock(&ggtt->vm.mutex);
 147
 148        intel_gt_check_and_clear_faults(ggtt->vm.gt);
 149}
 150
 151void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
 152{
 153        struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 154
 155        spin_lock_irq(&uncore->lock);
 156        intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 157        intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
 158        spin_unlock_irq(&uncore->lock);
 159}
 160
 161static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 162{
 163        struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 164
 165        /*
 166         * Note that as an uncached mmio write, this will flush the
 167         * WCB of the writes into the GGTT before it triggers the invalidate.
 168         */
 169        intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 170}
 171
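     /*
      * The GuC keeps its own TLB over the GGTT, so on top of the regular
      * gen8 invalidate we also poke the GuC invalidate register
      * (GEN8_GTCR, or GEN12_GUC_TLB_INV_CR on graphics version 12+).
      */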
 172static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 173{
 174        struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 175        struct drm_i915_private *i915 = ggtt->vm.i915;
 176
 177        gen8_ggtt_invalidate(ggtt);
 178
 179        if (GRAPHICS_VER(i915) >= 12)
 180                intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
 181                                      GEN12_GUC_TLB_INV_CR_INVALIDATE);
 182        else
 183                intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 184}
 185
 186static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
 187{
 188        intel_gtt_chipset_flush();
 189}
 190
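     /*
      * A gen8+ GGTT PTE is simply the page's dma address with the present
      * bit set; PTE_LM additionally marks pages backed by device-local
      * memory.
      */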
 191u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 192                         enum i915_cache_level level,
 193                         u32 flags)
 194{
 195        gen8_pte_t pte = addr | _PAGE_PRESENT;
 196
 197        if (flags & PTE_LM)
 198                pte |= GEN12_GGTT_PTE_LM;
 199
 200        return pte;
 201}
 202
 203static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 204{
 205        writeq(pte, addr);
 206}
 207
 208static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 209                                  dma_addr_t addr,
 210                                  u64 offset,
 211                                  enum i915_cache_level level,
 212                                  u32 flags)
 213{
 214        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 215        gen8_pte_t __iomem *pte =
 216                (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 217
 218        gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
 219
 220        ggtt->invalidate(ggtt);
 221}
 222
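     /*
      * Write one PTE per 4K page of the vma directly through the GSM
      * mapping, pad the remainder of the node with scratch PTEs, then issue
      * a single TLB invalidate once all the updates have landed.
      */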
 223static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 224                                     struct i915_vma *vma,
 225                                     enum i915_cache_level level,
 226                                     u32 flags)
 227{
 228        const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
 229        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 230        gen8_pte_t __iomem *gte;
 231        gen8_pte_t __iomem *end;
 232        struct sgt_iter iter;
 233        dma_addr_t addr;
 234
 235        /*
 236         * Note that we ignore PTE_READ_ONLY here. The caller must be careful
 237         * not to allow the user to override access to a read only page.
 238         */
 239
 240        gte = (gen8_pte_t __iomem *)ggtt->gsm;
 241        gte += vma->node.start / I915_GTT_PAGE_SIZE;
 242        end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 243
 244        for_each_sgt_daddr(addr, iter, vma->pages)
 245                gen8_set_pte(gte++, pte_encode | addr);
 246        GEM_BUG_ON(gte > end);
 247
 248        /* Fill the allocated but "unused" space beyond the end of the buffer */
 249        while (gte < end)
 250                gen8_set_pte(gte++, vm->scratch[0]->encode);
 251
 252        /*
 253         * We want to flush the TLBs only after we're certain all the PTE
 254         * updates have finished.
 255         */
 256        ggtt->invalidate(ggtt);
 257}
 258
 259static void gen6_ggtt_insert_page(struct i915_address_space *vm,
 260                                  dma_addr_t addr,
 261                                  u64 offset,
 262                                  enum i915_cache_level level,
 263                                  u32 flags)
 264{
 265        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 266        gen6_pte_t __iomem *pte =
 267                (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 268
 269        iowrite32(vm->pte_encode(addr, level, flags), pte);
 270
 271        ggtt->invalidate(ggtt);
 272}
 273
 274/*
 275 * Binds an object into the global gtt with the specified cache level.
 276 * The object will be accessible to the GPU via commands whose operands
 277 * reference offsets within the global GTT as well as accessible by the GPU
 278 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 279 */
 280static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 281                                     struct i915_vma *vma,
 282                                     enum i915_cache_level level,
 283                                     u32 flags)
 284{
 285        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 286        gen6_pte_t __iomem *gte;
 287        gen6_pte_t __iomem *end;
 288        struct sgt_iter iter;
 289        dma_addr_t addr;
 290
 291        gte = (gen6_pte_t __iomem *)ggtt->gsm;
 292        gte += vma->node.start / I915_GTT_PAGE_SIZE;
 293        end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 294
 295        for_each_sgt_daddr(addr, iter, vma->pages)
 296                iowrite32(vm->pte_encode(addr, level, flags), gte++);
 297        GEM_BUG_ON(gte > end);
 298
 299        /* Fill the allocated but "unused" space beyond the end of the buffer */
 300        while (gte < end)
 301                iowrite32(vm->scratch[0]->encode, gte++);
 302
 303        /*
 304         * We want to flush the TLBs only after we're certain all the PTE
 305         * updates have finished.
 306         */
 307        ggtt->invalidate(ggtt);
 308}
 309
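     /*
      * Where it is safe to leave stale PTEs behind on unbind, clearing the
      * range is skipped entirely; the VT-d scanout workaround (and the
      * !full-ppgtt case on gen6/7) selects the real clear_range
      * implementations below instead.
      */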
 310static void nop_clear_range(struct i915_address_space *vm,
 311                            u64 start, u64 length)
 312{
 313}
 314
 315static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 316                                  u64 start, u64 length)
 317{
 318        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 319        unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
 320        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
 321        const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
 322        gen8_pte_t __iomem *gtt_base =
 323                (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
 324        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 325        int i;
 326
 327        if (WARN(num_entries > max_entries,
 328                 "First entry = %d; Num entries = %d (max=%d)\n",
 329                 first_entry, num_entries, max_entries))
 330                num_entries = max_entries;
 331
 332        for (i = 0; i < num_entries; i++)
 333                gen8_set_pte(&gtt_base[i], scratch_pte);
 334}
 335
 336static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
 337{
 338        /*
 339         * Make sure the internal GAM fifo has been cleared of all GTT
 340         * writes before exiting stop_machine(). This guarantees that
 341         * any aperture accesses waiting to start in another process
 342         * cannot back up behind the GTT writes causing a hang.
 343         * The register can be any arbitrary GAM register.
 344         */
 345        intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
 346}
 347
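     /*
      * On platforms where GGTT updates must not race with aperture accesses
      * (see intel_vm_no_concurrent_access_wa()), PTE writes are funnelled
      * through stop_machine(); these small structs just carry the arguments
      * into the stop_machine() callbacks.
      */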
 348struct insert_page {
 349        struct i915_address_space *vm;
 350        dma_addr_t addr;
 351        u64 offset;
 352        enum i915_cache_level level;
 353};
 354
 355static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
 356{
 357        struct insert_page *arg = _arg;
 358
 359        gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
 360        bxt_vtd_ggtt_wa(arg->vm);
 361
 362        return 0;
 363}
 364
 365static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
 366                                          dma_addr_t addr,
 367                                          u64 offset,
 368                                          enum i915_cache_level level,
 369                                          u32 unused)
 370{
 371        struct insert_page arg = { vm, addr, offset, level };
 372
 373        stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
 374}
 375
 376struct insert_entries {
 377        struct i915_address_space *vm;
 378        struct i915_vma *vma;
 379        enum i915_cache_level level;
 380        u32 flags;
 381};
 382
 383static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
 384{
 385        struct insert_entries *arg = _arg;
 386
 387        gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
 388        bxt_vtd_ggtt_wa(arg->vm);
 389
 390        return 0;
 391}
 392
 393static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
 394                                             struct i915_vma *vma,
 395                                             enum i915_cache_level level,
 396                                             u32 flags)
 397{
 398        struct insert_entries arg = { vm, vma, level, flags };
 399
 400        stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
 401}
 402
 403static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 404                                  u64 start, u64 length)
 405{
 406        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 407        unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
 408        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
 409        gen6_pte_t scratch_pte, __iomem *gtt_base =
 410                (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
 411        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 412        int i;
 413
 414        if (WARN(num_entries > max_entries,
 415                 "First entry = %d; Num entries = %d (max=%d)\n",
 416                 first_entry, num_entries, max_entries))
 417                num_entries = max_entries;
 418
 419        scratch_pte = vm->scratch[0]->encode;
 420        for (i = 0; i < num_entries; i++)
 421                iowrite32(scratch_pte, &gtt_base[i]);
 422}
 423
 424static void i915_ggtt_insert_page(struct i915_address_space *vm,
 425                                  dma_addr_t addr,
 426                                  u64 offset,
 427                                  enum i915_cache_level cache_level,
 428                                  u32 unused)
 429{
 430        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 431                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 432
 433        intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
 434}
 435
 436static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 437                                     struct i915_vma *vma,
 438                                     enum i915_cache_level cache_level,
 439                                     u32 unused)
 440{
 441        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 442                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 443
 444        intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
 445                                    flags);
 446}
 447
 448static void i915_ggtt_clear_range(struct i915_address_space *vm,
 449                                  u64 start, u64 length)
 450{
 451        intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
 452}
 453
 454static void ggtt_bind_vma(struct i915_address_space *vm,
 455                          struct i915_vm_pt_stash *stash,
 456                          struct i915_vma *vma,
 457                          enum i915_cache_level cache_level,
 458                          u32 flags)
 459{
 460        struct drm_i915_gem_object *obj = vma->obj;
 461        u32 pte_flags;
 462
 463        if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
 464                return;
 465
 466        /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
 467        pte_flags = 0;
 468        if (i915_gem_object_is_readonly(obj))
 469                pte_flags |= PTE_READ_ONLY;
 470        if (i915_gem_object_is_lmem(obj))
 471                pte_flags |= PTE_LM;
 472
 473        vm->insert_entries(vm, vma, cache_level, pte_flags);
 474        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 475}
 476
 477static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
 478{
 479        vm->clear_range(vm, vma->node.start, vma->size);
 480}
 481
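     /*
      * The GuC cannot address GGTT offsets above GUC_GGTT_TOP, so reserve
      * everything from GUC_GGTT_TOP to the end of the GGTT to keep ordinary
      * allocations out of that range.
      */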
 482static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
 483{
 484        u64 size;
 485        int ret;
 486
 487        if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
 488                return 0;
 489
 490        GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
 491        size = ggtt->vm.total - GUC_GGTT_TOP;
 492
 493        ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
 494                                   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
 495                                   PIN_NOEVICT);
 496        if (ret)
 497                drm_dbg(&ggtt->vm.i915->drm,
 498                        "Failed to reserve top of GGTT for GuC\n");
 499
 500        return ret;
 501}
 502
 503static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
 504{
 505        if (drm_mm_node_allocated(&ggtt->uc_fw))
 506                drm_mm_remove_node(&ggtt->uc_fw);
 507}
 508
 509static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
 510{
 511        ggtt_release_guc_top(ggtt);
 512        if (drm_mm_node_allocated(&ggtt->error_capture))
 513                drm_mm_remove_node(&ggtt->error_capture);
 514        mutex_destroy(&ggtt->error_mutex);
 515}
 516
 517static int init_ggtt(struct i915_ggtt *ggtt)
 518{
 519        /*
 520         * Let GEM Manage all of the aperture.
 521         *
 522         * However, leave one page at the end still bound to the scratch page.
 523         * There are a number of places where the hardware apparently prefetches
 524         * past the end of the object, and we've seen multiple hangs with the
 525         * GPU head pointer stuck in a batchbuffer bound at the last page of the
 526         * aperture.  One page should be enough to keep any prefetching inside
 527         * of the aperture.
 528         */
 529        unsigned long hole_start, hole_end;
 530        struct drm_mm_node *entry;
 531        int ret;
 532
 533        /*
 534         * GuC requires all resources that we're sharing with it to be placed in
 535         * non-WOPCM memory. If GuC is not present or not in use we still need a
 536         * small bias as ring wraparound at offset 0 sometimes hangs. No idea
 537         * why.
 538         */
 539        ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
 540                               intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
 541
 542        ret = intel_vgt_balloon(ggtt);
 543        if (ret)
 544                return ret;
 545
 546        mutex_init(&ggtt->error_mutex);
 547        if (ggtt->mappable_end) {
 548                /*
 549                 * Reserve a mappable slot for our lockless error capture.
 550                 *
 551                 * We strongly prefer taking address 0x0 in order to protect
 552                 * other critical buffers against accidental overwrites,
 553                 * as writing to address 0 is a very common mistake.
 554                 *
 555                 * Since 0 may already be in use by the system (e.g. the BIOS
 556                 * framebuffer), we let the reservation fail quietly and hope
 557                 * 0 remains reserved always.
 558                 *
 559                 * If we fail to reserve 0, and then fail to find any space
 560                 * for an error-capture, remain silent. We can afford not
 561                 * to reserve an error_capture node as we have fallback
 562                 * paths, and we trust that 0 will remain reserved. However,
 563                 * the only likely reason for failure to insert is a driver
 564                 * bug, which we expect to cause other failures...
 565                 */
 566                ggtt->error_capture.size = I915_GTT_PAGE_SIZE;
 567                ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
 568                if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
 569                        drm_mm_insert_node_in_range(&ggtt->vm.mm,
 570                                                    &ggtt->error_capture,
 571                                                    ggtt->error_capture.size, 0,
 572                                                    ggtt->error_capture.color,
 573                                                    0, ggtt->mappable_end,
 574                                                    DRM_MM_INSERT_LOW);
 575        }
 576        if (drm_mm_node_allocated(&ggtt->error_capture))
 577                drm_dbg(&ggtt->vm.i915->drm,
 578                        "Reserved GGTT:[%llx, %llx] for use by error capture\n",
 579                        ggtt->error_capture.start,
 580                        ggtt->error_capture.start + ggtt->error_capture.size);
 581
 582        /*
 583         * The upper portion of the GuC address space has a sizeable hole
 584         * (several MB) that is inaccessible by GuC. Reserve this range within
 585         * GGTT as it can comfortably hold GuC/HuC firmware images.
 586         */
 587        ret = ggtt_reserve_guc_top(ggtt);
 588        if (ret)
 589                goto err;
 590
 591        /* Clear any non-preallocated blocks */
 592        drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
 593                drm_dbg(&ggtt->vm.i915->drm,
 594                        "clearing unused GTT space: [%lx, %lx]\n",
 595                        hole_start, hole_end);
 596                ggtt->vm.clear_range(&ggtt->vm, hole_start,
 597                                     hole_end - hole_start);
 598        }
 599
 600        /* And finally clear the reserved guard page */
 601        ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
 602
 603        return 0;
 604
 605err:
 606        cleanup_init_ggtt(ggtt);
 607        return ret;
 608}
 609
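     /*
      * With an aliasing ppgtt a single ppgtt mirrors the whole GGTT range:
      * I915_VMA_LOCAL_BIND writes the alias's ppgtt PTEs and
      * I915_VMA_GLOBAL_BIND writes the GGTT PTEs, both at the same offset.
      */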
 610static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
 611                                  struct i915_vm_pt_stash *stash,
 612                                  struct i915_vma *vma,
 613                                  enum i915_cache_level cache_level,
 614                                  u32 flags)
 615{
 616        u32 pte_flags;
 617
 618        /* Currently applicable only to VLV */
 619        pte_flags = 0;
 620        if (i915_gem_object_is_readonly(vma->obj))
 621                pte_flags |= PTE_READ_ONLY;
 622
 623        if (flags & I915_VMA_LOCAL_BIND)
 624                ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
 625                               stash, vma, cache_level, flags);
 626
 627        if (flags & I915_VMA_GLOBAL_BIND)
 628                vm->insert_entries(vm, vma, cache_level, pte_flags);
 629}
 630
 631static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
 632                                    struct i915_vma *vma)
 633{
 634        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
 635                vm->clear_range(vm, vma->node.start, vma->size);
 636
 637        if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND))
 638                ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma);
 639}
 640
 641static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 642{
 643        struct i915_vm_pt_stash stash = {};
 644        struct i915_ppgtt *ppgtt;
 645        int err;
 646
 647        ppgtt = i915_ppgtt_create(ggtt->vm.gt);
 648        if (IS_ERR(ppgtt))
 649                return PTR_ERR(ppgtt);
 650
 651        if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
 652                err = -ENODEV;
 653                goto err_ppgtt;
 654        }
 655
 656        err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
 657        if (err)
 658                goto err_ppgtt;
 659
 660        i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
 661        err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
 662        i915_gem_object_unlock(ppgtt->vm.scratch[0]);
 663        if (err)
 664                goto err_stash;
 665
 666        /*
 667         * Note we only pre-allocate as far as the end of the global
 668         * GTT. On 48b / 4-level page-tables, the difference is very,
 669         * very significant! We have to preallocate as GVT/vgpu does
 670         * not like the page directory disappearing.
 671         */
 672        ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
 673
 674        ggtt->alias = ppgtt;
 675        ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
 676
 677        GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
 678        ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
 679
 680        GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
 681        ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
 682
 683        i915_vm_free_pt_stash(&ppgtt->vm, &stash);
 684        return 0;
 685
 686err_stash:
 687        i915_vm_free_pt_stash(&ppgtt->vm, &stash);
 688err_ppgtt:
 689        i915_vm_put(&ppgtt->vm);
 690        return err;
 691}
 692
 693static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
 694{
 695        struct i915_ppgtt *ppgtt;
 696
 697        ppgtt = fetch_and_zero(&ggtt->alias);
 698        if (!ppgtt)
 699                return;
 700
 701        i915_vm_put(&ppgtt->vm);
 702
 703        ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
 704        ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
 705}
 706
 707int i915_init_ggtt(struct drm_i915_private *i915)
 708{
 709        int ret;
 710
 711        ret = init_ggtt(&i915->ggtt);
 712        if (ret)
 713                return ret;
 714
 715        if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
 716                ret = init_aliasing_ppgtt(&i915->ggtt);
 717                if (ret)
 718                        cleanup_init_ggtt(&i915->ggtt);
 719        }
 720
 721        return 0;
 722}
 723
 724static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
 725{
 726        struct i915_vma *vma, *vn;
 727
 728        atomic_set(&ggtt->vm.open, 0);
 729
  730        rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
 731        flush_workqueue(ggtt->vm.i915->wq);
 732
 733        mutex_lock(&ggtt->vm.mutex);
 734
 735        list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
 736                WARN_ON(__i915_vma_unbind(vma));
 737
 738        if (drm_mm_node_allocated(&ggtt->error_capture))
 739                drm_mm_remove_node(&ggtt->error_capture);
 740        mutex_destroy(&ggtt->error_mutex);
 741
 742        ggtt_release_guc_top(ggtt);
 743        intel_vgt_deballoon(ggtt);
 744
 745        ggtt->vm.cleanup(&ggtt->vm);
 746
 747        mutex_unlock(&ggtt->vm.mutex);
 748        i915_address_space_fini(&ggtt->vm);
 749
 750        arch_phys_wc_del(ggtt->mtrr);
 751
 752        if (ggtt->iomap.size)
 753                io_mapping_fini(&ggtt->iomap);
 754}
 755
 756/**
 757 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 758 * @i915: i915 device
 759 */
 760void i915_ggtt_driver_release(struct drm_i915_private *i915)
 761{
 762        struct i915_ggtt *ggtt = &i915->ggtt;
 763
 764        fini_aliasing_ppgtt(ggtt);
 765
 766        intel_ggtt_fini_fences(ggtt);
 767        ggtt_cleanup_hw(ggtt);
 768}
 769
 770/**
 771 * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
 772 * all free objects have been drained.
 773 * @i915: i915 device
 774 */
 775void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
 776{
 777        struct i915_ggtt *ggtt = &i915->ggtt;
 778
 779        GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
 780        dma_resv_fini(&ggtt->vm._resv);
 781}
 782
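     /*
      * The GGMS field of the GMCH control word encodes the size of the GTT;
      * the helpers below decode it to bytes of page-table space, from which
      * vm.total is derived (one 4K page mapped per PTE).
      */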
 783static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
 784{
 785        snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
 786        snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
 787        return snb_gmch_ctl << 20;
 788}
 789
 790static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
 791{
 792        bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
 793        bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
 794        if (bdw_gmch_ctl)
 795                bdw_gmch_ctl = 1 << bdw_gmch_ctl;
 796
 797#ifdef CONFIG_X86_32
 798        /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
 799        if (bdw_gmch_ctl > 4)
 800                bdw_gmch_ctl = 4;
 801#endif
 802
 803        return bdw_gmch_ctl << 20;
 804}
 805
 806static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
 807{
 808        gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
 809        gmch_ctrl &= SNB_GMCH_GGMS_MASK;
 810
 811        if (gmch_ctrl)
 812                return 1 << (20 + gmch_ctrl);
 813
 814        return 0;
 815}
 816
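     /*
      * Common tail of the gen6+ probes: map the GSM (the upper half of
      * BAR 0, which holds the PTEs), set up the scratch page and pre-compute
      * its PTE encoding.
      */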
 817static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 818{
 819        struct drm_i915_private *i915 = ggtt->vm.i915;
 820        struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
 821        phys_addr_t phys_addr;
 822        u32 pte_flags;
 823        int ret;
 824
 825        /* For Modern GENs the PTEs and register space are split in the BAR */
 826        phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
 827
 828        /*
 829         * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
 830         * will be dropped. For WC mappings in general we have 64 byte burst
 831         * writes when the WC buffer is flushed, so we can't use it, but have to
 832         * resort to an uncached mapping. The WC issue is easily caught by the
 833         * readback check when writing GTT PTE entries.
 834         */
 835        if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
 836                ggtt->gsm = ioremap(phys_addr, size);
 837        else
 838                ggtt->gsm = ioremap_wc(phys_addr, size);
 839        if (!ggtt->gsm) {
 840                drm_err(&i915->drm, "Failed to map the ggtt page table\n");
 841                return -ENOMEM;
 842        }
 843
 844        kref_init(&ggtt->vm.resv_ref);
 845        ret = setup_scratch_page(&ggtt->vm);
 846        if (ret) {
 847                drm_err(&i915->drm, "Scratch setup failed\n");
 848                /* iounmap will also get called at remove, but meh */
 849                iounmap(ggtt->gsm);
 850                return ret;
 851        }
 852
 853        pte_flags = 0;
 854        if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
 855                pte_flags |= PTE_LM;
 856
 857        ggtt->vm.scratch[0]->encode =
 858                ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
 859                                    I915_CACHE_NONE, pte_flags);
 860
 861        return 0;
 862}
 863
 864int ggtt_set_pages(struct i915_vma *vma)
 865{
 866        int ret;
 867
 868        GEM_BUG_ON(vma->pages);
 869
 870        ret = i915_get_ggtt_vma_pages(vma);
 871        if (ret)
 872                return ret;
 873
 874        vma->page_sizes = vma->obj->mm.page_sizes;
 875
 876        return 0;
 877}
 878
 879static void gen6_gmch_remove(struct i915_address_space *vm)
 880{
 881        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 882
 883        iounmap(ggtt->gsm);
 884        free_scratch(vm);
 885}
 886
 887static struct resource pci_resource(struct pci_dev *pdev, int bar)
 888{
 889        return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
 890                                               pci_resource_len(pdev, bar));
 891}
 892
 893static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 894{
 895        struct drm_i915_private *i915 = ggtt->vm.i915;
 896        struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
 897        unsigned int size;
 898        u16 snb_gmch_ctl;
 899
 900        /* TODO: We're not aware of mappable constraints on gen8 yet */
 901        if (!HAS_LMEM(i915)) {
 902                ggtt->gmadr = pci_resource(pdev, 2);
 903                ggtt->mappable_end = resource_size(&ggtt->gmadr);
 904        }
 905
 906        pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
 907        if (IS_CHERRYVIEW(i915))
 908                size = chv_get_total_gtt_size(snb_gmch_ctl);
 909        else
 910                size = gen8_get_total_gtt_size(snb_gmch_ctl);
 911
 912        ggtt->vm.alloc_pt_dma = alloc_pt_dma;
 913
 914        ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
 915        ggtt->vm.cleanup = gen6_gmch_remove;
 916        ggtt->vm.insert_page = gen8_ggtt_insert_page;
 917        ggtt->vm.clear_range = nop_clear_range;
 918        if (intel_scanout_needs_vtd_wa(i915))
 919                ggtt->vm.clear_range = gen8_ggtt_clear_range;
 920
 921        ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
 922
 923        /*
 924         * Serialize GTT updates with aperture access on BXT if VT-d is on,
 925         * and always on CHV.
 926         */
 927        if (intel_vm_no_concurrent_access_wa(i915)) {
 928                ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
 929                ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
 930                ggtt->vm.bind_async_flags =
 931                        I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 932        }
 933
 934        ggtt->invalidate = gen8_ggtt_invalidate;
 935
 936        ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
 937        ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
 938        ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
 939        ggtt->vm.vma_ops.clear_pages = clear_pages;
 940
 941        ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 942
 943        setup_private_pat(ggtt->vm.gt->uncore);
 944
 945        return ggtt_probe_common(ggtt, size);
 946}
 947
 948static u64 snb_pte_encode(dma_addr_t addr,
 949                          enum i915_cache_level level,
 950                          u32 flags)
 951{
 952        gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 953
 954        switch (level) {
 955        case I915_CACHE_L3_LLC:
 956        case I915_CACHE_LLC:
 957                pte |= GEN6_PTE_CACHE_LLC;
 958                break;
 959        case I915_CACHE_NONE:
 960                pte |= GEN6_PTE_UNCACHED;
 961                break;
 962        default:
 963                MISSING_CASE(level);
 964        }
 965
 966        return pte;
 967}
 968
 969static u64 ivb_pte_encode(dma_addr_t addr,
 970                          enum i915_cache_level level,
 971                          u32 flags)
 972{
 973        gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 974
 975        switch (level) {
 976        case I915_CACHE_L3_LLC:
 977                pte |= GEN7_PTE_CACHE_L3_LLC;
 978                break;
 979        case I915_CACHE_LLC:
 980                pte |= GEN6_PTE_CACHE_LLC;
 981                break;
 982        case I915_CACHE_NONE:
 983                pte |= GEN6_PTE_UNCACHED;
 984                break;
 985        default:
 986                MISSING_CASE(level);
 987        }
 988
 989        return pte;
 990}
 991
 992static u64 byt_pte_encode(dma_addr_t addr,
 993                          enum i915_cache_level level,
 994                          u32 flags)
 995{
 996        gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
 997
 998        if (!(flags & PTE_READ_ONLY))
 999                pte |= BYT_PTE_WRITEABLE;
1000
1001        if (level != I915_CACHE_NONE)
1002                pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
1003
1004        return pte;
1005}
1006
1007static u64 hsw_pte_encode(dma_addr_t addr,
1008                          enum i915_cache_level level,
1009                          u32 flags)
1010{
1011        gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1012
1013        if (level != I915_CACHE_NONE)
1014                pte |= HSW_WB_LLC_AGE3;
1015
1016        return pte;
1017}
1018
1019static u64 iris_pte_encode(dma_addr_t addr,
1020                           enum i915_cache_level level,
1021                           u32 flags)
1022{
1023        gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1024
1025        switch (level) {
1026        case I915_CACHE_NONE:
1027                break;
1028        case I915_CACHE_WT:
1029                pte |= HSW_WT_ELLC_LLC_AGE3;
1030                break;
1031        default:
1032                pte |= HSW_WB_ELLC_LLC_AGE3;
1033                break;
1034        }
1035
1036        return pte;
1037}
1038
1039static int gen6_gmch_probe(struct i915_ggtt *ggtt)
1040{
1041        struct drm_i915_private *i915 = ggtt->vm.i915;
1042        struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1043        unsigned int size;
1044        u16 snb_gmch_ctl;
1045
1046        ggtt->gmadr = pci_resource(pdev, 2);
1047        ggtt->mappable_end = resource_size(&ggtt->gmadr);
1048
1049        /*
1050         * 64/512MB is the current min/max we actually know of, but this is
1051         * just a coarse sanity check.
1052         */
1053        if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
1054                drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
1055                        &ggtt->mappable_end);
1056                return -ENXIO;
1057        }
1058
1059        pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1060
1061        size = gen6_get_total_gtt_size(snb_gmch_ctl);
1062        ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
1063
1064        ggtt->vm.alloc_pt_dma = alloc_pt_dma;
1065
1066        ggtt->vm.clear_range = nop_clear_range;
1067        if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
1068                ggtt->vm.clear_range = gen6_ggtt_clear_range;
1069        ggtt->vm.insert_page = gen6_ggtt_insert_page;
1070        ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
1071        ggtt->vm.cleanup = gen6_gmch_remove;
1072
1073        ggtt->invalidate = gen6_ggtt_invalidate;
1074
1075        if (HAS_EDRAM(i915))
1076                ggtt->vm.pte_encode = iris_pte_encode;
1077        else if (IS_HASWELL(i915))
1078                ggtt->vm.pte_encode = hsw_pte_encode;
1079        else if (IS_VALLEYVIEW(i915))
1080                ggtt->vm.pte_encode = byt_pte_encode;
1081        else if (GRAPHICS_VER(i915) >= 7)
1082                ggtt->vm.pte_encode = ivb_pte_encode;
1083        else
1084                ggtt->vm.pte_encode = snb_pte_encode;
1085
1086        ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
1087        ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
1088        ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
1089        ggtt->vm.vma_ops.clear_pages = clear_pages;
1090
1091        return ggtt_probe_common(ggtt, size);
1092}
1093
1094static void i915_gmch_remove(struct i915_address_space *vm)
1095{
1096        intel_gmch_remove();
1097}
1098
1099static int i915_gmch_probe(struct i915_ggtt *ggtt)
1100{
1101        struct drm_i915_private *i915 = ggtt->vm.i915;
1102        phys_addr_t gmadr_base;
1103        int ret;
1104
1105        ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL);
1106        if (!ret) {
1107                drm_err(&i915->drm, "failed to set up gmch\n");
1108                return -EIO;
1109        }
1110
1111        intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
1112
1113        ggtt->gmadr =
1114                (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
1115
1116        ggtt->vm.alloc_pt_dma = alloc_pt_dma;
1117
1118        if (needs_idle_maps(i915)) {
1119                drm_notice(&i915->drm,
1120                           "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
1121                ggtt->do_idle_maps = true;
1122        }
1123
1124        ggtt->vm.insert_page = i915_ggtt_insert_page;
1125        ggtt->vm.insert_entries = i915_ggtt_insert_entries;
1126        ggtt->vm.clear_range = i915_ggtt_clear_range;
1127        ggtt->vm.cleanup = i915_gmch_remove;
1128
1129        ggtt->invalidate = gmch_ggtt_invalidate;
1130
1131        ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
1132        ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
1133        ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
1134        ggtt->vm.vma_ops.clear_pages = clear_pages;
1135
1136        if (unlikely(ggtt->do_idle_maps))
1137                drm_notice(&i915->drm,
1138                           "Applying Ironlake quirks for intel_iommu\n");
1139
1140        return 0;
1141}
1142
1143static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
1144{
1145        struct drm_i915_private *i915 = gt->i915;
1146        int ret;
1147
1148        ggtt->vm.gt = gt;
1149        ggtt->vm.i915 = i915;
1150        ggtt->vm.dma = i915->drm.dev;
1151        dma_resv_init(&ggtt->vm._resv);
1152
1153        if (GRAPHICS_VER(i915) <= 5)
1154                ret = i915_gmch_probe(ggtt);
1155        else if (GRAPHICS_VER(i915) < 8)
1156                ret = gen6_gmch_probe(ggtt);
1157        else
1158                ret = gen8_gmch_probe(ggtt);
1159        if (ret) {
1160                dma_resv_fini(&ggtt->vm._resv);
1161                return ret;
1162        }
1163
1164        if ((ggtt->vm.total - 1) >> 32) {
1165                drm_err(&i915->drm,
1166                        "We never expected a Global GTT with more than 32bits"
1167                        " of address space! Found %lldM!\n",
1168                        ggtt->vm.total >> 20);
1169                ggtt->vm.total = 1ULL << 32;
1170                ggtt->mappable_end =
1171                        min_t(u64, ggtt->mappable_end, ggtt->vm.total);
1172        }
1173
1174        if (ggtt->mappable_end > ggtt->vm.total) {
1175                drm_err(&i915->drm,
1176                        "mappable aperture extends past end of GGTT,"
1177                        " aperture=%pa, total=%llx\n",
1178                        &ggtt->mappable_end, ggtt->vm.total);
1179                ggtt->mappable_end = ggtt->vm.total;
1180        }
1181
1182        /* GMADR is the PCI mmio aperture into the global GTT. */
1183        drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
1184        drm_dbg(&i915->drm, "GMADR size = %lluM\n",
1185                (u64)ggtt->mappable_end >> 20);
1186        drm_dbg(&i915->drm, "DSM size = %lluM\n",
1187                (u64)resource_size(&intel_graphics_stolen_res) >> 20);
1188
1189        return 0;
1190}
1191
1192/**
1193 * i915_ggtt_probe_hw - Probe GGTT hardware location
1194 * @i915: i915 device
1195 */
1196int i915_ggtt_probe_hw(struct drm_i915_private *i915)
1197{
1198        int ret;
1199
1200        ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
1201        if (ret)
1202                return ret;
1203
1204        if (intel_vtd_active())
1205                drm_info(&i915->drm, "VT-d active for gfx access\n");
1206
1207        return 0;
1208}
1209
1210int i915_ggtt_enable_hw(struct drm_i915_private *i915)
1211{
1212        if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt())
1213                return -EIO;
1214
1215        return 0;
1216}
1217
1218void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
1219{
1220        GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
1221
1222        ggtt->invalidate = guc_ggtt_invalidate;
1223
1224        ggtt->invalidate(ggtt);
1225}
1226
1227void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
1228{
1229        /* XXX Temporary pardon for error unload */
1230        if (ggtt->invalidate == gen8_ggtt_invalidate)
1231                return;
1232
1233        /* We should only be called after i915_ggtt_enable_guc() */
1234        GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
1235
1236        ggtt->invalidate = gen8_ggtt_invalidate;
1237
1238        ggtt->invalidate(ggtt);
1239}
1240
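     /*
      * On resume, first scrub the whole GGTT to scratch, then rewrite the
      * PTEs for every vma that is still bound, flushing CPU caches (wbinvd)
      * if any of those objects had a write domain outstanding, and finally
      * restore the private PAT (gen8+) and the fence registers.
      */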
1241void i915_ggtt_resume(struct i915_ggtt *ggtt)
1242{
1243        struct i915_vma *vma;
1244        bool flush = false;
1245        int open;
1246
1247        intel_gt_check_and_clear_faults(ggtt->vm.gt);
1248
1249        /* First fill our portion of the GTT with scratch pages */
1250        ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
1251
1252        /* Skip rewriting PTE on VMA unbind. */
1253        open = atomic_xchg(&ggtt->vm.open, 0);
1254
1255        /* clflush objects bound into the GGTT and rebind them. */
1256        list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
1257                struct drm_i915_gem_object *obj = vma->obj;
1258                unsigned int was_bound =
1259                        atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
1260
1261                GEM_BUG_ON(!was_bound);
1262                vma->ops->bind_vma(&ggtt->vm, NULL, vma,
1263                                   obj ? obj->cache_level : 0,
1264                                   was_bound);
1265                if (obj) { /* only used during resume => exclusive access */
1266                        flush |= fetch_and_zero(&obj->write_domain);
1267                        obj->read_domains |= I915_GEM_DOMAIN_GTT;
1268                }
1269        }
1270
1271        atomic_set(&ggtt->vm.open, open);
1272        ggtt->invalidate(ggtt);
1273
1274        if (flush)
1275                wbinvd_on_all_cpus();
1276
1277        if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
1278                setup_private_pat(ggtt->vm.gt->uncore);
1279
1280        intel_ggtt_restore_fences(ggtt);
1281}
1282
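     /*
      * Build the sg layout for a rotated GGTT view: pages are emitted column
      * by column, walking each source column bottom-up, with padding entries
      * added up to dst_stride. No struct pages are attached; only the DMA
      * addresses matter to the PTE writers.
      */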
1283static struct scatterlist *
1284rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
1285             unsigned int width, unsigned int height,
1286             unsigned int src_stride, unsigned int dst_stride,
1287             struct sg_table *st, struct scatterlist *sg)
1288{
1289        unsigned int column, row;
1290        unsigned int src_idx;
1291
1292        for (column = 0; column < width; column++) {
1293                unsigned int left;
1294
1295                src_idx = src_stride * (height - 1) + column + offset;
1296                for (row = 0; row < height; row++) {
1297                        st->nents++;
1298                        /*
1299                         * We don't need the pages, but need to initialize
1300                         * the entries so the sg list can be happily traversed.
 1301                         * All we need are the DMA addresses.
1302                         */
1303                        sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
1304                        sg_dma_address(sg) =
1305                                i915_gem_object_get_dma_address(obj, src_idx);
1306                        sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
1307                        sg = sg_next(sg);
1308                        src_idx -= src_stride;
1309                }
1310
1311                left = (dst_stride - height) * I915_GTT_PAGE_SIZE;
1312
1313                if (!left)
1314                        continue;
1315
1316                st->nents++;
1317
1318                /*
1319                 * The DE ignores the PTEs for the padding tiles, the sg entry
 1320                 * here is just a convenience to indicate how many padding PTEs
1321                 * to insert at this spot.
1322                 */
1323                sg_set_page(sg, NULL, left, 0);
1324                sg_dma_address(sg) = 0;
1325                sg_dma_len(sg) = left;
1326                sg = sg_next(sg);
1327        }
1328
1329        return sg;
1330}
1331
1332static noinline struct sg_table *
1333intel_rotate_pages(struct intel_rotation_info *rot_info,
1334                   struct drm_i915_gem_object *obj)
1335{
1336        unsigned int size = intel_rotation_info_size(rot_info);
1337        struct drm_i915_private *i915 = to_i915(obj->base.dev);
1338        struct sg_table *st;
1339        struct scatterlist *sg;
1340        int ret = -ENOMEM;
1341        int i;
1342
1343        /* Allocate target SG list. */
1344        st = kmalloc(sizeof(*st), GFP_KERNEL);
1345        if (!st)
1346                goto err_st_alloc;
1347
1348        ret = sg_alloc_table(st, size, GFP_KERNEL);
1349        if (ret)
1350                goto err_sg_alloc;
1351
1352        st->nents = 0;
1353        sg = st->sgl;
1354
1355        for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++)
1356                sg = rotate_pages(obj, rot_info->plane[i].offset,
1357                                  rot_info->plane[i].width, rot_info->plane[i].height,
1358                                  rot_info->plane[i].src_stride,
1359                                  rot_info->plane[i].dst_stride,
1360                                  st, sg);
1361
1362        return st;
1363
1364err_sg_alloc:
1365        kfree(st);
1366err_st_alloc:
1367
1368        drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
1369                obj->base.size, rot_info->plane[0].width,
1370                rot_info->plane[0].height, size);
1371
1372        return ERR_PTR(ret);
1373}
1374
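     /*
      * Build the sg layout for a remapped GGTT view: rows are copied out
      * with a new stride (plus optional padding), coalescing runs of
      * contiguous DMA pages into single sg entries where possible.
      */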
1375static struct scatterlist *
1376remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
1377            unsigned int width, unsigned int height,
1378            unsigned int src_stride, unsigned int dst_stride,
1379            struct sg_table *st, struct scatterlist *sg)
1380{
1381        unsigned int row;
1382
1383        for (row = 0; row < height; row++) {
1384                unsigned int left = width * I915_GTT_PAGE_SIZE;
1385
1386                while (left) {
1387                        dma_addr_t addr;
1388                        unsigned int length;
1389
1390                        /*
1391                         * We don't need the pages, but need to initialize
1392                         * the entries so the sg list can be happily traversed.
 1393                         * All we need are the DMA addresses.
1394                         */
1395
1396                        addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
1397
1398                        length = min(left, length);
1399
1400                        st->nents++;
1401
1402                        sg_set_page(sg, NULL, length, 0);
1403                        sg_dma_address(sg) = addr;
1404                        sg_dma_len(sg) = length;
1405                        sg = sg_next(sg);
1406
1407                        offset += length / I915_GTT_PAGE_SIZE;
1408                        left -= length;
1409                }
1410
1411                offset += src_stride - width;
1412
1413                left = (dst_stride - width) * I915_GTT_PAGE_SIZE;
1414
1415                if (!left)
1416                        continue;
1417
1418                st->nents++;
1419
1420                /*
1421                 * The DE ignores the PTEs for the padding tiles, the sg entry
 1422                 * here is just a convenience to indicate how many padding PTEs
1423                 * to insert at this spot.
1424                 */
1425                sg_set_page(sg, NULL, left, 0);
1426                sg_dma_address(sg) = 0;
1427                sg_dma_len(sg) = left;
1428                sg = sg_next(sg);
1429        }
1430
1431        return sg;
1432}
1433
1434static noinline struct sg_table *
1435intel_remap_pages(struct intel_remapped_info *rem_info,
1436                  struct drm_i915_gem_object *obj)
1437{
1438        unsigned int size = intel_remapped_info_size(rem_info);
1439        struct drm_i915_private *i915 = to_i915(obj->base.dev);
1440        struct sg_table *st;
1441        struct scatterlist *sg;
1442        int ret = -ENOMEM;
1443        int i;
1444
1445        /* Allocate target SG list. */
1446        st = kmalloc(sizeof(*st), GFP_KERNEL);
1447        if (!st)
1448                goto err_st_alloc;
1449
1450        ret = sg_alloc_table(st, size, GFP_KERNEL);
1451        if (ret)
1452                goto err_sg_alloc;
1453
1454        st->nents = 0;
1455        sg = st->sgl;
1456
1457        for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
1458                sg = remap_pages(obj, rem_info->plane[i].offset,
1459                                 rem_info->plane[i].width, rem_info->plane[i].height,
1460                                 rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride,
1461                                 st, sg);
1462        }
1463
1464        i915_sg_trim(st);
1465
1466        return st;
1467
1468err_sg_alloc:
1469        kfree(st);
1470err_st_alloc:
1471
1472        drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
1473                obj->base.size, rem_info->plane[0].width,
1474                rem_info->plane[0].height, size);
1475
1476        return ERR_PTR(ret);
1477}
1478
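     /*
      * Build the sg layout for a partial GGTT view: a window of
      * view->partial.size pages starting at view->partial.offset within the
      * object, reusing slices of the object's existing dma sg entries.
      */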
1479static noinline struct sg_table *
1480intel_partial_pages(const struct i915_ggtt_view *view,
1481                    struct drm_i915_gem_object *obj)
1482{
1483        struct sg_table *st;
1484        struct scatterlist *sg, *iter;
1485        unsigned int count = view->partial.size;
1486        unsigned int offset;
1487        int ret = -ENOMEM;
1488
1489        st = kmalloc(sizeof(*st), GFP_KERNEL);
1490        if (!st)
1491                goto err_st_alloc;
1492
1493        ret = sg_alloc_table(st, count, GFP_KERNEL);
1494        if (ret)
1495                goto err_sg_alloc;
1496
1497        iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
1498        GEM_BUG_ON(!iter);
1499
1500        sg = st->sgl;
1501        st->nents = 0;
1502        do {
1503                unsigned int len;
1504
1505                len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
1506                          count << PAGE_SHIFT);
1507                sg_set_page(sg, NULL, len, 0);
1508                sg_dma_address(sg) =
1509                        sg_dma_address(iter) + (offset << PAGE_SHIFT);
1510                sg_dma_len(sg) = len;
1511
1512                st->nents++;
1513                count -= len >> PAGE_SHIFT;
1514                if (count == 0) {
1515                        sg_mark_end(sg);
1516                        i915_sg_trim(st); /* Drop any unused tail entries. */
1517
1518                        return st;
1519                }
1520
1521                sg = __sg_next(sg);
1522                iter = __sg_next(iter);
1523                offset = 0;
1524        } while (1);
1525
1526err_sg_alloc:
1527        kfree(st);
1528err_st_alloc:
1529        return ERR_PTR(ret);
1530}
1531
1532static int
1533i915_get_ggtt_vma_pages(struct i915_vma *vma)
1534{
1535        int ret;
1536
1537        /*
1538         * The vma->pages are only valid within the lifespan of the borrowed
1539         * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
1540         * must be the vma->pages. A simple rule is that vma->pages must only
1541         * be accessed when the obj->mm.pages are pinned.
1542         */
1543        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
1544
1545        switch (vma->ggtt_view.type) {
1546        default:
1547                GEM_BUG_ON(vma->ggtt_view.type);
1548                fallthrough;
1549        case I915_GGTT_VIEW_NORMAL:
1550                vma->pages = vma->obj->mm.pages;
1551                return 0;
1552
1553        case I915_GGTT_VIEW_ROTATED:
1554                vma->pages =
1555                        intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
1556                break;
1557
1558        case I915_GGTT_VIEW_REMAPPED:
1559                vma->pages =
1560                        intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
1561                break;
1562
1563        case I915_GGTT_VIEW_PARTIAL:
1564                vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
1565                break;
1566        }
1567
1568        ret = 0;
1569        if (IS_ERR(vma->pages)) {
1570                ret = PTR_ERR(vma->pages);
1571                vma->pages = NULL;
1572                drm_err(&vma->vm->i915->drm,
1573                        "Failed to get pages for VMA view type %u (%d)!\n",
1574                        vma->ggtt_view.type, ret);
1575        }
1576        return ret;
1577}
1578