linux/drivers/gpu/drm/i915/i915_gem_gtt.c
   1/*
   2 * Copyright © 2010 Daniel Vetter
   3 * Copyright © 2011-2014 Intel Corporation
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice (including the next
  13 * paragraph) shall be included in all copies or substantial portions of the
  14 * Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22 * IN THE SOFTWARE.
  23 *
  24 */
  25
  26#include <linux/slab.h> /* fault-inject.h is not standalone! */
  27
  28#include <linux/fault-inject.h>
  29#include <linux/log2.h>
  30#include <linux/random.h>
  31#include <linux/seq_file.h>
  32#include <linux/stop_machine.h>
  33
  34#include <asm/set_memory.h>
  35
  36#include <drm/i915_drm.h>
  37
  38#include "display/intel_frontbuffer.h"
  39
  40#include "i915_drv.h"
  41#include "i915_scatterlist.h"
  42#include "i915_trace.h"
  43#include "i915_vgpu.h"
  44#include "intel_drv.h"
  45
  46#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
  47
  48/**
  49 * DOC: Global GTT views
  50 *
  51 * Background and previous state
  52 *
   53 * Historically, objects could exist (be bound) in global GTT space only as
   54 * singular instances, with a view representing all of the object's backing
   55 * pages in a linear fashion. This view is called the normal view.
  56 *
   57 * To support multiple views of the same object, where the number of mapped
   58 * pages is not equal to the backing store, or where the layout of the pages
   59 * is not linear, the concept of a GGTT view was added.
  60 *
  61 * One example of an alternative view is a stereo display driven by a single
  62 * image. In this case we would have a framebuffer looking like this
  63 * (2x2 pages):
  64 *
  65 *    12
  66 *    34
  67 *
   68 * The above represents the normal GGTT view as mapped for GPU or CPU
   69 * rendering. In contrast, the display engine would be fed an alternative
   70 * view, which could look something like this:
  71 *
  72 *   1212
  73 *   3434
  74 *
   75 * In this example both the size and the layout of pages in the alternative
   76 * view differ from those of the normal view.
  77 *
  78 * Implementation and usage
  79 *
  80 * GGTT views are implemented using VMAs and are distinguished via enum
  81 * i915_ggtt_view_type and struct i915_ggtt_view.
  82 *
   83 * A new flavour of core GEM functions which work with GGTT bound objects was
   84 * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
   85 * renaming in large amounts of code. They take the struct i915_ggtt_view
  86 * parameter encapsulating all metadata required to implement a view.
  87 *
   88 * As a helper for callers which are only interested in the normal view, a
   89 * globally const i915_ggtt_view_normal singleton instance exists. All old core
   90 * GEM API functions (the ones not taking a view parameter) operate on, or
   91 * with, the normal GGTT view.
  92 *
  93 * Code wanting to add or use a new GGTT view needs to:
  94 *
  95 * 1. Add a new enum with a suitable name.
  96 * 2. Extend the metadata in the i915_ggtt_view structure if required.
   97 * 3. Add support to i915_get_ggtt_vma_pages().
  98 *
   99 * New views are required to build a scatter-gather table from within the
  100 * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
  101 * and exists for the lifetime of a VMA.
 102 *
  103 * The core API is designed to have copy semantics, which means that the
  104 * struct i915_ggtt_view passed in does not need to be persistent (left
  105 * around after calling the core API functions).
 106 *
 107 */
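/*
 * Illustrative sketch only (not part of the driver API): the remapping in
 * the stereo example above can be expressed as a transform over backing
 * page indices. Assuming each framebuffer row is repeated side by side in
 * the alternative view, the view's source page could be computed as below.
 * The helper name and layout are assumptions made purely for illustration;
 * the real view types build an equivalent scatter-gather table in
 * i915_get_ggtt_vma_pages() when the VMA is first bound.
 */
static inline unsigned int
ggtt_view_example_src_page(unsigned int dst, unsigned int fb_width_pages)
{
        unsigned int row = dst / (2 * fb_width_pages);
        unsigned int col = dst % (2 * fb_width_pages);

        /* Both halves of a view row map back to the same source page. */
        return row * fb_width_pages + col % fb_width_pages;
}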
 108
 109static int
 110i915_get_ggtt_vma_pages(struct i915_vma *vma);
 111
 112static void gen6_ggtt_invalidate(struct drm_i915_private *i915)
 113{
 114        struct intel_uncore *uncore = &i915->uncore;
 115
 116        /*
 117         * Note that as an uncached mmio write, this will flush the
 118         * WCB of the writes into the GGTT before it triggers the invalidate.
 119         */
 120        intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 121}
 122
 123static void guc_ggtt_invalidate(struct drm_i915_private *i915)
 124{
 125        struct intel_uncore *uncore = &i915->uncore;
 126
 127        gen6_ggtt_invalidate(i915);
 128        intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 129}
 130
 131static void gmch_ggtt_invalidate(struct drm_i915_private *i915)
 132{
 133        intel_gtt_chipset_flush();
 134}
 135
 136static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
 137{
 138        i915->ggtt.invalidate(i915);
 139}
 140
 141static int ppgtt_bind_vma(struct i915_vma *vma,
 142                          enum i915_cache_level cache_level,
 143                          u32 unused)
 144{
 145        u32 pte_flags;
 146        int err;
 147
 148        if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
 149                err = vma->vm->allocate_va_range(vma->vm,
 150                                                 vma->node.start, vma->size);
 151                if (err)
 152                        return err;
 153        }
 154
 155        /* Applicable to VLV, and gen8+ */
 156        pte_flags = 0;
 157        if (i915_gem_object_is_readonly(vma->obj))
 158                pte_flags |= PTE_READ_ONLY;
 159
 160        vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
 161
 162        return 0;
 163}
 164
 165static void ppgtt_unbind_vma(struct i915_vma *vma)
 166{
 167        vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 168}
 169
 170static int ppgtt_set_pages(struct i915_vma *vma)
 171{
 172        GEM_BUG_ON(vma->pages);
 173
 174        vma->pages = vma->obj->mm.pages;
 175
 176        vma->page_sizes = vma->obj->mm.page_sizes;
 177
 178        return 0;
 179}
 180
 181static void clear_pages(struct i915_vma *vma)
 182{
 183        GEM_BUG_ON(!vma->pages);
 184
 185        if (vma->pages != vma->obj->mm.pages) {
 186                sg_free_table(vma->pages);
 187                kfree(vma->pages);
 188        }
 189        vma->pages = NULL;
 190
 191        memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
 192}
 193
 194static u64 gen8_pte_encode(dma_addr_t addr,
 195                           enum i915_cache_level level,
 196                           u32 flags)
 197{
 198        gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
 199
 200        if (unlikely(flags & PTE_READ_ONLY))
 201                pte &= ~_PAGE_RW;
 202
 203        switch (level) {
 204        case I915_CACHE_NONE:
 205                pte |= PPAT_UNCACHED;
 206                break;
 207        case I915_CACHE_WT:
 208                pte |= PPAT_DISPLAY_ELLC;
 209                break;
 210        default:
 211                pte |= PPAT_CACHED;
 212                break;
 213        }
 214
 215        return pte;
 216}
 217
 218static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
 219                                  const enum i915_cache_level level)
 220{
 221        gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
 222        pde |= addr;
 223        if (level != I915_CACHE_NONE)
 224                pde |= PPAT_CACHED_PDE;
 225        else
 226                pde |= PPAT_UNCACHED;
 227        return pde;
 228}
 229
 230#define gen8_pdpe_encode gen8_pde_encode
 231#define gen8_pml4e_encode gen8_pde_encode
 232
 233static u64 snb_pte_encode(dma_addr_t addr,
 234                          enum i915_cache_level level,
 235                          u32 flags)
 236{
 237        gen6_pte_t pte = GEN6_PTE_VALID;
 238        pte |= GEN6_PTE_ADDR_ENCODE(addr);
 239
 240        switch (level) {
 241        case I915_CACHE_L3_LLC:
 242        case I915_CACHE_LLC:
 243                pte |= GEN6_PTE_CACHE_LLC;
 244                break;
 245        case I915_CACHE_NONE:
 246                pte |= GEN6_PTE_UNCACHED;
 247                break;
 248        default:
 249                MISSING_CASE(level);
 250        }
 251
 252        return pte;
 253}
 254
 255static u64 ivb_pte_encode(dma_addr_t addr,
 256                          enum i915_cache_level level,
 257                          u32 flags)
 258{
 259        gen6_pte_t pte = GEN6_PTE_VALID;
 260        pte |= GEN6_PTE_ADDR_ENCODE(addr);
 261
 262        switch (level) {
 263        case I915_CACHE_L3_LLC:
 264                pte |= GEN7_PTE_CACHE_L3_LLC;
 265                break;
 266        case I915_CACHE_LLC:
 267                pte |= GEN6_PTE_CACHE_LLC;
 268                break;
 269        case I915_CACHE_NONE:
 270                pte |= GEN6_PTE_UNCACHED;
 271                break;
 272        default:
 273                MISSING_CASE(level);
 274        }
 275
 276        return pte;
 277}
 278
 279static u64 byt_pte_encode(dma_addr_t addr,
 280                          enum i915_cache_level level,
 281                          u32 flags)
 282{
 283        gen6_pte_t pte = GEN6_PTE_VALID;
 284        pte |= GEN6_PTE_ADDR_ENCODE(addr);
 285
 286        if (!(flags & PTE_READ_ONLY))
 287                pte |= BYT_PTE_WRITEABLE;
 288
 289        if (level != I915_CACHE_NONE)
 290                pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 291
 292        return pte;
 293}
 294
 295static u64 hsw_pte_encode(dma_addr_t addr,
 296                          enum i915_cache_level level,
 297                          u32 flags)
 298{
 299        gen6_pte_t pte = GEN6_PTE_VALID;
 300        pte |= HSW_PTE_ADDR_ENCODE(addr);
 301
 302        if (level != I915_CACHE_NONE)
 303                pte |= HSW_WB_LLC_AGE3;
 304
 305        return pte;
 306}
 307
 308static u64 iris_pte_encode(dma_addr_t addr,
 309                           enum i915_cache_level level,
 310                           u32 flags)
 311{
 312        gen6_pte_t pte = GEN6_PTE_VALID;
 313        pte |= HSW_PTE_ADDR_ENCODE(addr);
 314
 315        switch (level) {
 316        case I915_CACHE_NONE:
 317                break;
 318        case I915_CACHE_WT:
 319                pte |= HSW_WT_ELLC_LLC_AGE3;
 320                break;
 321        default:
 322                pte |= HSW_WB_ELLC_LLC_AGE3;
 323                break;
 324        }
 325
 326        return pte;
 327}
 328
 329static void stash_init(struct pagestash *stash)
 330{
 331        pagevec_init(&stash->pvec);
 332        spin_lock_init(&stash->lock);
 333}
 334
 335static struct page *stash_pop_page(struct pagestash *stash)
 336{
 337        struct page *page = NULL;
 338
 339        spin_lock(&stash->lock);
 340        if (likely(stash->pvec.nr))
 341                page = stash->pvec.pages[--stash->pvec.nr];
 342        spin_unlock(&stash->lock);
 343
 344        return page;
 345}
 346
 347static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
 348{
 349        unsigned int nr;
 350
 351        spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
 352
 353        nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
 354        memcpy(stash->pvec.pages + stash->pvec.nr,
 355               pvec->pages + pvec->nr - nr,
 356               sizeof(pvec->pages[0]) * nr);
 357        stash->pvec.nr += nr;
 358
 359        spin_unlock(&stash->lock);
 360
 361        pvec->nr -= nr;
 362}
 363
 364static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 365{
 366        struct pagevec stack;
 367        struct page *page;
 368
 369        if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
 370                i915_gem_shrink_all(vm->i915);
 371
 372        page = stash_pop_page(&vm->free_pages);
 373        if (page)
 374                return page;
 375
 376        if (!vm->pt_kmap_wc)
 377                return alloc_page(gfp);
 378
 379        /* Look in our global stash of WC pages... */
 380        page = stash_pop_page(&vm->i915->mm.wc_stash);
 381        if (page)
 382                return page;
 383
 384        /*
 385         * Otherwise batch allocate pages to amortize cost of set_pages_wc.
 386         *
 387         * We have to be careful as page allocation may trigger the shrinker
 388         * (via direct reclaim) which will fill up the WC stash underneath us.
 389         * So we add our WB pages into a temporary pvec on the stack and merge
 390         * them into the WC stash after all the allocations are complete.
 391         */
 392        pagevec_init(&stack);
 393        do {
 394                struct page *page;
 395
 396                page = alloc_page(gfp);
 397                if (unlikely(!page))
 398                        break;
 399
 400                stack.pages[stack.nr++] = page;
 401        } while (pagevec_space(&stack));
 402
 403        if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
 404                page = stack.pages[--stack.nr];
 405
 406                /* Merge spare WC pages to the global stash */
 407                if (stack.nr)
 408                        stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
 409
 410                /* Push any surplus WC pages onto the local VM stash */
 411                if (stack.nr)
 412                        stash_push_pagevec(&vm->free_pages, &stack);
 413        }
 414
 415        /* Return unwanted leftovers */
 416        if (unlikely(stack.nr)) {
 417                WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
 418                __pagevec_release(&stack);
 419        }
 420
 421        return page;
 422}
 423
 424static void vm_free_pages_release(struct i915_address_space *vm,
 425                                  bool immediate)
 426{
 427        struct pagevec *pvec = &vm->free_pages.pvec;
 428        struct pagevec stack;
 429
 430        lockdep_assert_held(&vm->free_pages.lock);
 431        GEM_BUG_ON(!pagevec_count(pvec));
 432
 433        if (vm->pt_kmap_wc) {
 434                /*
  435                 * When we use WC, first fill up the global stash and then,
  436                 * only if that is full, immediately free the overflow.
 437                 */
 438                stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
 439
 440                /*
 441                 * As we have made some room in the VM's free_pages,
 442                 * we can wait for it to fill again. Unless we are
 443                 * inside i915_address_space_fini() and must
 444                 * immediately release the pages!
 445                 */
 446                if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
 447                        return;
 448
 449                /*
 450                 * We have to drop the lock to allow ourselves to sleep,
 451                 * so take a copy of the pvec and clear the stash for
 452                 * others to use it as we sleep.
 453                 */
 454                stack = *pvec;
 455                pagevec_reinit(pvec);
 456                spin_unlock(&vm->free_pages.lock);
 457
 458                pvec = &stack;
 459                set_pages_array_wb(pvec->pages, pvec->nr);
 460
 461                spin_lock(&vm->free_pages.lock);
 462        }
 463
 464        __pagevec_release(pvec);
 465}
 466
 467static void vm_free_page(struct i915_address_space *vm, struct page *page)
 468{
 469        /*
 470         * On !llc, we need to change the pages back to WB. We only do so
 471         * in bulk, so we rarely need to change the page attributes here,
 472         * but doing so requires a stop_machine() from deep inside arch/x86/mm.
 473         * To make detection of the possible sleep more likely, use an
 474         * unconditional might_sleep() for everybody.
 475         */
 476        might_sleep();
 477        spin_lock(&vm->free_pages.lock);
 478        while (!pagevec_space(&vm->free_pages.pvec))
 479                vm_free_pages_release(vm, false);
 480        GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
 481        pagevec_add(&vm->free_pages.pvec, page);
 482        spin_unlock(&vm->free_pages.lock);
 483}
 484
 485static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 486{
 487        kref_init(&vm->ref);
 488
 489        /*
 490         * The vm->mutex must be reclaim safe (for use in the shrinker).
 491         * Do a dummy acquire now under fs_reclaim so that any allocation
 492         * attempt holding the lock is immediately reported by lockdep.
 493         */
 494        mutex_init(&vm->mutex);
 495        lockdep_set_subclass(&vm->mutex, subclass);
 496        i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
 497
 498        GEM_BUG_ON(!vm->total);
 499        drm_mm_init(&vm->mm, 0, vm->total);
 500        vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 501
 502        stash_init(&vm->free_pages);
 503
 504        INIT_LIST_HEAD(&vm->unbound_list);
 505        INIT_LIST_HEAD(&vm->bound_list);
 506}
 507
 508static void i915_address_space_fini(struct i915_address_space *vm)
 509{
 510        spin_lock(&vm->free_pages.lock);
 511        if (pagevec_count(&vm->free_pages.pvec))
 512                vm_free_pages_release(vm, true);
 513        GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
 514        spin_unlock(&vm->free_pages.lock);
 515
 516        drm_mm_takedown(&vm->mm);
 517
 518        mutex_destroy(&vm->mutex);
 519}
 520
 521static int __setup_page_dma(struct i915_address_space *vm,
 522                            struct i915_page_dma *p,
 523                            gfp_t gfp)
 524{
 525        p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
 526        if (unlikely(!p->page))
 527                return -ENOMEM;
 528
 529        p->daddr = dma_map_page_attrs(vm->dma,
 530                                      p->page, 0, PAGE_SIZE,
 531                                      PCI_DMA_BIDIRECTIONAL,
 532                                      DMA_ATTR_SKIP_CPU_SYNC |
 533                                      DMA_ATTR_NO_WARN);
 534        if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
 535                vm_free_page(vm, p->page);
 536                return -ENOMEM;
 537        }
 538
 539        return 0;
 540}
 541
 542static int setup_page_dma(struct i915_address_space *vm,
 543                          struct i915_page_dma *p)
 544{
 545        return __setup_page_dma(vm, p, __GFP_HIGHMEM);
 546}
 547
 548static void cleanup_page_dma(struct i915_address_space *vm,
 549                             struct i915_page_dma *p)
 550{
 551        dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 552        vm_free_page(vm, p->page);
 553}
 554
 555#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
 556
 557#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
 558#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
 559#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
 560#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
 561
 562static void fill_page_dma(struct i915_address_space *vm,
 563                          struct i915_page_dma *p,
 564                          const u64 val)
 565{
 566        u64 * const vaddr = kmap_atomic(p->page);
 567
 568        memset64(vaddr, val, PAGE_SIZE / sizeof(val));
 569
 570        kunmap_atomic(vaddr);
 571}
 572
 573static void fill_page_dma_32(struct i915_address_space *vm,
 574                             struct i915_page_dma *p,
 575                             const u32 v)
 576{
 577        fill_page_dma(vm, p, (u64)v << 32 | v);
 578}
 579
 580static int
 581setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 582{
 583        unsigned long size;
 584
 585        /*
 586         * In order to utilize 64K pages for an object with a size < 2M, we will
 587         * need to support a 64K scratch page, given that every 16th entry for a
 588         * page-table operating in 64K mode must point to a properly aligned 64K
 589         * region, including any PTEs which happen to point to scratch.
 590         *
 591         * This is only relevant for the 48b PPGTT where we support
 592         * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
 593         * scratch (read-only) between all vm, we create one 64k scratch page
 594         * for all.
 595         */
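        /*
         * Concretely (a worked sketch of the numbers above): a page table
         * spans 512 * 4K = 2M of address space, and in 64K mode the HW
         * consumes only every 16th PTE (16 * 4K = 64K), so each entry it
         * does read must reference a 64K aligned, 64K sized region -
         * including the entries that point at scratch.
         */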
 596        size = I915_GTT_PAGE_SIZE_4K;
 597        if (i915_vm_is_4lvl(vm) &&
 598            HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
 599                size = I915_GTT_PAGE_SIZE_64K;
 600                gfp |= __GFP_NOWARN;
 601        }
 602        gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
 603
 604        do {
 605                int order = get_order(size);
 606                struct page *page;
 607                dma_addr_t addr;
 608
 609                page = alloc_pages(gfp, order);
 610                if (unlikely(!page))
 611                        goto skip;
 612
 613                addr = dma_map_page_attrs(vm->dma,
 614                                          page, 0, size,
 615                                          PCI_DMA_BIDIRECTIONAL,
 616                                          DMA_ATTR_SKIP_CPU_SYNC |
 617                                          DMA_ATTR_NO_WARN);
 618                if (unlikely(dma_mapping_error(vm->dma, addr)))
 619                        goto free_page;
 620
 621                if (unlikely(!IS_ALIGNED(addr, size)))
 622                        goto unmap_page;
 623
 624                vm->scratch_page.page = page;
 625                vm->scratch_page.daddr = addr;
 626                vm->scratch_order = order;
 627                return 0;
 628
 629unmap_page:
 630                dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
 631free_page:
 632                __free_pages(page, order);
 633skip:
 634                if (size == I915_GTT_PAGE_SIZE_4K)
 635                        return -ENOMEM;
 636
 637                size = I915_GTT_PAGE_SIZE_4K;
 638                gfp &= ~__GFP_NOWARN;
 639        } while (1);
 640}
 641
 642static void cleanup_scratch_page(struct i915_address_space *vm)
 643{
 644        struct i915_page_dma *p = &vm->scratch_page;
 645        int order = vm->scratch_order;
 646
 647        dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
 648                       PCI_DMA_BIDIRECTIONAL);
 649        __free_pages(p->page, order);
 650}
 651
 652static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 653{
 654        struct i915_page_table *pt;
 655
 656        pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
 657        if (unlikely(!pt))
 658                return ERR_PTR(-ENOMEM);
 659
 660        if (unlikely(setup_px(vm, pt))) {
 661                kfree(pt);
 662                return ERR_PTR(-ENOMEM);
 663        }
 664
 665        atomic_set(&pt->used, 0);
 666
 667        return pt;
 668}
 669
 670static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
 671{
 672        cleanup_px(vm, pt);
 673        kfree(pt);
 674}
 675
 676static void gen8_initialize_pt(struct i915_address_space *vm,
 677                               struct i915_page_table *pt)
 678{
 679        fill_px(vm, pt, vm->scratch_pte);
 680}
 681
 682static void gen6_initialize_pt(struct i915_address_space *vm,
 683                               struct i915_page_table *pt)
 684{
 685        fill32_px(vm, pt, vm->scratch_pte);
 686}
 687
 688static struct i915_page_directory *__alloc_pd(void)
 689{
 690        struct i915_page_directory *pd;
 691
 692        pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
 693
 694        if (unlikely(!pd))
 695                return NULL;
 696
 697        memset(&pd->base, 0, sizeof(pd->base));
 698        atomic_set(&pd->used, 0);
 699        spin_lock_init(&pd->lock);
 700
 701        /* for safety */
 702        pd->entry[0] = NULL;
 703
 704        return pd;
 705}
 706
 707static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 708{
 709        struct i915_page_directory *pd;
 710
 711        pd = __alloc_pd();
 712        if (unlikely(!pd))
 713                return ERR_PTR(-ENOMEM);
 714
 715        if (unlikely(setup_px(vm, pd))) {
 716                kfree(pd);
 717                return ERR_PTR(-ENOMEM);
 718        }
 719
 720        return pd;
 721}
 722
 723static inline bool pd_has_phys_page(const struct i915_page_directory * const pd)
 724{
 725        return pd->base.page;
 726}
 727
 728static void free_pd(struct i915_address_space *vm,
 729                    struct i915_page_directory *pd)
 730{
 731        if (likely(pd_has_phys_page(pd)))
 732                cleanup_px(vm, pd);
 733
 734        kfree(pd);
 735}
 736
 737static void init_pd_with_page(struct i915_address_space *vm,
 738                              struct i915_page_directory * const pd,
 739                              struct i915_page_table *pt)
 740{
 741        fill_px(vm, pd, gen8_pde_encode(px_dma(pt), I915_CACHE_LLC));
 742        memset_p(pd->entry, pt, 512);
 743}
 744
 745static void init_pd(struct i915_address_space *vm,
 746                    struct i915_page_directory * const pd,
 747                    struct i915_page_directory * const to)
 748{
 749        GEM_DEBUG_BUG_ON(!pd_has_phys_page(pd));
 750
 751        fill_px(vm, pd, gen8_pdpe_encode(px_dma(to), I915_CACHE_LLC));
 752        memset_p(pd->entry, to, 512);
 753}
 754
 755/*
 756 * PDE TLBs are a pain to invalidate on GEN8+. When we modify
 757 * the page table structures, we mark them dirty so that
 758 * context switching/execlist queuing code takes extra steps
 759 * to ensure that tlbs are flushed.
 760 */
 761static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt)
 762{
 763        ppgtt->pd_dirty_engines = ALL_ENGINES;
 764}
 765
 766/* Removes entries from a single page table, releasing it if it's empty.
 767 * Caller can use the return value to update higher-level entries.
 768 */
 769static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
 770                                struct i915_page_table *pt,
 771                                u64 start, u64 length)
 772{
 773        unsigned int num_entries = gen8_pte_count(start, length);
 774        gen8_pte_t *vaddr;
 775
 776        vaddr = kmap_atomic_px(pt);
 777        memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
 778        kunmap_atomic(vaddr);
 779
 780        GEM_BUG_ON(num_entries > atomic_read(&pt->used));
 781        return !atomic_sub_return(num_entries, &pt->used);
 782}
 783
 784static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
 785                               struct i915_page_directory *pd,
 786                               struct i915_page_table *pt,
 787                               unsigned int pde)
 788{
 789        gen8_pde_t *vaddr;
 790
 791        vaddr = kmap_atomic_px(pd);
 792        vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
 793        kunmap_atomic(vaddr);
 794}
 795
 796static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 797                                struct i915_page_directory *pd,
 798                                u64 start, u64 length)
 799{
 800        struct i915_page_table *pt;
 801        u32 pde;
 802
 803        gen8_for_each_pde(pt, pd, start, length, pde) {
 804                bool free = false;
 805
 806                GEM_BUG_ON(pt == vm->scratch_pt);
 807
 808                if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
 809                        continue;
 810
 811                spin_lock(&pd->lock);
 812                if (!atomic_read(&pt->used)) {
 813                        gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
 814                        pd->entry[pde] = vm->scratch_pt;
 815
 816                        GEM_BUG_ON(!atomic_read(&pd->used));
 817                        atomic_dec(&pd->used);
 818                        free = true;
 819                }
 820                spin_unlock(&pd->lock);
 821                if (free)
 822                        free_pt(vm, pt);
 823        }
 824
 825        return !atomic_read(&pd->used);
 826}
 827
 828static void gen8_ppgtt_set_pdpe(struct i915_page_directory *pdp,
 829                                struct i915_page_directory *pd,
 830                                unsigned int pdpe)
 831{
 832        gen8_ppgtt_pdpe_t *vaddr;
 833
 834        if (!pd_has_phys_page(pdp))
 835                return;
 836
 837        vaddr = kmap_atomic_px(pdp);
 838        vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
 839        kunmap_atomic(vaddr);
 840}
 841
 842/* Removes entries from a single page dir pointer, releasing it if it's empty.
  843 * Caller can use the return value to update higher-level entries.
 844 */
 845static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 846                                 struct i915_page_directory * const pdp,
 847                                 u64 start, u64 length)
 848{
 849        struct i915_page_directory *pd;
 850        unsigned int pdpe;
 851
 852        gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 853                bool free = false;
 854
 855                GEM_BUG_ON(pd == vm->scratch_pd);
 856
 857                if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
 858                        continue;
 859
 860                spin_lock(&pdp->lock);
 861                if (!atomic_read(&pd->used)) {
 862                        gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe);
 863                        pdp->entry[pdpe] = vm->scratch_pd;
 864
 865                        GEM_BUG_ON(!atomic_read(&pdp->used));
 866                        atomic_dec(&pdp->used);
 867                        free = true;
 868                }
 869                spin_unlock(&pdp->lock);
 870                if (free)
 871                        free_pd(vm, pd);
 872        }
 873
 874        return !atomic_read(&pdp->used);
 875}
 876
 877static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
 878                                  u64 start, u64 length)
 879{
 880        gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length);
 881}
 882
 883static void gen8_ppgtt_set_pml4e(struct i915_page_directory *pml4,
 884                                 struct i915_page_directory *pdp,
 885                                 unsigned int pml4e)
 886{
 887        gen8_ppgtt_pml4e_t *vaddr;
 888
 889        vaddr = kmap_atomic_px(pml4);
 890        vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
 891        kunmap_atomic(vaddr);
 892}
 893
 894/* Removes entries from a single pml4.
 895 * This is the top-level structure in 4-level page tables used on gen8+.
 896 * Empty entries are always scratch pml4e.
 897 */
 898static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 899                                  u64 start, u64 length)
 900{
 901        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 902        struct i915_page_directory * const pml4 = ppgtt->pd;
 903        struct i915_page_directory *pdp;
 904        unsigned int pml4e;
 905
 906        GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 907
 908        gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 909                bool free = false;
 910                GEM_BUG_ON(pdp == vm->scratch_pdp);
 911
 912                if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
 913                        continue;
 914
 915                spin_lock(&pml4->lock);
 916                if (!atomic_read(&pdp->used)) {
 917                        gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
 918                        pml4->entry[pml4e] = vm->scratch_pdp;
 919                        free = true;
 920                }
 921                spin_unlock(&pml4->lock);
 922                if (free)
 923                        free_pd(vm, pdp);
 924        }
 925}
 926
 927static inline struct sgt_dma {
 928        struct scatterlist *sg;
 929        dma_addr_t dma, max;
 930} sgt_dma(struct i915_vma *vma) {
 931        struct scatterlist *sg = vma->pages->sgl;
 932        dma_addr_t addr = sg_dma_address(sg);
 933        return (struct sgt_dma) { sg, addr, addr + sg->length };
 934}
 935
 936struct gen8_insert_pte {
 937        u16 pml4e;
 938        u16 pdpe;
 939        u16 pde;
 940        u16 pte;
 941};
 942
 943static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
 944{
 945        return (struct gen8_insert_pte) {
 946                 gen8_pml4e_index(start),
 947                 gen8_pdpe_index(start),
 948                 gen8_pde_index(start),
 949                 gen8_pte_index(start),
 950        };
 951}
 952
 953static __always_inline bool
 954gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
 955                              struct i915_page_directory *pdp,
 956                              struct sgt_dma *iter,
 957                              struct gen8_insert_pte *idx,
 958                              enum i915_cache_level cache_level,
 959                              u32 flags)
 960{
 961        struct i915_page_directory *pd;
 962        const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
 963        gen8_pte_t *vaddr;
 964        bool ret;
 965
 966        GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
 967        pd = i915_pd_entry(pdp, idx->pdpe);
 968        vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
 969        do {
 970                vaddr[idx->pte] = pte_encode | iter->dma;
 971
 972                iter->dma += I915_GTT_PAGE_SIZE;
 973                if (iter->dma >= iter->max) {
 974                        iter->sg = __sg_next(iter->sg);
 975                        if (!iter->sg) {
 976                                ret = false;
 977                                break;
 978                        }
 979
 980                        iter->dma = sg_dma_address(iter->sg);
 981                        iter->max = iter->dma + iter->sg->length;
 982                }
 983
 984                if (++idx->pte == GEN8_PTES) {
 985                        idx->pte = 0;
 986
 987                        if (++idx->pde == I915_PDES) {
 988                                idx->pde = 0;
 989
 990                                /* Limited by sg length for 3lvl */
 991                                if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
 992                                        idx->pdpe = 0;
 993                                        ret = true;
 994                                        break;
 995                                }
 996
 997                                GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
 998                                pd = pdp->entry[idx->pdpe];
 999                        }
1000
1001                        kunmap_atomic(vaddr);
1002                        vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
1003                }
1004        } while (1);
1005        kunmap_atomic(vaddr);
1006
1007        return ret;
1008}
1009
1010static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
1011                                   struct i915_vma *vma,
1012                                   enum i915_cache_level cache_level,
1013                                   u32 flags)
1014{
1015        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1016        struct sgt_dma iter = sgt_dma(vma);
1017        struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1018
1019        gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx,
1020                                      cache_level, flags);
1021
1022        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1023}
1024
1025static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
1026                                           struct i915_page_directory *pml4,
1027                                           struct sgt_dma *iter,
1028                                           enum i915_cache_level cache_level,
1029                                           u32 flags)
1030{
1031        const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
1032        u64 start = vma->node.start;
1033        dma_addr_t rem = iter->sg->length;
1034
1035        do {
1036                struct gen8_insert_pte idx = gen8_insert_pte(start);
1037                struct i915_page_directory *pdp =
1038                        i915_pdp_entry(pml4, idx.pml4e);
1039                struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
1040                unsigned int page_size;
1041                bool maybe_64K = false;
1042                gen8_pte_t encode = pte_encode;
1043                gen8_pte_t *vaddr;
1044                u16 index, max;
1045
1046                if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
1047                    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
1048                    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
1049                        index = idx.pde;
1050                        max = I915_PDES;
1051                        page_size = I915_GTT_PAGE_SIZE_2M;
1052
1053                        encode |= GEN8_PDE_PS_2M;
1054
1055                        vaddr = kmap_atomic_px(pd);
1056                } else {
1057                        struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
1058
1059                        index = idx.pte;
1060                        max = GEN8_PTES;
1061                        page_size = I915_GTT_PAGE_SIZE;
1062
1063                        if (!index &&
1064                            vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
1065                            IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1066                            (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1067                             rem >= (max - index) * I915_GTT_PAGE_SIZE))
1068                                maybe_64K = true;
1069
1070                        vaddr = kmap_atomic_px(pt);
1071                }
1072
1073                do {
1074                        GEM_BUG_ON(iter->sg->length < page_size);
1075                        vaddr[index++] = encode | iter->dma;
1076
1077                        start += page_size;
1078                        iter->dma += page_size;
1079                        rem -= page_size;
1080                        if (iter->dma >= iter->max) {
1081                                iter->sg = __sg_next(iter->sg);
1082                                if (!iter->sg)
1083                                        break;
1084
1085                                rem = iter->sg->length;
1086                                iter->dma = sg_dma_address(iter->sg);
1087                                iter->max = iter->dma + rem;
1088
1089                                if (maybe_64K && index < max &&
1090                                    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1091                                      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1092                                       rem >= (max - index) * I915_GTT_PAGE_SIZE)))
1093                                        maybe_64K = false;
1094
1095                                if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
1096                                        break;
1097                        }
1098                } while (rem >= page_size && index < max);
1099
1100                kunmap_atomic(vaddr);
1101
1102                /*
 1103                 * Is it safe to mark the 2M block as 64K? -- Either we have
 1104                 * filled the whole page-table with 64K entries, or we have
 1105                 * filled part of it, reached the end of the sg table and have
 1106                 * enough padding.
1107                 */
1108                if (maybe_64K &&
1109                    (index == max ||
1110                     (i915_vm_has_scratch_64K(vma->vm) &&
1111                      !iter->sg && IS_ALIGNED(vma->node.start +
1112                                              vma->node.size,
1113                                              I915_GTT_PAGE_SIZE_2M)))) {
1114                        vaddr = kmap_atomic_px(pd);
1115                        vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
1116                        kunmap_atomic(vaddr);
1117                        page_size = I915_GTT_PAGE_SIZE_64K;
1118
1119                        /*
1120                         * We write all 4K page entries, even when using 64K
1121                         * pages. In order to verify that the HW isn't cheating
1122                         * by using the 4K PTE instead of the 64K PTE, we want
1123                         * to remove all the surplus entries. If the HW skipped
1124                         * the 64K PTE, it will read/write into the scratch page
1125                         * instead - which we detect as missing results during
1126                         * selftests.
1127                         */
1128                        if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
1129                                u16 i;
1130
1131                                encode = vma->vm->scratch_pte;
1132                                vaddr = kmap_atomic_px(i915_pt_entry(pd,
1133                                                                     idx.pde));
1134
1135                                for (i = 1; i < index; i += 16)
1136                                        memset64(vaddr + i, encode, 15);
1137
1138                                kunmap_atomic(vaddr);
1139                        }
1140                }
1141
1142                vma->page_sizes.gtt |= page_size;
1143        } while (iter->sg);
1144}
1145
1146static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
1147                                   struct i915_vma *vma,
1148                                   enum i915_cache_level cache_level,
1149                                   u32 flags)
1150{
1151        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1152        struct sgt_dma iter = sgt_dma(vma);
1153        struct i915_page_directory * const pml4 = ppgtt->pd;
1154
1155        if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
1156                gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
1157                                               flags);
1158        } else {
1159                struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
1160
1161                while (gen8_ppgtt_insert_pte_entries(ppgtt,
1162                                                     i915_pdp_entry(pml4, idx.pml4e++),
1163                                                     &iter, &idx, cache_level,
1164                                                     flags))
1165                        GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
1166
1167                vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1168        }
1169}
1170
1171static void gen8_free_page_tables(struct i915_address_space *vm,
1172                                  struct i915_page_directory *pd)
1173{
1174        int i;
1175
1176        for (i = 0; i < I915_PDES; i++) {
1177                if (pd->entry[i] != vm->scratch_pt)
1178                        free_pt(vm, pd->entry[i]);
1179        }
1180}
1181
1182static int gen8_init_scratch(struct i915_address_space *vm)
1183{
1184        int ret;
1185
1186        /*
 1187         * If everybody agrees not to write into the scratch page,
1188         * we can reuse it for all vm, keeping contexts and processes separate.
1189         */
1190        if (vm->has_read_only &&
1191            vm->i915->kernel_context &&
1192            vm->i915->kernel_context->vm) {
1193                struct i915_address_space *clone = vm->i915->kernel_context->vm;
1194
1195                GEM_BUG_ON(!clone->has_read_only);
1196
1197                vm->scratch_order = clone->scratch_order;
1198                vm->scratch_pte = clone->scratch_pte;
1199                vm->scratch_pt  = clone->scratch_pt;
1200                vm->scratch_pd  = clone->scratch_pd;
1201                vm->scratch_pdp = clone->scratch_pdp;
1202                return 0;
1203        }
1204
1205        ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1206        if (ret)
1207                return ret;
1208
1209        vm->scratch_pte =
1210                gen8_pte_encode(vm->scratch_page.daddr,
1211                                I915_CACHE_LLC,
1212                                vm->has_read_only);
1213
1214        vm->scratch_pt = alloc_pt(vm);
1215        if (IS_ERR(vm->scratch_pt)) {
1216                ret = PTR_ERR(vm->scratch_pt);
1217                goto free_scratch_page;
1218        }
1219
1220        vm->scratch_pd = alloc_pd(vm);
1221        if (IS_ERR(vm->scratch_pd)) {
1222                ret = PTR_ERR(vm->scratch_pd);
1223                goto free_pt;
1224        }
1225
1226        if (i915_vm_is_4lvl(vm)) {
1227                vm->scratch_pdp = alloc_pd(vm);
1228                if (IS_ERR(vm->scratch_pdp)) {
1229                        ret = PTR_ERR(vm->scratch_pdp);
1230                        goto free_pd;
1231                }
1232        }
1233
1234        gen8_initialize_pt(vm, vm->scratch_pt);
1235        init_pd_with_page(vm, vm->scratch_pd, vm->scratch_pt);
1236        if (i915_vm_is_4lvl(vm))
1237                init_pd(vm, vm->scratch_pdp, vm->scratch_pd);
1238
1239        return 0;
1240
1241free_pd:
1242        free_pd(vm, vm->scratch_pd);
1243free_pt:
1244        free_pt(vm, vm->scratch_pt);
1245free_scratch_page:
1246        cleanup_scratch_page(vm);
1247
1248        return ret;
1249}
1250
1251static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
1252{
1253        struct i915_address_space *vm = &ppgtt->vm;
1254        struct drm_i915_private *dev_priv = vm->i915;
1255        enum vgt_g2v_type msg;
1256        int i;
1257
1258        if (i915_vm_is_4lvl(vm)) {
1259                const u64 daddr = px_dma(ppgtt->pd);
1260
1261                I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
1262                I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
1263
1264                msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
1265                                VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
1266        } else {
1267                for (i = 0; i < GEN8_3LVL_PDPES; i++) {
1268                        const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
1269
1270                        I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
1271                        I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
1272                }
1273
1274                msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
1275                                VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
1276        }
1277
1278        I915_WRITE(vgtif_reg(g2v_notify), msg);
1279
1280        return 0;
1281}
1282
1283static void gen8_free_scratch(struct i915_address_space *vm)
1284{
1285        if (!vm->scratch_page.daddr)
1286                return;
1287
1288        if (i915_vm_is_4lvl(vm))
1289                free_pd(vm, vm->scratch_pdp);
1290        free_pd(vm, vm->scratch_pd);
1291        free_pt(vm, vm->scratch_pt);
1292        cleanup_scratch_page(vm);
1293}
1294
1295static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1296                                    struct i915_page_directory *pdp)
1297{
1298        const unsigned int pdpes = i915_pdpes_per_pdp(vm);
1299        int i;
1300
1301        for (i = 0; i < pdpes; i++) {
1302                if (pdp->entry[i] == vm->scratch_pd)
1303                        continue;
1304
1305                gen8_free_page_tables(vm, pdp->entry[i]);
1306                free_pd(vm, pdp->entry[i]);
1307        }
1308
1309        free_pd(vm, pdp);
1310}
1311
1312static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
1313{
1314        struct i915_page_directory * const pml4 = ppgtt->pd;
1315        int i;
1316
1317        for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1318                struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
1319
1320                if (pdp == ppgtt->vm.scratch_pdp)
1321                        continue;
1322
1323                gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
1324        }
1325
1326        free_pd(&ppgtt->vm, pml4);
1327}
1328
1329static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1330{
1331        struct drm_i915_private *i915 = vm->i915;
1332        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1333
1334        if (intel_vgpu_active(i915))
1335                gen8_ppgtt_notify_vgt(ppgtt, false);
1336
1337        if (i915_vm_is_4lvl(vm))
1338                gen8_ppgtt_cleanup_4lvl(ppgtt);
1339        else
1340                gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
1341
1342        gen8_free_scratch(vm);
1343}
1344
1345static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
1346                               struct i915_page_directory *pd,
1347                               u64 start, u64 length)
1348{
1349        struct i915_page_table *pt, *alloc = NULL;
1350        u64 from = start;
1351        unsigned int pde;
1352        int ret = 0;
1353
1354        spin_lock(&pd->lock);
1355        gen8_for_each_pde(pt, pd, start, length, pde) {
1356                const int count = gen8_pte_count(start, length);
1357
1358                if (pt == vm->scratch_pt) {
1359                        spin_unlock(&pd->lock);
1360
1361                        pt = fetch_and_zero(&alloc);
1362                        if (!pt)
1363                                pt = alloc_pt(vm);
1364                        if (IS_ERR(pt)) {
1365                                ret = PTR_ERR(pt);
1366                                goto unwind;
1367                        }
1368
1369                        if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
1370                                gen8_initialize_pt(vm, pt);
1371
1372                        spin_lock(&pd->lock);
1373                        if (pd->entry[pde] == vm->scratch_pt) {
1374                                gen8_ppgtt_set_pde(vm, pd, pt, pde);
1375                                pd->entry[pde] = pt;
1376                                atomic_inc(&pd->used);
1377                        } else {
1378                                alloc = pt;
1379                                pt = pd->entry[pde];
1380                        }
1381                }
1382
1383                atomic_add(count, &pt->used);
1384        }
1385        spin_unlock(&pd->lock);
1386        goto out;
1387
1388unwind:
1389        gen8_ppgtt_clear_pd(vm, pd, from, start - from);
1390out:
1391        if (alloc)
1392                free_pt(vm, alloc);
1393        return ret;
1394}
1395
1396static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
1397                                struct i915_page_directory *pdp,
1398                                u64 start, u64 length)
1399{
1400        struct i915_page_directory *pd, *alloc = NULL;
1401        u64 from = start;
1402        unsigned int pdpe;
1403        int ret = 0;
1404
1405        spin_lock(&pdp->lock);
1406        gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1407                if (pd == vm->scratch_pd) {
1408                        spin_unlock(&pdp->lock);
1409
1410                        pd = fetch_and_zero(&alloc);
1411                        if (!pd)
1412                                pd = alloc_pd(vm);
1413                        if (IS_ERR(pd)) {
1414                                ret = PTR_ERR(pd);
1415                                goto unwind;
1416                        }
1417
1418                        init_pd_with_page(vm, pd, vm->scratch_pt);
1419
1420                        spin_lock(&pdp->lock);
1421                        if (pdp->entry[pdpe] == vm->scratch_pd) {
1422                                gen8_ppgtt_set_pdpe(pdp, pd, pdpe);
1423                                pdp->entry[pdpe] = pd;
1424                                atomic_inc(&pdp->used);
1425                        } else {
1426                                alloc = pd;
1427                                pd = pdp->entry[pdpe];
1428                        }
1429                }
1430                atomic_inc(&pd->used);
1431                spin_unlock(&pdp->lock);
1432
1433                ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
1434                if (unlikely(ret))
1435                        goto unwind_pd;
1436
1437                spin_lock(&pdp->lock);
1438                atomic_dec(&pd->used);
1439        }
1440        spin_unlock(&pdp->lock);
1441        goto out;
1442
1443unwind_pd:
1444        spin_lock(&pdp->lock);
1445        if (atomic_dec_and_test(&pd->used)) {
1446                gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe);
1447                pdp->entry[pdpe] = vm->scratch_pd;
1448                GEM_BUG_ON(!atomic_read(&pdp->used));
1449                atomic_dec(&pdp->used);
1450                GEM_BUG_ON(alloc);
1451                alloc = pd; /* defer the free to after the lock */
1452        }
1453        spin_unlock(&pdp->lock);
1454unwind:
1455        gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
1456out:
1457        if (alloc)
1458                free_pd(vm, alloc);
1459        return ret;
1460}
1461
1462static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
1463                                 u64 start, u64 length)
1464{
1465        return gen8_ppgtt_alloc_pdp(vm,
1466                                    i915_vm_to_ppgtt(vm)->pd, start, length);
1467}
1468
1469static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
1470                                 u64 start, u64 length)
1471{
1472        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1473        struct i915_page_directory * const pml4 = ppgtt->pd;
1474        struct i915_page_directory *pdp, *alloc = NULL;
1475        u64 from = start;
1476        int ret = 0;
1477        u32 pml4e;
1478
1479        spin_lock(&pml4->lock);
1480        gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1481                if (pdp == vm->scratch_pdp) {
1482                        spin_unlock(&pml4->lock);
1483
1484                        pdp = fetch_and_zero(&alloc);
1485                        if (!pdp)
1486                                pdp = alloc_pd(vm);
1487                        if (IS_ERR(pdp)) {
1488                                ret = PTR_ERR(pdp);
1489                                goto unwind;
1490                        }
1491
1492                        init_pd(vm, pdp, vm->scratch_pd);
1493
1494                        spin_lock(&pml4->lock);
1495                        if (pml4->entry[pml4e] == vm->scratch_pdp) {
1496                                gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1497                                pml4->entry[pml4e] = pdp;
1498                        } else {
1499                                alloc = pdp;
1500                                pdp = pml4->entry[pml4e];
1501                        }
1502                }
1503                atomic_inc(&pdp->used);
1504                spin_unlock(&pml4->lock);
1505
1506                ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
1507                if (unlikely(ret))
1508                        goto unwind_pdp;
1509
1510                spin_lock(&pml4->lock);
1511                atomic_dec(&pdp->used);
1512        }
1513        spin_unlock(&pml4->lock);
1514        goto out;
1515
1516unwind_pdp:
1517        spin_lock(&pml4->lock);
1518        if (atomic_dec_and_test(&pdp->used)) {
1519                gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
1520                pml4->entry[pml4e] = vm->scratch_pdp;
1521                GEM_BUG_ON(alloc);
1522                alloc = pdp; /* defer the free until after the lock */
1523        }
1524        spin_unlock(&pml4->lock);
1525unwind:
1526        gen8_ppgtt_clear_4lvl(vm, from, start - from);
1527out:
1528        if (alloc)
1529                free_pd(vm, alloc);
1530        return ret;
1531}
1532
1533static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
1534{
1535        struct i915_address_space *vm = &ppgtt->vm;
1536        struct i915_page_directory *pdp = ppgtt->pd;
1537        struct i915_page_directory *pd;
1538        u64 start = 0, length = ppgtt->vm.total;
1539        u64 from = start;
1540        unsigned int pdpe;
1541
1542        gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1543                pd = alloc_pd(vm);
1544                if (IS_ERR(pd))
1545                        goto unwind;
1546
1547                init_pd_with_page(vm, pd, vm->scratch_pt);
1548                gen8_ppgtt_set_pdpe(pdp, pd, pdpe);
1549
1550                atomic_inc(&pdp->used);
1551        }
1552
1553        atomic_inc(&pdp->used); /* never remove */
1554
1555        return 0;
1556
1557unwind:
1558        start -= from;
1559        gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
1560                gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe);
1561                free_pd(vm, pd);
1562        }
1563        atomic_set(&pdp->used, 0);
1564        return -ENOMEM;
1565}
1566
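/*
 * Construction common to the gen6 and gen8 ppgtt flavours: record the owning
 * device, the total address-space size and the generic ppgtt vma ops.
 */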
1567static void ppgtt_init(struct drm_i915_private *i915,
1568                       struct i915_ppgtt *ppgtt)
1569{
1570        ppgtt->vm.i915 = i915;
1571        ppgtt->vm.dma = &i915->drm.pdev->dev;
1572        ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
1573
1574        i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
1575
1576        ppgtt->vm.vma_ops.bind_vma    = ppgtt_bind_vma;
1577        ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
1578        ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
1579        ppgtt->vm.vma_ops.clear_pages = clear_pages;
1580}
1581
1582/*
1583 * GEN8 legacy ppgtt programming is accomplished through at most 4 PDP
1584 * registers, with a net effect resembling a 2-level page table in normal
1585 * x86 terms. Each PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB
1586 * of legacy 32b address space.
1587 *
1588 */
1589static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
1590{
1591        struct i915_ppgtt *ppgtt;
1592        int err;
1593
1594        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1595        if (!ppgtt)
1596                return ERR_PTR(-ENOMEM);
1597
1598        ppgtt_init(i915, ppgtt);
1599
1600        /*
1601         * From bdw, there is hw support for read-only pages in the PPGTT.
1602         *
1603         * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
1604         * for now.
1605         */
1606        ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11;
1607
1608        /* There are only a few exceptions for gen >= 6: chv and bxt.
1609         * And we are not sure about the latter, so play it safe for now.
1610         */
1611        if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
1612                ppgtt->vm.pt_kmap_wc = true;
1613
1614        err = gen8_init_scratch(&ppgtt->vm);
1615        if (err)
1616                goto err_free;
1617
1618        ppgtt->pd = __alloc_pd();
1619        if (!ppgtt->pd) {
1620                err = -ENOMEM;
1621                goto err_free_scratch;
1622        }
1623
1624        if (i915_vm_is_4lvl(&ppgtt->vm)) {
1625                err = setup_px(&ppgtt->vm, ppgtt->pd);
1626                if (err)
1627                        goto err_free_pdp;
1628
1629                init_pd(&ppgtt->vm, ppgtt->pd, ppgtt->vm.scratch_pdp);
1630
1631                ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
1632                ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
1633                ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
1634        } else {
1635                /*
1636                 * We don't need to set up DMA for the top level pdp, only
1637                 * for its entries. So point the entries at scratch.
1638                 */
1639                memset_p(ppgtt->pd->entry, ppgtt->vm.scratch_pd,
1640                         GEN8_3LVL_PDPES);
1641
1642                if (intel_vgpu_active(i915)) {
1643                        err = gen8_preallocate_top_level_pdp(ppgtt);
1644                        if (err)
1645                                goto err_free_pdp;
1646                }
1647
1648                ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
1649                ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
1650                ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
1651        }
1652
1653        if (intel_vgpu_active(i915))
1654                gen8_ppgtt_notify_vgt(ppgtt, true);
1655
1656        ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
1657
1658        return ppgtt;
1659
1660err_free_pdp:
1661        free_pd(&ppgtt->vm, ppgtt->pd);
1662err_free_scratch:
1663        gen8_free_scratch(&ppgtt->vm);
1664err_free:
1665        kfree(ppgtt);
1666        return ERR_PTR(err);
1667}
1668
1669/* Write the PDE at index @pde to point at the page table @pt */
1670static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
1671                                  const unsigned int pde,
1672                                  const struct i915_page_table *pt)
1673{
1674        /* Caller needs to make sure the write completes if necessary */
1675        iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
1676                  ppgtt->pd_addr + pde);
1677}
1678
1679static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
1680{
1681        struct intel_engine_cs *engine;
1682        u32 ecochk, ecobits;
1683        enum intel_engine_id id;
1684
1685        ecobits = I915_READ(GAC_ECO_BITS);
1686        I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1687
1688        ecochk = I915_READ(GAM_ECOCHK);
1689        if (IS_HASWELL(dev_priv)) {
1690                ecochk |= ECOCHK_PPGTT_WB_HSW;
1691        } else {
1692                ecochk |= ECOCHK_PPGTT_LLC_IVB;
1693                ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1694        }
1695        I915_WRITE(GAM_ECOCHK, ecochk);
1696
1697        for_each_engine(engine, dev_priv, id) {
1698                /* GFX_MODE is per-ring on gen7+ */
1699                ENGINE_WRITE(engine,
1700                             RING_MODE_GEN7,
1701                             _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1702        }
1703}
1704
1705static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1706{
1707        u32 ecochk, gab_ctl, ecobits;
1708
1709        ecobits = I915_READ(GAC_ECO_BITS);
1710        I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1711                   ECOBITS_PPGTT_CACHE64B);
1712
1713        gab_ctl = I915_READ(GAB_CTL);
1714        I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1715
1716        ecochk = I915_READ(GAM_ECOCHK);
1717        I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1718
1719        if (HAS_PPGTT(dev_priv)) /* may be disabled for VT-d */
1720                I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1721}
1722
1723/* PPGTT support for Sandybridge/Gen6 and later */
1724static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1725                                   u64 start, u64 length)
1726{
1727        struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1728        const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
1729        const gen6_pte_t scratch_pte = vm->scratch_pte;
1730        unsigned int pde = first_entry / GEN6_PTES;
1731        unsigned int pte = first_entry % GEN6_PTES;
1732        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
1733
1734        while (num_entries) {
1735                struct i915_page_table * const pt =
1736                        i915_pt_entry(ppgtt->base.pd, pde++);
1737                const unsigned int count = min(num_entries, GEN6_PTES - pte);
1738                gen6_pte_t *vaddr;
1739
1740                GEM_BUG_ON(pt == vm->scratch_pt);
1741
1742                num_entries -= count;
1743
1744                GEM_BUG_ON(count > atomic_read(&pt->used));
1745                if (!atomic_sub_return(count, &pt->used))
1746                        ppgtt->scan_for_unused_pt = true;
1747
1748                /*
1749                 * Note that the hw doesn't support removing PDE on the fly
1750                 * (they are cached inside the context with no means to
1751                 * invalidate the cache), so we can only reset the PTE
1752                 * entries back to scratch.
1753                 */
1754
1755                vaddr = kmap_atomic_px(pt);
1756                memset32(vaddr + pte, scratch_pte, count);
1757                kunmap_atomic(vaddr);
1758
1759                pte = 0;
1760        }
1761}
1762
1763static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1764                                      struct i915_vma *vma,
1765                                      enum i915_cache_level cache_level,
1766                                      u32 flags)
1767{
1768        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1769        struct i915_page_directory * const pd = ppgtt->pd;
1770        unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
1771        unsigned act_pt = first_entry / GEN6_PTES;
1772        unsigned act_pte = first_entry % GEN6_PTES;
1773        const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
1774        struct sgt_dma iter = sgt_dma(vma);
1775        gen6_pte_t *vaddr;
1776
1777        GEM_BUG_ON(i915_pt_entry(pd, act_pt) == vm->scratch_pt);
1778
1779        vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
1780        do {
1781                vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
1782
1783                iter.dma += I915_GTT_PAGE_SIZE;
1784                if (iter.dma == iter.max) {
1785                        iter.sg = __sg_next(iter.sg);
1786                        if (!iter.sg)
1787                                break;
1788
1789                        iter.dma = sg_dma_address(iter.sg);
1790                        iter.max = iter.dma + iter.sg->length;
1791                }
1792
1793                if (++act_pte == GEN6_PTES) {
1794                        kunmap_atomic(vaddr);
1795                        vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
1796                        act_pte = 0;
1797                }
1798        } while (1);
1799        kunmap_atomic(vaddr);
1800
1801        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
1802}
1803
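/*
 * Make sure page tables exist for every PDE covering [start, start + length):
 * any PDE still pointing at the scratch page table gets a freshly allocated
 * one, and if the ppgtt is already bound into the GGTT the new PDEs are
 * written out immediately and the TLBs invalidated.
 */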
1804static int gen6_alloc_va_range(struct i915_address_space *vm,
1805                               u64 start, u64 length)
1806{
1807        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1808        struct i915_page_directory * const pd = ppgtt->base.pd;
1809        struct i915_page_table *pt, *alloc = NULL;
1810        intel_wakeref_t wakeref;
1811        u64 from = start;
1812        unsigned int pde;
1813        bool flush = false;
1814        int ret = 0;
1815
1816        wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
1817
1818        spin_lock(&pd->lock);
1819        gen6_for_each_pde(pt, pd, start, length, pde) {
1820                const unsigned int count = gen6_pte_count(start, length);
1821
1822                if (pt == vm->scratch_pt) {
1823                        spin_unlock(&pd->lock);
1824
1825                        pt = fetch_and_zero(&alloc);
1826                        if (!pt)
1827                                pt = alloc_pt(vm);
1828                        if (IS_ERR(pt)) {
1829                                ret = PTR_ERR(pt);
1830                                goto unwind_out;
1831                        }
1832
1833                        gen6_initialize_pt(vm, pt);
1834
1835                        spin_lock(&pd->lock);
1836                        if (pd->entry[pde] == vm->scratch_pt) {
1837                                pd->entry[pde] = pt;
1838                                if (i915_vma_is_bound(ppgtt->vma,
1839                                                      I915_VMA_GLOBAL_BIND)) {
1840                                        gen6_write_pde(ppgtt, pde, pt);
1841                                        flush = true;
1842                                }
1843                        } else {
1844                                alloc = pt;
1845                                pt = pd->entry[pde];
1846                        }
1847                }
1848
1849                atomic_add(count, &pt->used);
1850        }
1851        spin_unlock(&pd->lock);
1852
1853        if (flush) {
1854                mark_tlbs_dirty(&ppgtt->base);
1855                gen6_ggtt_invalidate(vm->i915);
1856        }
1857
1858        goto out;
1859
1860unwind_out:
1861        gen6_ppgtt_clear_range(vm, from, start - from);
1862out:
1863        if (alloc)
1864                free_pt(vm, alloc);
1865        intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
1866        return ret;
1867}
1868
1869static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
1870{
1871        struct i915_address_space * const vm = &ppgtt->base.vm;
1872        struct i915_page_directory * const pd = ppgtt->base.pd;
1873        struct i915_page_table *unused;
1874        u32 pde;
1875        int ret;
1876
1877        ret = setup_scratch_page(vm, __GFP_HIGHMEM);
1878        if (ret)
1879                return ret;
1880
1881        vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1882                                         I915_CACHE_NONE,
1883                                         PTE_READ_ONLY);
1884
1885        vm->scratch_pt = alloc_pt(vm);
1886        if (IS_ERR(vm->scratch_pt)) {
1887                cleanup_scratch_page(vm);
1888                return PTR_ERR(vm->scratch_pt);
1889        }
1890
1891        gen6_initialize_pt(vm, vm->scratch_pt);
1892
1893        gen6_for_all_pdes(unused, pd, pde)
1894                pd->entry[pde] = vm->scratch_pt;
1895
1896        return 0;
1897}
1898
1899static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
1900{
1901        free_pt(vm, vm->scratch_pt);
1902        cleanup_scratch_page(vm);
1903}
1904
1905static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
1906{
1907        struct i915_page_directory * const pd = ppgtt->base.pd;
1908        struct i915_page_table *pt;
1909        u32 pde;
1910
1911        gen6_for_all_pdes(pt, pd, pde)
1912                if (pt != ppgtt->base.vm.scratch_pt)
1913                        free_pt(&ppgtt->base.vm, pt);
1914}
1915
1916struct gen6_ppgtt_cleanup_work {
1917        struct work_struct base;
1918        struct i915_vma *vma;
1919};
1920
1921static void gen6_ppgtt_cleanup_work(struct work_struct *wrk)
1922{
1923        struct gen6_ppgtt_cleanup_work *work =
1924                container_of(wrk, typeof(*work), base);
1925        /* Side note, vma->vm is the GGTT not the ppgtt we just destroyed! */
1926        struct drm_i915_private *i915 = work->vma->vm->i915;
1927
1928        mutex_lock(&i915->drm.struct_mutex);
1929        i915_vma_destroy(work->vma);
1930        mutex_unlock(&i915->drm.struct_mutex);
1931
1932        kfree(work);
1933}
1934
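/*
 * Once gen6_ppgtt_cleanup() has handed the pd vma over to the cleanup
 * worker, nobody should bind or unbind it any more; these no-op vma ops are
 * swapped in so that any late caller fails cleanly with -ENODEV.
 */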
1935static int nop_set_pages(struct i915_vma *vma)
1936{
1937        return -ENODEV;
1938}
1939
1940static void nop_clear_pages(struct i915_vma *vma)
1941{
1942}
1943
1944static int nop_bind(struct i915_vma *vma,
1945                    enum i915_cache_level cache_level,
1946                    u32 unused)
1947{
1948        return -ENODEV;
1949}
1950
1951static void nop_unbind(struct i915_vma *vma)
1952{
1953}
1954
1955static const struct i915_vma_ops nop_vma_ops = {
1956        .set_pages = nop_set_pages,
1957        .clear_pages = nop_clear_pages,
1958        .bind_vma = nop_bind,
1959        .unbind_vma = nop_unbind,
1960};
1961
1962static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1963{
1964        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
1965        struct gen6_ppgtt_cleanup_work *work = ppgtt->work;
1966
1967        /* FIXME remove the struct_mutex to bring the locking under control */
1968        INIT_WORK(&work->base, gen6_ppgtt_cleanup_work);
1969        work->vma = ppgtt->vma;
1970        work->vma->ops = &nop_vma_ops;
1971        schedule_work(&work->base);
1972
1973        gen6_ppgtt_free_pd(ppgtt);
1974        gen6_ppgtt_free_scratch(vm);
1975        kfree(ppgtt->base.pd);
1976}
1977
1978static int pd_vma_set_pages(struct i915_vma *vma)
1979{
1980        vma->pages = ERR_PTR(-ENODEV);
1981        return 0;
1982}
1983
1984static void pd_vma_clear_pages(struct i915_vma *vma)
1985{
1986        GEM_BUG_ON(!vma->pages);
1987
1988        vma->pages = NULL;
1989}
1990
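/*
 * Binding the page-directory vma into the GGTT: note where the PDEs now
 * live, write out every PDE so the hardware sees the current page tables,
 * and invalidate the GGTT TLBs.
 */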
1991static int pd_vma_bind(struct i915_vma *vma,
1992                       enum i915_cache_level cache_level,
1993                       u32 unused)
1994{
1995        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
1996        struct gen6_ppgtt *ppgtt = vma->private;
1997        u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
1998        struct i915_page_table *pt;
1999        unsigned int pde;
2000
2001        ppgtt->base.pd->base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
2002        ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
2003
2004        gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
2005                gen6_write_pde(ppgtt, pde, pt);
2006
2007        mark_tlbs_dirty(&ppgtt->base);
2008        gen6_ggtt_invalidate(ppgtt->base.vm.i915);
2009
2010        return 0;
2011}
2012
2013static void pd_vma_unbind(struct i915_vma *vma)
2014{
2015        struct gen6_ppgtt *ppgtt = vma->private;
2016        struct i915_page_directory * const pd = ppgtt->base.pd;
2017        struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
2018        struct i915_page_table *pt;
2019        unsigned int pde;
2020
2021        if (!ppgtt->scan_for_unused_pt)
2022                return;
2023
2024        /* Free all no longer used page tables */
2025        gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
2026                if (atomic_read(&pt->used) || pt == scratch_pt)
2027                        continue;
2028
2029                free_pt(&ppgtt->base.vm, pt);
2030                pd->entry[pde] = scratch_pt;
2031        }
2032
2033        ppgtt->scan_for_unused_pt = false;
2034}
2035
2036static const struct i915_vma_ops pd_vma_ops = {
2037        .set_pages = pd_vma_set_pages,
2038        .clear_pages = pd_vma_clear_pages,
2039        .bind_vma = pd_vma_bind,
2040        .unbind_vma = pd_vma_unbind,
2041};
2042
2043static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
2044{
2045        struct drm_i915_private *i915 = ppgtt->base.vm.i915;
2046        struct i915_ggtt *ggtt = &i915->ggtt;
2047        struct i915_vma *vma;
2048
2049        GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
2050        GEM_BUG_ON(size > ggtt->vm.total);
2051
2052        vma = i915_vma_alloc();
2053        if (!vma)
2054                return ERR_PTR(-ENOMEM);
2055
2056        i915_active_init(i915, &vma->active, NULL);
2057        INIT_ACTIVE_REQUEST(&vma->last_fence);
2058
2059        vma->vm = &ggtt->vm;
2060        vma->ops = &pd_vma_ops;
2061        vma->private = ppgtt;
2062
2063        vma->size = size;
2064        vma->fence_size = size;
2065        vma->flags = I915_VMA_GGTT;
2066        vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
2067
2068        INIT_LIST_HEAD(&vma->obj_link);
2069        INIT_LIST_HEAD(&vma->closed_link);
2070
2071        mutex_lock(&vma->vm->mutex);
2072        list_add(&vma->vm_link, &vma->vm->unbound_list);
2073        mutex_unlock(&vma->vm->mutex);
2074
2075        return vma;
2076}
2077
2078int gen6_ppgtt_pin(struct i915_ppgtt *base)
2079{
2080        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
2081        int err;
2082
2083        GEM_BUG_ON(ppgtt->base.vm.closed);
2084
2085        /*
2086         * Work around the limited maximum vma->pin_count and the aliasing_ppgtt,
2087         * which will be pinned into every active context.
2088         * (When vma->pin_count becomes atomic, I expect we will naturally
2089         * need a larger, unpacked, type and kill this redundancy.)
2090         */
2091        if (ppgtt->pin_count++)
2092                return 0;
2093
2094        /*
2095         * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2096         * allocator works in address space sizes, so it's multiplied by page
2097         * size. We allocate at the top of the GTT to avoid fragmentation.
2098         */
2099        err = i915_vma_pin(ppgtt->vma,
2100                           0, GEN6_PD_ALIGN,
2101                           PIN_GLOBAL | PIN_HIGH);
2102        if (err)
2103                goto unpin;
2104
2105        return 0;
2106
2107unpin:
2108        ppgtt->pin_count = 0;
2109        return err;
2110}
2111
2112void gen6_ppgtt_unpin(struct i915_ppgtt *base)
2113{
2114        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
2115
2116        GEM_BUG_ON(!ppgtt->pin_count);
2117        if (--ppgtt->pin_count)
2118                return;
2119
2120        i915_vma_unpin(ppgtt->vma);
2121}
2122
2123void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
2124{
2125        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
2126
2127        if (!ppgtt->pin_count)
2128                return;
2129
2130        ppgtt->pin_count = 0;
2131        i915_vma_unpin(ppgtt->vma);
2132}
2133
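/*
 * Assemble a gen6/gen7-style ppgtt: a single page directory whose 512 PDEs
 * live inside the GGTT (hence the pd_vma created below), with all
 * unallocated ranges backed by a shared scratch page table.
 */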
2134static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
2135{
2136        struct i915_ggtt * const ggtt = &i915->ggtt;
2137        struct gen6_ppgtt *ppgtt;
2138        int err;
2139
2140        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2141        if (!ppgtt)
2142                return ERR_PTR(-ENOMEM);
2143
2144        ppgtt_init(i915, &ppgtt->base);
2145
2146        ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
2147        ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
2148        ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
2149        ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
2150
2151        ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
2152
2153        ppgtt->work = kmalloc(sizeof(*ppgtt->work), GFP_KERNEL);
2154        if (!ppgtt->work) {
2155                err = -ENOMEM;
2156                goto err_free;
2157        }
2158
2159        ppgtt->base.pd = __alloc_pd();
2160        if (!ppgtt->base.pd) {
2161                err = -ENOMEM;
2162                goto err_work;
2163        }
2164
2165        err = gen6_ppgtt_init_scratch(ppgtt);
2166        if (err)
2167                goto err_pd;
2168
2169        ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
2170        if (IS_ERR(ppgtt->vma)) {
2171                err = PTR_ERR(ppgtt->vma);
2172                goto err_scratch;
2173        }
2174
2175        return &ppgtt->base;
2176
2177err_scratch:
2178        gen6_ppgtt_free_scratch(&ppgtt->base.vm);
2179err_pd:
2180        kfree(ppgtt->base.pd);
2181err_work:
2182        kfree(ppgtt->work);
2183err_free:
2184        kfree(ppgtt);
2185        return ERR_PTR(err);
2186}
2187
2188static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
2189{
2190        /* This function is for GTT-related workarounds. It is called on
2191         * driver load and after a GPU reset, so workarounds placed here are
2192         * reapplied even if a GPU reset overwrites them.
2193         */
2194        /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
2195        if (IS_BROADWELL(dev_priv))
2196                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2197        else if (IS_CHERRYVIEW(dev_priv))
2198                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2199        else if (IS_GEN9_LP(dev_priv))
2200                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2201        else if (INTEL_GEN(dev_priv) >= 9)
2202                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2203
2204        /*
2205         * To support 64K PTEs we need to first enable the use of the
2206         * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
2207         * mmio, otherwise the page-walker will simply ignore the IPS bit. This
2208         * shouldn't be needed after GEN10.
2209         *
2210         * 64K pages were first introduced from BDW+, although technically they
2211         * only *work* from gen9+. For pre-BDW we instead have the option for
2212         * 32K pages, but we don't currently have any support for it in our
2213         * driver.
2214         */
2215        if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
2216            INTEL_GEN(dev_priv) <= 10)
2217                I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
2218                           I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
2219                           GAMW_ECO_ENABLE_64K_IPS_FIELD);
2220}
2221
2222int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
2223{
2224        gtt_write_workarounds(dev_priv);
2225
2226        if (IS_GEN(dev_priv, 6))
2227                gen6_ppgtt_enable(dev_priv);
2228        else if (IS_GEN(dev_priv, 7))
2229                gen7_ppgtt_enable(dev_priv);
2230
2231        return 0;
2232}
2233
2234static struct i915_ppgtt *
2235__ppgtt_create(struct drm_i915_private *i915)
2236{
2237        if (INTEL_GEN(i915) < 8)
2238                return gen6_ppgtt_create(i915);
2239        else
2240                return gen8_ppgtt_create(i915);
2241}
2242
2243struct i915_ppgtt *
2244i915_ppgtt_create(struct drm_i915_private *i915)
2245{
2246        struct i915_ppgtt *ppgtt;
2247
2248        ppgtt = __ppgtt_create(i915);
2249        if (IS_ERR(ppgtt))
2250                return ppgtt;
2251
2252        trace_i915_ppgtt_create(&ppgtt->vm);
2253
2254        return ppgtt;
2255}
2256
2257static void ppgtt_destroy_vma(struct i915_address_space *vm)
2258{
2259        struct list_head *phases[] = {
2260                &vm->bound_list,
2261                &vm->unbound_list,
2262                NULL,
2263        }, **phase;
2264
2265        vm->closed = true;
2266        for (phase = phases; *phase; phase++) {
2267                struct i915_vma *vma, *vn;
2268
2269                list_for_each_entry_safe(vma, vn, *phase, vm_link)
2270                        i915_vma_destroy(vma);
2271        }
2272}
2273
2274void i915_vm_release(struct kref *kref)
2275{
2276        struct i915_address_space *vm =
2277                container_of(kref, struct i915_address_space, ref);
2278
2279        GEM_BUG_ON(i915_is_ggtt(vm));
2280        trace_i915_ppgtt_release(vm);
2281
2282        ppgtt_destroy_vma(vm);
2283
2284        GEM_BUG_ON(!list_empty(&vm->bound_list));
2285        GEM_BUG_ON(!list_empty(&vm->unbound_list));
2286
2287        vm->cleanup(vm);
2288        i915_address_space_fini(vm);
2289
2290        kfree(vm);
2291}
2292
2293/* Certain Gen5 chipsets require idling the GPU before
2294 * unmapping anything from the GTT when VT-d is enabled.
2295 */
2296static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2297{
2298        /* Query intel_iommu to see if we need the workaround. Presumably that
2299         * was loaded first.
2300         */
2301        return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active();
2302}
2303
2304void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
2305{
2306        struct i915_ggtt *ggtt = &dev_priv->ggtt;
2307
2308        /* Don't bother messing with faults pre GEN6 as we have little
2309         * documentation supporting that it's a good idea.
2310         */
2311        if (INTEL_GEN(dev_priv) < 6)
2312                return;
2313
2314        i915_check_and_clear_faults(dev_priv);
2315
2316        ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
2317
2318        i915_ggtt_invalidate(dev_priv);
2319}
2320
2321int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
2322                               struct sg_table *pages)
2323{
2324        do {
2325                if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
2326                                     pages->sgl, pages->nents,
2327                                     PCI_DMA_BIDIRECTIONAL,
2328                                     DMA_ATTR_NO_WARN))
2329                        return 0;
2330
2331                /*
2332                 * If the DMA remap fails, one cause can be that we have
2333                 * too many objects pinned in a small remapping table,
2334                 * such as swiotlb. Incrementally purge all other objects and
2335                 * try again - if there are no more pages to remove from
2336                 * the DMA remapper, i915_gem_shrink will return 0.
2337                 */
2338                GEM_BUG_ON(obj->mm.pages == pages);
2339        } while (i915_gem_shrink(to_i915(obj->base.dev),
2340                                 obj->base.size >> PAGE_SHIFT, NULL,
2341                                 I915_SHRINK_BOUND |
2342                                 I915_SHRINK_UNBOUND));
2343
2344        return -ENOSPC;
2345}
2346
2347static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2348{
2349        writeq(pte, addr);
2350}
2351
2352static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2353                                  dma_addr_t addr,
2354                                  u64 offset,
2355                                  enum i915_cache_level level,
2356                                  u32 unused)
2357{
2358        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2359        gen8_pte_t __iomem *pte =
2360                (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
2361
2362        gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
2363
2364        ggtt->invalidate(vm->i915);
2365}
2366
2367static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2368                                     struct i915_vma *vma,
2369                                     enum i915_cache_level level,
2370                                     u32 flags)
2371{
2372        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2373        struct sgt_iter sgt_iter;
2374        gen8_pte_t __iomem *gtt_entries;
2375        const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
2376        dma_addr_t addr;
2377
2378        /*
2379         * Note that we ignore PTE_READ_ONLY here. The caller must be careful
2380         * not to allow the user to override access to a read only page.
2381         */
2382
2383        gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
2384        gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
2385        for_each_sgt_dma(addr, sgt_iter, vma->pages)
2386                gen8_set_pte(gtt_entries++, pte_encode | addr);
2387
2388        /*
2389         * We want to flush the TLBs only after we're certain all the PTE
2390         * updates have finished.
2391         */
2392        ggtt->invalidate(vm->i915);
2393}
2394
2395static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2396                                  dma_addr_t addr,
2397                                  u64 offset,
2398                                  enum i915_cache_level level,
2399                                  u32 flags)
2400{
2401        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2402        gen6_pte_t __iomem *pte =
2403                (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
2404
2405        iowrite32(vm->pte_encode(addr, level, flags), pte);
2406
2407        ggtt->invalidate(vm->i915);
2408}
2409
2410/*
2411 * Binds an object into the global gtt with the specified cache level. The object
2412 * will be accessible to the GPU via commands whose operands reference offsets
2413 * within the global GTT as well as accessible by the GPU through the GMADR
2414 * mapped BAR (dev_priv->mm.gtt->gtt).
2415 */
2416static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2417                                     struct i915_vma *vma,
2418                                     enum i915_cache_level level,
2419                                     u32 flags)
2420{
2421        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2422        gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
2423        unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
2424        struct sgt_iter iter;
2425        dma_addr_t addr;

2426        for_each_sgt_dma(addr, iter, vma->pages)
2427                iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2428
2429        /*
2430         * We want to flush the TLBs only after we're certain all the PTE
2431         * updates have finished.
2432         */
2433        ggtt->invalidate(vm->i915);
2434}
2435
2436static void nop_clear_range(struct i915_address_space *vm,
2437                            u64 start, u64 length)
2438{
2439}
2440
2441static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2442                                  u64 start, u64 length)
2443{
2444        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2445        unsigned first_entry = start / I915_GTT_PAGE_SIZE;
2446        unsigned num_entries = length / I915_GTT_PAGE_SIZE;
2447        const gen8_pte_t scratch_pte = vm->scratch_pte;
2448        gen8_pte_t __iomem *gtt_base =
2449                (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2450        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2451        int i;
2452
2453        if (WARN(num_entries > max_entries,
2454                 "First entry = %d; Num entries = %d (max=%d)\n",
2455                 first_entry, num_entries, max_entries))
2456                num_entries = max_entries;
2457
2458        for (i = 0; i < num_entries; i++)
2459                gen8_set_pte(&gtt_base[i], scratch_pte);
2460}
2461
2462static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2463{
2464        struct drm_i915_private *dev_priv = vm->i915;
2465
2466        /*
2467         * Make sure the internal GAM fifo has been cleared of all GTT
2468         * writes before exiting stop_machine(). This guarantees that
2469         * any aperture accesses waiting to start in another process
2470         * cannot back up behind the GTT writes causing a hang.
2471         * The register can be any arbitrary GAM register.
2472         */
2473        POSTING_READ(GFX_FLSH_CNTL_GEN6);
2474}
2475
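/*
 * The bxt_vtd_*__BKL variants below bundle their arguments into a small
 * struct and run the ordinary GGTT update functions under stop_machine(),
 * followed by bxt_vtd_ggtt_wa(), so that concurrent aperture accesses cannot
 * back up behind the GTT writes (see the comment in bxt_vtd_ggtt_wa()).
 */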
2476struct insert_page {
2477        struct i915_address_space *vm;
2478        dma_addr_t addr;
2479        u64 offset;
2480        enum i915_cache_level level;
2481};
2482
2483static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2484{
2485        struct insert_page *arg = _arg;
2486
2487        gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2488        bxt_vtd_ggtt_wa(arg->vm);
2489
2490        return 0;
2491}
2492
2493static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
2494                                          dma_addr_t addr,
2495                                          u64 offset,
2496                                          enum i915_cache_level level,
2497                                          u32 unused)
2498{
2499        struct insert_page arg = { vm, addr, offset, level };
2500
2501        stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
2502}
2503
2504struct insert_entries {
2505        struct i915_address_space *vm;
2506        struct i915_vma *vma;
2507        enum i915_cache_level level;
2508        u32 flags;
2509};
2510
2511static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2512{
2513        struct insert_entries *arg = _arg;
2514
2515        gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
2516        bxt_vtd_ggtt_wa(arg->vm);
2517
2518        return 0;
2519}
2520
2521static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2522                                             struct i915_vma *vma,
2523                                             enum i915_cache_level level,
2524                                             u32 flags)
2525{
2526        struct insert_entries arg = { vm, vma, level, flags };
2527
2528        stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
2529}
2530
2531struct clear_range {
2532        struct i915_address_space *vm;
2533        u64 start;
2534        u64 length;
2535};
2536
2537static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2538{
2539        struct clear_range *arg = _arg;
2540
2541        gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2542        bxt_vtd_ggtt_wa(arg->vm);
2543
2544        return 0;
2545}
2546
2547static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
2548                                          u64 start,
2549                                          u64 length)
2550{
2551        struct clear_range arg = { vm, start, length };
2552
2553        stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
2554}
2555
2556static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2557                                  u64 start, u64 length)
2558{
2559        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2560        unsigned first_entry = start / I915_GTT_PAGE_SIZE;
2561        unsigned num_entries = length / I915_GTT_PAGE_SIZE;
2562        gen6_pte_t scratch_pte, __iomem *gtt_base =
2563                (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2564        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2565        int i;
2566
2567        if (WARN(num_entries > max_entries,
2568                 "First entry = %d; Num entries = %d (max=%d)\n",
2569                 first_entry, num_entries, max_entries))
2570                num_entries = max_entries;
2571
2572        scratch_pte = vm->scratch_pte;
2573
2574        for (i = 0; i < num_entries; i++)
2575                iowrite32(scratch_pte, &gtt_base[i]);
2576}
2577
2578static void i915_ggtt_insert_page(struct i915_address_space *vm,
2579                                  dma_addr_t addr,
2580                                  u64 offset,
2581                                  enum i915_cache_level cache_level,
2582                                  u32 unused)
2583{
2584        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2585                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2586
2587        intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
2588}
2589
2590static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2591                                     struct i915_vma *vma,
2592                                     enum i915_cache_level cache_level,
2593                                     u32 unused)
2594{
2595        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2596                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2597
2598        intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
2599                                    flags);
2600}
2601
2602static void i915_ggtt_clear_range(struct i915_address_space *vm,
2603                                  u64 start, u64 length)
2604{
2605        intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
2606}
2607
2608static int ggtt_bind_vma(struct i915_vma *vma,
2609                         enum i915_cache_level cache_level,
2610                         u32 flags)
2611{
2612        struct drm_i915_private *i915 = vma->vm->i915;
2613        struct drm_i915_gem_object *obj = vma->obj;
2614        intel_wakeref_t wakeref;
2615        u32 pte_flags;
2616
2617        /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
2618        pte_flags = 0;
2619        if (i915_gem_object_is_readonly(obj))
2620                pte_flags |= PTE_READ_ONLY;
2621
2622        with_intel_runtime_pm(&i915->runtime_pm, wakeref)
2623                vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
2624
2625        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
2626
2627        /*
2628         * Without aliasing PPGTT there's no difference between
2629         * GLOBAL/LOCAL_BIND; it's all the same PTEs. Hence unconditionally
2630         * upgrade to both bound if we bind either, to avoid double-binding.
2631         */
2632        vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
2633
2634        return 0;
2635}
2636
2637static void ggtt_unbind_vma(struct i915_vma *vma)
2638{
2639        struct drm_i915_private *i915 = vma->vm->i915;
2640        intel_wakeref_t wakeref;
2641
2642        with_intel_runtime_pm(&i915->runtime_pm, wakeref)
2643                vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2644}
2645
2646static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2647                                 enum i915_cache_level cache_level,
2648                                 u32 flags)
2649{
2650        struct drm_i915_private *i915 = vma->vm->i915;
2651        u32 pte_flags;
2652        int ret;
2653
2654        /* Currently applicable only to VLV */
2655        pte_flags = 0;
2656        if (i915_gem_object_is_readonly(vma->obj))
2657                pte_flags |= PTE_READ_ONLY;
2658
2659        if (flags & I915_VMA_LOCAL_BIND) {
2660                struct i915_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
2661
2662                if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
2663                        ret = appgtt->vm.allocate_va_range(&appgtt->vm,
2664                                                           vma->node.start,
2665                                                           vma->size);
2666                        if (ret)
2667                                return ret;
2668                }
2669
2670                appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level,
2671                                          pte_flags);
2672        }
2673
2674        if (flags & I915_VMA_GLOBAL_BIND) {
2675                intel_wakeref_t wakeref;
2676
2677                with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
2678                        vma->vm->insert_entries(vma->vm, vma,
2679                                                cache_level, pte_flags);
2680                }
2681        }
2682
2683        return 0;
2684}
2685
2686static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
2687{
2688        struct drm_i915_private *i915 = vma->vm->i915;
2689
2690        if (vma->flags & I915_VMA_GLOBAL_BIND) {
2691                struct i915_address_space *vm = vma->vm;
2692                intel_wakeref_t wakeref;
2693
2694                with_intel_runtime_pm(&i915->runtime_pm, wakeref)
2695                        vm->clear_range(vm, vma->node.start, vma->size);
2696        }
2697
2698        if (vma->flags & I915_VMA_LOCAL_BIND) {
2699                struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm;
2700
2701                vm->clear_range(vm, vma->node.start, vma->size);
2702        }
2703}
2704
2705void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
2706                               struct sg_table *pages)
2707{
2708        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2709        struct device *kdev = &dev_priv->drm.pdev->dev;
2710        struct i915_ggtt *ggtt = &dev_priv->ggtt;
2711
2712        if (unlikely(ggtt->do_idle_maps)) {
2713                if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
2714                        DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2715                        /* Wait a bit, in hopes it avoids the hang */
2716                        udelay(10);
2717                }
2718        }
2719
2720        dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
2721}
2722
2723static int ggtt_set_pages(struct i915_vma *vma)
2724{
2725        int ret;
2726
2727        GEM_BUG_ON(vma->pages);
2728
2729        ret = i915_get_ggtt_vma_pages(vma);
2730        if (ret)
2731                return ret;
2732
2733        vma->page_sizes = vma->obj->mm.page_sizes;
2734
2735        return 0;
2736}
2737
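/*
 * drm_mm colouring callback: trim the proposed [start, end) range so that a
 * one-page gap is kept next to any neighbouring node of a different colour
 * (and before the trailing reserved node), acting as a guard page against
 * prefetches crossing over node boundaries.
 */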
2738static void i915_gtt_color_adjust(const struct drm_mm_node *node,
2739                                  unsigned long color,
2740                                  u64 *start,
2741                                  u64 *end)
2742{
2743        if (node->allocated && node->color != color)
2744                *start += I915_GTT_PAGE_SIZE;
2745
2746        /* Also leave a space between the unallocated reserved node after the
2747         * GTT and any objects within the GTT, i.e. we use the color adjustment
2748         * to insert a guard page to prevent prefetches crossing over the
2749         * GTT boundary.
2750         */
2751        node = list_next_entry(node, node_list);
2752        if (node->color != color)
2753                *end -= I915_GTT_PAGE_SIZE;
2754}
2755
2756static int init_aliasing_ppgtt(struct drm_i915_private *i915)
2757{
2758        struct i915_ggtt *ggtt = &i915->ggtt;
2759        struct i915_ppgtt *ppgtt;
2760        int err;
2761
2762        ppgtt = i915_ppgtt_create(i915);
2763        if (IS_ERR(ppgtt))
2764                return PTR_ERR(ppgtt);
2765
2766        if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
2767                err = -ENODEV;
2768                goto err_ppgtt;
2769        }
2770
2771        /*
2772         * Note we only pre-allocate as far as the end of the global
2773         * GTT. On 48b / 4-level page-tables, the difference is very,
2774         * very significant! We have to preallocate as GVT/vgpu does
2775         * not like the page directory disappearing.
2776         */
2777        err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
2778        if (err)
2779                goto err_ppgtt;
2780
2781        i915->mm.aliasing_ppgtt = ppgtt;
2782
2783        GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
2784        ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
2785
2786        GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
2787        ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
2788
2789        return 0;
2790
2791err_ppgtt:
2792        i915_vm_put(&ppgtt->vm);
2793        return err;
2794}
2795
2796static void fini_aliasing_ppgtt(struct drm_i915_private *i915)
2797{
2798        struct i915_ggtt *ggtt = &i915->ggtt;
2799        struct i915_ppgtt *ppgtt;
2800
2801        ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2802        if (!ppgtt)
2803                return;
2804
2805        i915_vm_put(&ppgtt->vm);
2806
2807        ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
2808        ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
2809}
2810
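/*
 * When the GuC is in use, reserve the [GUC_GGTT_TOP, vm.total) tail of the
 * GGTT as an unevictable node (ggtt->uc_fw); see the comment in
 * i915_gem_init_ggtt() for the rationale.
 */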
2811static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
2812{
2813        u64 size;
2814        int ret;
2815
2816        if (!USES_GUC(ggtt->vm.i915))
2817                return 0;
2818
2819        GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
2820        size = ggtt->vm.total - GUC_GGTT_TOP;
2821
2822        ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
2823                                   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
2824                                   PIN_NOEVICT);
2825        if (ret)
2826                DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
2827
2828        return ret;
2829}
2830
2831static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
2832{
2833        if (drm_mm_node_allocated(&ggtt->uc_fw))
2834                drm_mm_remove_node(&ggtt->uc_fw);
2835}
2836
2837int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2838{
2839        /* Let GEM manage all of the aperture.
2840         *
2841         * However, leave one page at the end still bound to the scratch page.
2842         * There are a number of places where the hardware apparently prefetches
2843         * past the end of the object, and we've seen multiple hangs with the
2844         * GPU head pointer stuck in a batchbuffer bound at the last page of the
2845         * aperture.  One page should be enough to keep any prefetching inside
2846         * of the aperture.
2847         */
2848        struct i915_ggtt *ggtt = &dev_priv->ggtt;
2849        unsigned long hole_start, hole_end;
2850        struct drm_mm_node *entry;
2851        int ret;
2852
2853        /*
2854         * GuC requires all resources that we're sharing with it to be placed in
2855         * non-WOPCM memory. If GuC is not present or not in use we still need a
2856         * small bias as ring wraparound at offset 0 sometimes hangs. No idea
2857         * why.
2858         */
2859        ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
2860                               intel_wopcm_guc_size(&dev_priv->wopcm));
2861
2862        ret = intel_vgt_balloon(dev_priv);
2863        if (ret)
2864                return ret;
2865
2866        /* Reserve a mappable slot for our lockless error capture */
2867        ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
2868                                          PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2869                                          0, ggtt->mappable_end,
2870                                          DRM_MM_INSERT_LOW);
2871        if (ret)
2872                return ret;
2873
2874        /*
2875         * The upper portion of the GuC address space has a sizeable hole
2876         * (several MB) that is inaccessible by GuC. Reserve this range within
2877         * GGTT as it can comfortably hold GuC/HuC firmware images.
2878         */
2879        ret = ggtt_reserve_guc_top(ggtt);
2880        if (ret)
2881                goto err_reserve;
2882
2883        /* Clear any non-preallocated blocks */
2884        drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
2885                DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2886                              hole_start, hole_end);
2887                ggtt->vm.clear_range(&ggtt->vm, hole_start,
2888                                     hole_end - hole_start);
2889        }
2890
2891        /* And finally clear the reserved guard page */
2892        ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
2893
2894        if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) {
2895                ret = init_aliasing_ppgtt(dev_priv);
2896                if (ret)
2897                        goto err_appgtt;
2898        }
2899
2900        return 0;
2901
2902err_appgtt:
2903        ggtt_release_guc_top(ggtt);
2904err_reserve:
2905        drm_mm_remove_node(&ggtt->error_capture);
2906        return ret;
2907}
2908
2909/**
2910 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2911 * @dev_priv: i915 device
2912 */
2913void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2914{
2915        struct i915_ggtt *ggtt = &dev_priv->ggtt;
2916        struct i915_vma *vma, *vn;
2917        struct pagevec *pvec;
2918
2919        ggtt->vm.closed = true;
2920
2921        mutex_lock(&dev_priv->drm.struct_mutex);
2922        fini_aliasing_ppgtt(dev_priv);
2923
2924        list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
2925                WARN_ON(i915_vma_unbind(vma));
2926
2927        if (drm_mm_node_allocated(&ggtt->error_capture))
2928                drm_mm_remove_node(&ggtt->error_capture);
2929
2930        ggtt_release_guc_top(ggtt);
2931
2932        if (drm_mm_initialized(&ggtt->vm.mm)) {
2933                intel_vgt_deballoon(dev_priv);
2934                i915_address_space_fini(&ggtt->vm);
2935        }
2936
2937        ggtt->vm.cleanup(&ggtt->vm);
2938
2939        pvec = &dev_priv->mm.wc_stash.pvec;
2940        if (pvec->nr) {
2941                set_pages_array_wb(pvec->pages, pvec->nr);
2942                __pagevec_release(pvec);
2943        }
2944
2945        mutex_unlock(&dev_priv->drm.struct_mutex);
2946
2947        arch_phys_wc_del(ggtt->mtrr);
2948        io_mapping_fini(&ggtt->iomap);
2949
2950        i915_gem_cleanup_stolen(dev_priv);
2951}
2952
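/*
 * Decode the size of the GGTT, in bytes, from the GMCH graphics control
 * word; gen6, gen8 and chv (below) each pack the size field differently.
 */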
2953static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2954{
2955        snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2956        snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2957        return snb_gmch_ctl << 20;
2958}
2959
2960static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2961{
2962        bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2963        bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2964        if (bdw_gmch_ctl)
2965                bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2966
2967#ifdef CONFIG_X86_32
2968        /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
2969        if (bdw_gmch_ctl > 4)
2970                bdw_gmch_ctl = 4;
2971#endif
2972
2973        return bdw_gmch_ctl << 20;
2974}
2975
2976static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2977{
2978        gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2979        gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2980
2981        if (gmch_ctrl)
2982                return 1 << (20 + gmch_ctrl);
2983
2984        return 0;
2985}
2986
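/*
 * Map the half of BAR0 that holds the GGTT page-table entries (the "GSM"),
 * using WC or an uncached mapping as the platform allows, and set up the
 * scratch page and scratch PTE used for unbound ranges.
 */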
2987static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
2988{
2989        struct drm_i915_private *dev_priv = ggtt->vm.i915;
2990        struct pci_dev *pdev = dev_priv->drm.pdev;
2991        phys_addr_t phys_addr;
2992        int ret;
2993
2994        /* For Modern GENs the PTEs and register space are split in the BAR */
2995        phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
2996
2997        /*
2998         * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
2999         * will be dropped. For WC mappings in general we have 64 byte burst
3000         * writes when the WC buffer is flushed, so we can't use it, but have to
3001         * resort to an uncached mapping. The WC issue is easily caught by the
3002         * readback check when writing GTT PTE entries.
3003         */
3004        if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
3005                ggtt->gsm = ioremap_nocache(phys_addr, size);
3006        else
3007                ggtt->gsm = ioremap_wc(phys_addr, size);
3008        if (!ggtt->gsm) {
3009                DRM_ERROR("Failed to map the ggtt page table\n");
3010                return -ENOMEM;
3011        }
3012
3013        ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
3014        if (ret) {
3015                DRM_ERROR("Scratch setup failed\n");
3016                /* iounmap will also get called at remove, but meh */
3017                iounmap(ggtt->gsm);
3018                return ret;
3019        }
3020
3021        ggtt->vm.scratch_pte =
3022                ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr,
3023                                    I915_CACHE_NONE, 0);
3024
3025        return 0;
3026}
3027
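/*
 * Claim PPAT slot @index for @value: record the value, take the initial
 * reference and mark the slot both used and dirty so the next update_hw()
 * call writes it out.
 */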
3028static struct intel_ppat_entry *
3029__alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
3030{
3031        struct intel_ppat_entry *entry = &ppat->entries[index];
3032
3033        GEM_BUG_ON(index >= ppat->max_entries);
3034        GEM_BUG_ON(test_bit(index, ppat->used));
3035
3036        entry->ppat = ppat;
3037        entry->value = value;
3038        kref_init(&entry->ref);
3039        set_bit(index, ppat->used);
3040        set_bit(index, ppat->dirty);
3041
3042        return entry;
3043}
3044
3045static void __free_ppat_entry(struct intel_ppat_entry *entry)
3046{
3047        struct intel_ppat *ppat = entry->ppat;
3048        unsigned int index = entry - ppat->entries;
3049
3050        GEM_BUG_ON(index >= ppat->max_entries);
3051        GEM_BUG_ON(!test_bit(index, ppat->used));
3052
3053        entry->value = ppat->clear_value;
3054        clear_bit(index, ppat->used);
3055        set_bit(index, ppat->dirty);
3056}
3057
3058/**
3059 * intel_ppat_get - get a usable PPAT entry
3060 * @i915: i915 device instance
3061 * @value: the PPAT value required by the caller
3062 *
3063 * The function searches for an existing PPAT entry that matches the
3064 * required value. If a perfect match is found, that entry is reused. If
3065 * only a partial match is found, it checks whether a free PPAT index is
3066 * available: if so, a new PPAT index is allocated for the required value
3067 * and the HW is updated; if not, the best partially matched entry is
3068 * used.
3069 */
3070const struct intel_ppat_entry *
3071intel_ppat_get(struct drm_i915_private *i915, u8 value)
3072{
3073        struct intel_ppat *ppat = &i915->ppat;
3074        struct intel_ppat_entry *entry = NULL;
3075        unsigned int scanned, best_score;
3076        int i;
3077
3078        GEM_BUG_ON(!ppat->max_entries);
3079
3080        scanned = best_score = 0;
3081        for_each_set_bit(i, ppat->used, ppat->max_entries) {
3082                unsigned int score;
3083
3084                score = ppat->match(ppat->entries[i].value, value);
3085                if (score > best_score) {
3086                        entry = &ppat->entries[i];
3087                        if (score == INTEL_PPAT_PERFECT_MATCH) {
3088                                kref_get(&entry->ref);
3089                                return entry;
3090                        }
3091                        best_score = score;
3092                }
3093                scanned++;
3094        }
3095
3096        if (scanned == ppat->max_entries) {
3097                if (!entry)
3098                        return ERR_PTR(-ENOSPC);
3099
3100                kref_get(&entry->ref);
3101                return entry;
3102        }
3103
3104        i = find_first_zero_bit(ppat->used, ppat->max_entries);
3105        entry = __alloc_ppat_entry(ppat, i, value);
3106        ppat->update_hw(i915);
3107        return entry;
3108}
3109
3110static void release_ppat(struct kref *kref)
3111{
3112        struct intel_ppat_entry *entry =
3113                container_of(kref, struct intel_ppat_entry, ref);
3114        struct drm_i915_private *i915 = entry->ppat->i915;
3115
3116        __free_ppat_entry(entry);
3117        entry->ppat->update_hw(i915);
3118}
3119
3120/**
3121 * intel_ppat_put - put back a PPAT entry obtained from intel_ppat_get()
3122 * @entry: an intel PPAT entry
3123 *
3124 * Put back a PPAT entry obtained from intel_ppat_get(). If the PPAT index of
3125 * the entry was dynamically allocated, its reference count is decreased. Once
3126 * the reference count drops to zero, the PPAT index becomes free again.
3127 */
3128void intel_ppat_put(const struct intel_ppat_entry *entry)
3129{
3130        struct intel_ppat *ppat = entry->ppat;
3131        unsigned int index = entry - ppat->entries;
3132
3133        GEM_BUG_ON(!ppat->max_entries);
3134
3135        kref_put(&ppat->entries[index].ref, release_ppat);
3136}
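
/*
 * Illustrative sketch (not part of the original file): a hypothetical caller
 * of the two helpers above. The function name and the chosen PPAT value are
 * invented for illustration; a real caller derives the value from its caching
 * requirements and encodes the resulting PAT index into its PTEs.
 */
static int __maybe_unused example_ppat_usage(struct drm_i915_private *i915)
{
        const struct intel_ppat_entry *entry;
        unsigned int index;

        /* Look up (or allocate) an entry for a write-back, LLC-cached value. */
        entry = intel_ppat_get(i915, GEN8_PPAT_WB | GEN8_PPAT_LLC);
        if (IS_ERR(entry))
                return PTR_ERR(entry);

        /* The PAT index is simply the entry's position within the table. */
        index = entry - entry->ppat->entries;
        (void)index; /* a real caller would encode this into its PTEs */

        /* Drop the reference; a dynamically allocated index may be freed. */
        intel_ppat_put(entry);
        return 0;
}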
3137
3138static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3139{
3140        struct intel_ppat *ppat = &dev_priv->ppat;
3141        int i;
3142
3143        for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3144                I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3145                clear_bit(i, ppat->dirty);
3146        }
3147}
3148
3149static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3150{
3151        struct intel_ppat *ppat = &dev_priv->ppat;
3152        u64 pat = 0;
3153        int i;
3154
3155        for (i = 0; i < ppat->max_entries; i++)
3156                pat |= GEN8_PPAT(i, ppat->entries[i].value);
3157
3158        bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3159
3160        I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3161        I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3162}
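
/*
 * Added note for illustration: bdw_private_pat_update_hw() packs all eight
 * 8-bit entry values into a single 64-bit PAT image (its use of GEN8_PPAT()
 * suggests entry i lands in byte i) and then writes the image as two 32-bit
 * halves via GEN8_PRIVATE_PAT_LO/HI.
 */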
3163
3164static unsigned int bdw_private_pat_match(u8 src, u8 dst)
3165{
3166        unsigned int score = 0;
3167        enum {
3168                AGE_MATCH = BIT(0),
3169                TC_MATCH = BIT(1),
3170                CA_MATCH = BIT(2),
3171        };
3172
3173        /* Cache attribute has to be matched. */
3174        if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
3175                return 0;
3176
3177        score |= CA_MATCH;
3178
3179        if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
3180                score |= TC_MATCH;
3181
3182        if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
3183                score |= AGE_MATCH;
3184
3185        if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
3186                return INTEL_PPAT_PERFECT_MATCH;
3187
3188        return score;
3189}
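
/*
 * Worked example (added for illustration): comparing
 * src = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0) against
 * dst = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3) matches the
 * cache attribute and target cache but not the age, so the score is
 * CA_MATCH | TC_MATCH rather than INTEL_PPAT_PERFECT_MATCH. intel_ppat_get()
 * may still fall back to such a partial match if no free index remains.
 */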
3190
3191static unsigned int chv_private_pat_match(u8 src, u8 dst)
3192{
3193        return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
3194                INTEL_PPAT_PERFECT_MATCH : 0;
3195}
3196
3197static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3198{
3199        ppat->max_entries = 8;
3200        ppat->update_hw = cnl_private_pat_update_hw;
3201        ppat->match = bdw_private_pat_match;
3202        ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3203
3204        __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3205        __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3206        __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3207        __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3208        __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3209        __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3210        __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3211        __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3212}
3213
3214/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3215 * bits. When using advanced contexts, each context stores its own PAT, but
3216 * writing this data shouldn't be harmful even in those cases. */
3217static void bdw_setup_private_ppat(struct intel_ppat *ppat)
3218{
3219        ppat->max_entries = 8;
3220        ppat->update_hw = bdw_private_pat_update_hw;
3221        ppat->match = bdw_private_pat_match;
3222        ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3223
3224        if (!HAS_PPGTT(ppat->i915)) {
3225                /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3226                 * so RTL will always use the value corresponding to
3227                 * pat_sel = 000".
3228                 * So let's disable cache for GGTT to avoid screen corruptions.
3229                 * MOCS still can be used though.
3230                 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3231                 * before this patch, i.e. the same uncached + snooping access
3232                 * like on gen6/7 seems to be in effect.
3233                 * - So this just fixes blitter/render access. Again it looks
3234                 * like it's not just uncached access, but uncached + snooping.
3235                 * So we can still hold onto all our assumptions wrt cpu
3236                 * clflushing on LLC machines.
3237                 */
3238                __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3239                return;
3240        }
3241
3242        __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for normal objects, no eLLC */
3243        __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for something pointing to ptes? */
3244        __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for scanout with eLLC */
3245        __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* Uncached objects, mostly for scanout */
3246        __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3247        __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3248        __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3249        __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3250}
3251
3252static void chv_setup_private_ppat(struct intel_ppat *ppat)
3253{
3254        ppat->max_entries = 8;
3255        ppat->update_hw = bdw_private_pat_update_hw;
3256        ppat->match = chv_private_pat_match;
3257        ppat->clear_value = CHV_PPAT_SNOOP;
3258
3259        /*
3260         * Map WB on BDW to snooped on CHV.
3261         *
3262         * Only the snoop bit has meaning for CHV, the rest is
3263         * ignored.
3264         *
3265         * The hardware will never snoop for certain types of accesses:
3266         * - CPU GTT (GMADR->GGTT->no snoop->memory)
3267         * - PPGTT page tables
3268         * - some other special cycles
3269         *
3270         * As with BDW, we also need to consider the following for GT accesses:
3271         * "For GGTT, there is NO pat_sel[2:0] from the entry,
3272         * so RTL will always use the value corresponding to
3273         * pat_sel = 000".
3274         * Which means we must set the snoop bit in PAT entry 0
3275         * in order to keep the global status page working.
3276         */
3277
3278        __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3279        __alloc_ppat_entry(ppat, 1, 0);
3280        __alloc_ppat_entry(ppat, 2, 0);
3281        __alloc_ppat_entry(ppat, 3, 0);
3282        __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3283        __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3284        __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3285        __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
3286}
3287
3288static void gen6_gmch_remove(struct i915_address_space *vm)
3289{
3290        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3291
3292        iounmap(ggtt->gsm);
3293        cleanup_scratch_page(vm);
3294}
3295
3296static void setup_private_pat(struct drm_i915_private *dev_priv)
3297{
3298        struct intel_ppat *ppat = &dev_priv->ppat;
3299        int i;
3300
3301        ppat->i915 = dev_priv;
3302
3303        if (INTEL_GEN(dev_priv) >= 10)
3304                cnl_setup_private_ppat(ppat);
3305        else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
3306                chv_setup_private_ppat(ppat);
3307        else
3308                bdw_setup_private_ppat(ppat);
3309
3310        GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3311
3312        for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3313                ppat->entries[i].value = ppat->clear_value;
3314                ppat->entries[i].ppat = ppat;
3315                set_bit(i, ppat->dirty);
3316        }
3317
3318        ppat->update_hw(dev_priv);
3319}
3320
3321static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3322{
3323        struct drm_i915_private *dev_priv = ggtt->vm.i915;
3324        struct pci_dev *pdev = dev_priv->drm.pdev;
3325        unsigned int size;
3326        u16 snb_gmch_ctl;
3327        int err;
3328
3329        /* TODO: We're not aware of mappable constraints on gen8 yet */
3330        ggtt->gmadr =
3331                (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3332                                                 pci_resource_len(pdev, 2));
3333        ggtt->mappable_end = resource_size(&ggtt->gmadr);
3334
3335        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3336        if (!err)
3337                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3338        if (err)
3339                DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3340
3341        pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3342        if (IS_CHERRYVIEW(dev_priv))
3343                size = chv_get_total_gtt_size(snb_gmch_ctl);
3344        else
3345                size = gen8_get_total_gtt_size(snb_gmch_ctl);
3346
3347        ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
3348        ggtt->vm.cleanup = gen6_gmch_remove;
3349        ggtt->vm.insert_page = gen8_ggtt_insert_page;
3350        ggtt->vm.clear_range = nop_clear_range;
3351        if (intel_scanout_needs_vtd_wa(dev_priv))
3352                ggtt->vm.clear_range = gen8_ggtt_clear_range;
3353
3354        ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
3355
3356        /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3357        if (intel_ggtt_update_needs_vtd_wa(dev_priv) ||
3358            IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) {
3359                ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3360                ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
3361                if (ggtt->vm.clear_range != nop_clear_range)
3362                        ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3363
3364                /* Prevent recursively calling stop_machine() and deadlocks. */
3365                dev_info(dev_priv->drm.dev,
3366                         "Disabling error capture for VT-d workaround\n");
3367                i915_disable_error_state(dev_priv, -ENODEV);
3368        }
3369
3370        ggtt->invalidate = gen6_ggtt_invalidate;
3371
3372        ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3373        ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3374        ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3375        ggtt->vm.vma_ops.clear_pages = clear_pages;
3376
3377        ggtt->vm.pte_encode = gen8_pte_encode;
3378
3379        setup_private_pat(dev_priv);
3380
3381        return ggtt_probe_common(ggtt, size);
3382}
3383
3384static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3385{
3386        struct drm_i915_private *dev_priv = ggtt->vm.i915;
3387        struct pci_dev *pdev = dev_priv->drm.pdev;
3388        unsigned int size;
3389        u16 snb_gmch_ctl;
3390        int err;
3391
3392        ggtt->gmadr =
3393                (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
3394                                                 pci_resource_len(pdev, 2));
3395        ggtt->mappable_end = resource_size(&ggtt->gmadr);
3396
3397        /* 64/512MB is the current min/max we actually know of, but this is just
3398         * a coarse sanity check.
3399         */
3400        if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3401                DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
3402                return -ENXIO;
3403        }
3404
3405        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
3406        if (!err)
3407                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3408        if (err)
3409                DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3410        pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3411
3412        size = gen6_get_total_gtt_size(snb_gmch_ctl);
3413        ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
3414
3415        ggtt->vm.clear_range = nop_clear_range;
3416        if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3417                ggtt->vm.clear_range = gen6_ggtt_clear_range;
3418        ggtt->vm.insert_page = gen6_ggtt_insert_page;
3419        ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
3420        ggtt->vm.cleanup = gen6_gmch_remove;
3421
3422        ggtt->invalidate = gen6_ggtt_invalidate;
3423
3424        if (HAS_EDRAM(dev_priv))
3425                ggtt->vm.pte_encode = iris_pte_encode;
3426        else if (IS_HASWELL(dev_priv))
3427                ggtt->vm.pte_encode = hsw_pte_encode;
3428        else if (IS_VALLEYVIEW(dev_priv))
3429                ggtt->vm.pte_encode = byt_pte_encode;
3430        else if (INTEL_GEN(dev_priv) >= 7)
3431                ggtt->vm.pte_encode = ivb_pte_encode;
3432        else
3433                ggtt->vm.pte_encode = snb_pte_encode;
3434
3435        ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3436        ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3437        ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3438        ggtt->vm.vma_ops.clear_pages = clear_pages;
3439
3440        return ggtt_probe_common(ggtt, size);
3441}
3442
3443static void i915_gmch_remove(struct i915_address_space *vm)
3444{
3445        intel_gmch_remove();
3446}
3447
3448static int i915_gmch_probe(struct i915_ggtt *ggtt)
3449{
3450        struct drm_i915_private *dev_priv = ggtt->vm.i915;
3451        phys_addr_t gmadr_base;
3452        int ret;
3453
3454        ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3455        if (!ret) {
3456                DRM_ERROR("failed to set up gmch\n");
3457                return -EIO;
3458        }
3459
3460        intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
3461
3462        ggtt->gmadr =
3463                (struct resource) DEFINE_RES_MEM(gmadr_base,
3464                                                 ggtt->mappable_end);
3465
3466        ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3467        ggtt->vm.insert_page = i915_ggtt_insert_page;
3468        ggtt->vm.insert_entries = i915_ggtt_insert_entries;
3469        ggtt->vm.clear_range = i915_ggtt_clear_range;
3470        ggtt->vm.cleanup = i915_gmch_remove;
3471
3472        ggtt->invalidate = gmch_ggtt_invalidate;
3473
3474        ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
3475        ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
3476        ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
3477        ggtt->vm.vma_ops.clear_pages = clear_pages;
3478
3479        if (unlikely(ggtt->do_idle_maps))
3480                DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3481
3482        return 0;
3483}
3484
3485/**
3486 * i915_ggtt_probe_hw - Probe GGTT hardware location
3487 * @dev_priv: i915 device
3488 */
3489int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3490{
3491        struct i915_ggtt *ggtt = &dev_priv->ggtt;
3492        int ret;
3493
3494        ggtt->vm.i915 = dev_priv;
3495        ggtt->vm.dma = &dev_priv->drm.pdev->dev;
3496
3497        if (INTEL_GEN(dev_priv) <= 5)
3498                ret = i915_gmch_probe(ggtt);
3499        else if (INTEL_GEN(dev_priv) < 8)
3500                ret = gen6_gmch_probe(ggtt);
3501        else
3502                ret = gen8_gmch_probe(ggtt);
3503        if (ret)
3504                return ret;
3505
3506        if ((ggtt->vm.total - 1) >> 32) {
3507                DRM_ERROR("We never expected a Global GTT with more than 32bits"
3508                          " of address space! Found %lldM!\n",
3509                          ggtt->vm.total >> 20);
3510                ggtt->vm.total = 1ULL << 32;
3511                ggtt->mappable_end =
3512                        min_t(u64, ggtt->mappable_end, ggtt->vm.total);
3513        }
3514
3515        if (ggtt->mappable_end > ggtt->vm.total) {
3516                DRM_ERROR("mappable aperture extends past end of GGTT,"
3517                          " aperture=%pa, total=%llx\n",
3518                          &ggtt->mappable_end, ggtt->vm.total);
3519                ggtt->mappable_end = ggtt->vm.total;
3520        }
3521
3522        /* GMADR is the PCI mmio aperture into the global GTT. */
3523        DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
3524        DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
3525        DRM_DEBUG_DRIVER("DSM size = %lluM\n",
3526                         (u64)resource_size(&intel_graphics_stolen_res) >> 20);
3527        if (intel_vtd_active())
3528                DRM_INFO("VT-d active for gfx access\n");
3529
3530        return 0;
3531}
3532
3533/**
3534 * i915_ggtt_init_hw - Initialize GGTT hardware
3535 * @dev_priv: i915 device
3536 */
3537int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3538{
3539        struct i915_ggtt *ggtt = &dev_priv->ggtt;
3540        int ret;
3541
3542        stash_init(&dev_priv->mm.wc_stash);
3543
3544        /* Note that we use page colouring to enforce a guard page at the
3545         * end of the address space. This is required as the CS may prefetch
3546         * beyond the end of the batch buffer, across the page boundary,
3547         * and beyond the end of the GTT if we do not provide a guard.
3548         */
3549        mutex_lock(&dev_priv->drm.struct_mutex);
3550        i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
3551
3552        ggtt->vm.is_ggtt = true;
3553
3554        /* Only VLV supports read-only GGTT mappings */
3555        ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv);
3556
3557        if (!HAS_LLC(dev_priv) && !HAS_PPGTT(dev_priv))
3558                ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
3559        mutex_unlock(&dev_priv->drm.struct_mutex);
3560
3561        if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
3562                                dev_priv->ggtt.gmadr.start,
3563                                dev_priv->ggtt.mappable_end)) {
3564                ret = -EIO;
3565                goto out_gtt_cleanup;
3566        }
3567
3568        ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
3569
3570        i915_ggtt_init_fences(ggtt);
3571
3572        /*
3573         * Initialise stolen early so that we may reserve preallocated
3574         * objects for the BIOS to KMS transition.
3575         */
3576        ret = i915_gem_init_stolen(dev_priv);
3577        if (ret)
3578                goto out_gtt_cleanup;
3579
3580        return 0;
3581
3582out_gtt_cleanup:
3583        ggtt->vm.cleanup(&ggtt->vm);
3584        return ret;
3585}
3586
3587int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3588{
3589        if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3590                return -EIO;
3591
3592        return 0;
3593}
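
/*
 * Hedged sketch (not part of the original file): one plausible way a caller
 * could string the GGTT bring-up helpers together. The function name is
 * invented, and the real driver-load call sites and error unwinding are
 * omitted here.
 */
static int __maybe_unused example_ggtt_bringup(struct drm_i915_private *i915)
{
        int ret;

        /* Discover the GGTT size/aperture and select the per-gen vfuncs. */
        ret = i915_ggtt_probe_hw(i915);
        if (ret)
                return ret;

        /* Initialise the address space, iomap, fences and stolen memory. */
        ret = i915_ggtt_init_hw(i915);
        if (ret)
                return ret;

        /* Enable the GTT itself (only does real work on pre-gen6 GMCH). */
        return i915_ggtt_enable_hw(i915);
}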
3594
3595void i915_ggtt_enable_guc(struct drm_i915_private *i915)
3596{
3597        GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
3598
3599        i915->ggtt.invalidate = guc_ggtt_invalidate;
3600
3601        i915_ggtt_invalidate(i915);
3602}
3603
3604void i915_ggtt_disable_guc(struct drm_i915_private *i915)
3605{
3606        /* XXX Temporary pardon for error unload */
3607        if (i915->ggtt.invalidate == gen6_ggtt_invalidate)
3608                return;
3609
3610        /* We should only be called after i915_ggtt_enable_guc() */
3611        GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
3612
3613        i915->ggtt.invalidate = gen6_ggtt_invalidate;
3614
3615        i915_ggtt_invalidate(i915);
3616}
3617
3618void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
3619{
3620        struct i915_ggtt *ggtt = &dev_priv->ggtt;
3621        struct i915_vma *vma, *vn;
3622
3623        i915_check_and_clear_faults(dev_priv);
3624
3625        mutex_lock(&ggtt->vm.mutex);
3626
3627        /* First fill our portion of the GTT with scratch pages */
3628        ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
3629        ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
3630
3631        /* clflush objects bound into the GGTT and rebind them. */
3632        list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
3633                struct drm_i915_gem_object *obj = vma->obj;
3634
3635                if (!(vma->flags & I915_VMA_GLOBAL_BIND))
3636                        continue;
3637
3638                mutex_unlock(&ggtt->vm.mutex);
3639
3640                if (!i915_vma_unbind(vma))
3641                        goto lock;
3642
3643                WARN_ON(i915_vma_bind(vma,
3644                                      obj ? obj->cache_level : 0,
3645                                      PIN_UPDATE));
3646                if (obj) {
3647                        i915_gem_object_lock(obj);
3648                        WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3649                        i915_gem_object_unlock(obj);
3650                }
3651
3652lock:
3653                mutex_lock(&ggtt->vm.mutex);
3654        }
3655
3656        ggtt->vm.closed = false;
3657        i915_ggtt_invalidate(dev_priv);
3658
3659        mutex_unlock(&ggtt->vm.mutex);
3660
3661        if (INTEL_GEN(dev_priv) >= 8) {
3662                struct intel_ppat *ppat = &dev_priv->ppat;
3663
3664                bitmap_set(ppat->dirty, 0, ppat->max_entries);
3665                dev_priv->ppat.update_hw(dev_priv);
3666                return;
3667        }
3668}
3669
3670static struct scatterlist *
3671rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
3672             unsigned int width, unsigned int height,
3673             unsigned int stride,
3674             struct sg_table *st, struct scatterlist *sg)
3675{
3676        unsigned int column, row;
3677        unsigned int src_idx;
3678
3679        for (column = 0; column < width; column++) {
3680                src_idx = stride * (height - 1) + column + offset;
3681                for (row = 0; row < height; row++) {
3682                        st->nents++;
3683                        /* We don't need the pages, but need to initialize
3684                         * the entries so the sg list can be happily traversed.
3685                         * The only things we need are the DMA addresses.
3686                         */
3687                        sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
3688                        sg_dma_address(sg) =
3689                                i915_gem_object_get_dma_address(obj, src_idx);
3690                        sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
3691                        sg = sg_next(sg);
3692                        src_idx -= stride;
3693                }
3694        }
3695
3696        return sg;
3697}
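
/*
 * Worked example (added for illustration): for a plane with offset = 0,
 * width = 2, height = 3 and stride = 2, each column is walked from the
 * bottom row upwards, so the resulting sg list references the source pages
 * in the order 4, 2, 0, 5, 3, 1.
 */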
3698
3699static noinline struct sg_table *
3700intel_rotate_pages(struct intel_rotation_info *rot_info,
3701                   struct drm_i915_gem_object *obj)
3702{
3703        unsigned int size = intel_rotation_info_size(rot_info);
3704        struct sg_table *st;
3705        struct scatterlist *sg;
3706        int ret = -ENOMEM;
3707        int i;
3708
3709        /* Allocate target SG list. */
3710        st = kmalloc(sizeof(*st), GFP_KERNEL);
3711        if (!st)
3712                goto err_st_alloc;
3713
3714        ret = sg_alloc_table(st, size, GFP_KERNEL);
3715        if (ret)
3716                goto err_sg_alloc;
3717
3718        st->nents = 0;
3719        sg = st->sgl;
3720
3721        for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
3722                sg = rotate_pages(obj, rot_info->plane[i].offset,
3723                                  rot_info->plane[i].width, rot_info->plane[i].height,
3724                                  rot_info->plane[i].stride, st, sg);
3725        }
3726
3727        return st;
3728
3729err_sg_alloc:
3730        kfree(st);
3731err_st_alloc:
3732
3733        DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3734                         obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3735
3736        return ERR_PTR(ret);
3737}
3738
3739static struct scatterlist *
3740remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
3741            unsigned int width, unsigned int height,
3742            unsigned int stride,
3743            struct sg_table *st, struct scatterlist *sg)
3744{
3745        unsigned int row;
3746
3747        for (row = 0; row < height; row++) {
3748                unsigned int left = width * I915_GTT_PAGE_SIZE;
3749
3750                while (left) {
3751                        dma_addr_t addr;
3752                        unsigned int length;
3753
3754                        /* We don't need the pages, but need to initialize
3755                         * the entries so the sg list can be happily traversed.
3756                         * The only things we need are the DMA addresses.
3757                         */
3758
3759                        addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
3760
3761                        length = min(left, length);
3762
3763                        st->nents++;
3764
3765                        sg_set_page(sg, NULL, length, 0);
3766                        sg_dma_address(sg) = addr;
3767                        sg_dma_len(sg) = length;
3768                        sg = sg_next(sg);
3769
3770                        offset += length / I915_GTT_PAGE_SIZE;
3771                        left -= length;
3772                }
3773
3774                offset += stride - width;
3775        }
3776
3777        return sg;
3778}
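
/*
 * Added note for illustration: unlike rotate_pages() above, remap_pages()
 * walks each row left to right and relies on
 * i915_gem_object_get_dma_address_len() to report how many bytes are
 * contiguous in DMA space, so a fully contiguous row collapses into a
 * single sg entry of width * I915_GTT_PAGE_SIZE bytes.
 */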
3779
3780static noinline struct sg_table *
3781intel_remap_pages(struct intel_remapped_info *rem_info,
3782                  struct drm_i915_gem_object *obj)
3783{
3784        unsigned int size = intel_remapped_info_size(rem_info);
3785        struct sg_table *st;
3786        struct scatterlist *sg;
3787        int ret = -ENOMEM;
3788        int i;
3789
3790        /* Allocate target SG list. */
3791        st = kmalloc(sizeof(*st), GFP_KERNEL);
3792        if (!st)
3793                goto err_st_alloc;
3794
3795        ret = sg_alloc_table(st, size, GFP_KERNEL);
3796        if (ret)
3797                goto err_sg_alloc;
3798
3799        st->nents = 0;
3800        sg = st->sgl;
3801
3802        for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
3803                sg = remap_pages(obj, rem_info->plane[i].offset,
3804                                 rem_info->plane[i].width, rem_info->plane[i].height,
3805                                 rem_info->plane[i].stride, st, sg);
3806        }
3807
3808        i915_sg_trim(st);
3809
3810        return st;
3811
3812err_sg_alloc:
3813        kfree(st);
3814err_st_alloc:
3815
3816        DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3817                         obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
3818
3819        return ERR_PTR(ret);
3820}
3821
3822static noinline struct sg_table *
3823intel_partial_pages(const struct i915_ggtt_view *view,
3824                    struct drm_i915_gem_object *obj)
3825{
3826        struct sg_table *st;
3827        struct scatterlist *sg, *iter;
3828        unsigned int count = view->partial.size;
3829        unsigned int offset;
3830        int ret = -ENOMEM;
3831
3832        st = kmalloc(sizeof(*st), GFP_KERNEL);
3833        if (!st)
3834                goto err_st_alloc;
3835
3836        ret = sg_alloc_table(st, count, GFP_KERNEL);
3837        if (ret)
3838                goto err_sg_alloc;
3839
3840        iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
3841        GEM_BUG_ON(!iter);
3842
3843        sg = st->sgl;
3844        st->nents = 0;
3845        do {
3846                unsigned int len;
3847
3848                len = min(iter->length - (offset << PAGE_SHIFT),
3849                          count << PAGE_SHIFT);
3850                sg_set_page(sg, NULL, len, 0);
3851                sg_dma_address(sg) =
3852                        sg_dma_address(iter) + (offset << PAGE_SHIFT);
3853                sg_dma_len(sg) = len;
3854
3855                st->nents++;
3856                count -= len >> PAGE_SHIFT;
3857                if (count == 0) {
3858                        sg_mark_end(sg);
3859                        i915_sg_trim(st); /* Drop any unused tail entries. */
3860
3861                        return st;
3862                }
3863
3864                sg = __sg_next(sg);
3865                iter = __sg_next(iter);
3866                offset = 0;
3867        } while (1);
3868
3869err_sg_alloc:
3870        kfree(st);
3871err_st_alloc:
3872        return ERR_PTR(ret);
3873}
3874
3875static int
3876i915_get_ggtt_vma_pages(struct i915_vma *vma)
3877{
3878        int ret;
3879
3880        /* The vma->pages are only valid within the lifespan of the borrowed
3881         * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, the
3882         * vma->pages must be regenerated as well. A simple rule is that
3883         * vma->pages must only be accessed when the obj->mm.pages are pinned.
3884         */
3885        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
3886
3887        switch (vma->ggtt_view.type) {
3888        default:
3889                GEM_BUG_ON(vma->ggtt_view.type);
3890                /* fall through */
3891        case I915_GGTT_VIEW_NORMAL:
3892                vma->pages = vma->obj->mm.pages;
3893                return 0;
3894
3895        case I915_GGTT_VIEW_ROTATED:
3896                vma->pages =
3897                        intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
3898                break;
3899
3900        case I915_GGTT_VIEW_REMAPPED:
3901                vma->pages =
3902                        intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
3903                break;
3904
3905        case I915_GGTT_VIEW_PARTIAL:
3906                vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3907                break;
3908        }
3909
3910        ret = 0;
3911        if (IS_ERR(vma->pages)) {
3912                ret = PTR_ERR(vma->pages);
3913                vma->pages = NULL;
3914                DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3915                          vma->ggtt_view.type, ret);
3916        }
3917        return ret;
3918}
3919
3920/**
3921 * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
3922 * @vm: the &struct i915_address_space
3923 * @node: the &struct drm_mm_node (typically i915_vma.node)
3924 * @size: how much space to allocate inside the GTT,
3925 *        must be #I915_GTT_PAGE_SIZE aligned
3926 * @offset: where to insert inside the GTT,
3927 *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
3928 *          (@offset + @size) must fit within the address space
3929 * @color: color to apply to the node; if this node is not from a VMA,
3930 *         color must be #I915_COLOR_UNEVICTABLE
3931 * @flags: control search and eviction behaviour
3932 *
3933 * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
3934 * the address space (using @size and @color). If the @node does not fit, it
3935 * tries to evict any overlapping nodes from the GTT, including any
3936 * neighbouring nodes if the colors do not match (to ensure guard pages between
3937 * differing domains). See i915_gem_evict_for_node() for the gory details
3938 * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
3939 * evicting active overlapping objects, and any overlapping node that is pinned
3940 * or marked as unevictable will also result in failure.
3941 *
3942 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3943 * asked to wait for eviction and interrupted.
3944 */
3945int i915_gem_gtt_reserve(struct i915_address_space *vm,
3946                         struct drm_mm_node *node,
3947                         u64 size, u64 offset, unsigned long color,
3948                         unsigned int flags)
3949{
3950        int err;
3951
3952        GEM_BUG_ON(!size);
3953        GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3954        GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
3955        GEM_BUG_ON(range_overflows(offset, size, vm->total));
3956        GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
3957        GEM_BUG_ON(drm_mm_node_allocated(node));
3958
3959        node->size = size;
3960        node->start = offset;
3961        node->color = color;
3962
3963        err = drm_mm_reserve_node(&vm->mm, node);
3964        if (err != -ENOSPC)
3965                return err;
3966
3967        if (flags & PIN_NOEVICT)
3968                return -ENOSPC;
3969
3970        err = i915_gem_evict_for_node(vm, node, flags);
3971        if (err == 0)
3972                err = drm_mm_reserve_node(&vm->mm, node);
3973
3974        return err;
3975}
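
/*
 * Hedged usage sketch (not part of the original file): reserving a small node
 * at a fixed GGTT offset. The function name, size and offset are invented for
 * illustration; real callers typically pass an i915_vma's node and flags.
 */
static int __maybe_unused example_gtt_reserve(struct i915_ggtt *ggtt,
                                              struct drm_mm_node *node)
{
        /* Eight pages at the 1 MiB mark, refusing to evict anything in the way. */
        return i915_gem_gtt_reserve(&ggtt->vm, node,
                                    8 * I915_GTT_PAGE_SIZE,
                                    256 * I915_GTT_PAGE_SIZE,
                                    I915_COLOR_UNEVICTABLE,
                                    PIN_NOEVICT);
}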
3976
3977static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
3978{
3979        u64 range, addr;
3980
3981        GEM_BUG_ON(range_overflows(start, len, end));
3982        GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
3983
3984        range = round_down(end - len, align) - round_up(start, align);
3985        if (range) {
3986                if (sizeof(unsigned long) == sizeof(u64)) {
3987                        addr = get_random_long();
3988                } else {
3989                        addr = get_random_int();
3990                        if (range > U32_MAX) {
3991                                addr <<= 32;
3992                                addr |= get_random_int();
3993                        }
3994                }
3995                div64_u64_rem(addr, range, &addr);
3996                start += addr;
3997        }
3998
3999        return round_up(start, align);
4000}
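
/*
 * Added note for illustration: random_offset() returns a randomly chosen,
 * @align-aligned address such that [addr, addr + len) still fits inside
 * [start, end); on 32-bit kernels two 32-bit random draws are stitched
 * together whenever the usable range exceeds U32_MAX.
 */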
4001
4002/**
4003 * i915_gem_gtt_insert - insert a node into an address_space (GTT)
4004 * @vm: the &struct i915_address_space
4005 * @node: the &struct drm_mm_node (typically i915_vma.node)
4006 * @size: how much space to allocate inside the GTT,
4007 *        must be #I915_GTT_PAGE_SIZE aligned
4008 * @alignment: required alignment of starting offset, may be 0 but
4009 *             if specified, this must be a power-of-two and at least
4010 *             #I915_GTT_MIN_ALIGNMENT
4011 * @color: color to apply to node
4012 * @start: start of any range restriction inside GTT (0 for all),
4013 *         must be #I915_GTT_PAGE_SIZE aligned
4014 * @end: end of any range restriction inside GTT (U64_MAX for all),
4015 *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
4016 * @flags: control search and eviction behaviour
4017 *
4018 * i915_gem_gtt_insert() first searches for an available hole into which
4019 * it can insert the node. The hole address is aligned to @alignment and
4020 * its @size must then fit entirely within the [@start, @end] bounds. The
4021 * nodes on either side of the hole must match @color, or else a guard page
4022 * will be inserted between the two nodes (or the node evicted). If no
4023 * suitable hole is found, first a victim is randomly selected and tested
4024 * for eviction; if that fails, the LRU list of objects within the GTT
4025 * is scanned to find the first set of replacement nodes to create the hole.
4026 * Those old overlapping nodes are evicted from the GTT (and so must be
4027 * rebound before any future use). Any node that is currently pinned cannot
4028 * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
4029 * active and #PIN_NONBLOCK is specified, that node is also skipped when
4030 * searching for an eviction candidate. See i915_gem_evict_something() for
4031 * the gory details on the eviction algorithm.
4032 *
4033 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
4034 * asked to wait for eviction and interrupted.
4035 */
4036int i915_gem_gtt_insert(struct i915_address_space *vm,
4037                        struct drm_mm_node *node,
4038                        u64 size, u64 alignment, unsigned long color,
4039                        u64 start, u64 end, unsigned int flags)
4040{
4041        enum drm_mm_insert_mode mode;
4042        u64 offset;
4043        int err;
4044
4045        lockdep_assert_held(&vm->i915->drm.struct_mutex);
4046        GEM_BUG_ON(!size);
4047        GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
4048        GEM_BUG_ON(alignment && !is_power_of_2(alignment));
4049        GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
4050        GEM_BUG_ON(start >= end);
4051        GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
4052        GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
4053        GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
4054        GEM_BUG_ON(drm_mm_node_allocated(node));
4055
4056        if (unlikely(range_overflows(start, size, end)))
4057                return -ENOSPC;
4058
4059        if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
4060                return -ENOSPC;
4061
4062        mode = DRM_MM_INSERT_BEST;
4063        if (flags & PIN_HIGH)
4064                mode = DRM_MM_INSERT_HIGHEST;
4065        if (flags & PIN_MAPPABLE)
4066                mode = DRM_MM_INSERT_LOW;
4067
4068        /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
4069         * so we know that we always have a minimum alignment of 4096.
4070         * The drm_mm range manager is optimised to return results
4071         * with zero alignment, so where possible use the optimal
4072         * path.
4073         */
4074        BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
4075        if (alignment <= I915_GTT_MIN_ALIGNMENT)
4076                alignment = 0;
4077
4078        err = drm_mm_insert_node_in_range(&vm->mm, node,
4079                                          size, alignment, color,
4080                                          start, end, mode);
4081        if (err != -ENOSPC)
4082                return err;
4083
4084        if (mode & DRM_MM_INSERT_ONCE) {
4085                err = drm_mm_insert_node_in_range(&vm->mm, node,
4086                                                  size, alignment, color,
4087                                                  start, end,
4088                                                  DRM_MM_INSERT_BEST);
4089                if (err != -ENOSPC)
4090                        return err;
4091        }
4092
4093        if (flags & PIN_NOEVICT)
4094                return -ENOSPC;
4095
4096        /* No free space, pick a slot at random.
4097         *
4098         * There is a pathological case here using a GTT shared between
4099         * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
4100         *
4101         *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
4102         *         (64k objects)             (448k objects)
4103         *
4104         * Now imagine that the eviction LRU is ordered top-down (just because
4105         * pathology meets real life), and that we need to evict an object to
4106         * make room inside the aperture. The eviction scan then has to walk
4107         * the 448k list before it finds one within range. And now imagine that
4108         * it has to search for a new hole between every byte inside the memcpy,
4109         * for several simultaneous clients.
4110         *
4111         * On a full-ppgtt system, if we have run out of available space, there
4112         * will be lots and lots of objects in the eviction list! Again,
4113         * searching that LRU list may be slow if we are also applying any
4114         * range restrictions (e.g. restriction to low 4GiB) and so, for
4115         * simplicity and similarity between different GTTs, try the single
4116         * random replacement first.
4117         */
4118        offset = random_offset(start, end,
4119                               size, alignment ?: I915_GTT_MIN_ALIGNMENT);
4120        err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
4121        if (err != -ENOSPC)
4122                return err;
4123
4124        /* Randomly selected placement is pinned, do a search */
4125        err = i915_gem_evict_something(vm, size, alignment, color,
4126                                       start, end, flags);
4127        if (err)
4128                return err;
4129
4130        return drm_mm_insert_node_in_range(&vm->mm, node,
4131                                           size, alignment, color,
4132                                           start, end, DRM_MM_INSERT_EVICT);
4133}
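
/*
 * Hedged usage sketch (not part of the original file): asking for any
 * suitably sized hole in the CPU-mappable portion of the GGTT. The function
 * name and parameter choices are invented for illustration only.
 */
static int __maybe_unused example_gtt_insert(struct i915_ggtt *ggtt,
                                             struct drm_mm_node *node,
                                             u64 size)
{
        /*
         * size must be I915_GTT_PAGE_SIZE aligned, and struct_mutex must be
         * held, as asserted by i915_gem_gtt_insert().
         */
        return i915_gem_gtt_insert(&ggtt->vm, node,
                                   size, 0, I915_COLOR_UNEVICTABLE,
                                   0, ggtt->mappable_end,
                                   PIN_MAPPABLE);
}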
4134
4135#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4136#include "selftests/mock_gtt.c"
4137#include "selftests/i915_gem_gtt.c"
4138#endif
4139