linux/drivers/gpu/drm/i915/i915_gem_gtt.c
   1/*
   2 * Copyright © 2010 Daniel Vetter
   3 * Copyright © 2011-2014 Intel Corporation
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice (including the next
  13 * paragraph) shall be included in all copies or substantial portions of the
  14 * Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22 * IN THE SOFTWARE.
  23 *
  24 */
  25
  26#include <linux/seq_file.h>
  27#include <drm/drmP.h>
  28#include <drm/i915_drm.h>
  29#include "i915_drv.h"
  30#include "i915_trace.h"
  31#include "intel_drv.h"
  32
  33static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
  34static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);
  35
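/*
 * Returns the PPGTT mode to use: 0 disables PPGTT, 1 selects aliasing PPGTT
 * and 2 selects full PPGTT. enable_ppgtt is presumably the raw
 * i915.enable_ppgtt module parameter, sanitized here against what the
 * hardware generation actually supports.
 */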
  36static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
  37{
  38        bool has_aliasing_ppgtt;
  39        bool has_full_ppgtt;
  40
  41        has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
  42        has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
  43        if (IS_GEN8(dev))
  44                has_full_ppgtt = false; /* XXX why? */
  45
  46        /*
  47         * We don't allow disabling PPGTT for gen9+ as it's a requirement for
  48         * execlists, the sole mechanism available to submit work.
  49         */
  50        if (INTEL_INFO(dev)->gen < 9 &&
  51            (enable_ppgtt == 0 || !has_aliasing_ppgtt))
  52                return 0;
  53
  54        if (enable_ppgtt == 1)
  55                return 1;
  56
  57        if (enable_ppgtt == 2 && has_full_ppgtt)
  58                return 2;
  59
  60#ifdef CONFIG_INTEL_IOMMU
  61        /* Disable ppgtt on SNB if VT-d is on. */
  62        if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
  63                DRM_INFO("Disabling PPGTT because VT-d is on\n");
  64                return 0;
  65        }
  66#endif
  67
   68        /* Early VLV steppings don't have working PPGTT */
  69        if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
  70            dev->pdev->revision < 0xb) {
  71                DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
  72                return 0;
  73        }
  74
  75        return has_aliasing_ppgtt ? 1 : 0;
  76}
  77
  78
  79static void ppgtt_bind_vma(struct i915_vma *vma,
  80                           enum i915_cache_level cache_level,
  81                           u32 flags);
  82static void ppgtt_unbind_vma(struct i915_vma *vma);
  83
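/*
 * A gen8 PTE is simply the page-aligned DMA address OR'd with the
 * present/writable bits and a PPAT index picked from the cache level:
 * uncached, write-through (display/eLLC), or fully cached.
 */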
  84static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
  85                                             enum i915_cache_level level,
  86                                             bool valid)
  87{
  88        gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
  89        pte |= addr;
  90
  91        switch (level) {
  92        case I915_CACHE_NONE:
  93                pte |= PPAT_UNCACHED_INDEX;
  94                break;
  95        case I915_CACHE_WT:
  96                pte |= PPAT_DISPLAY_ELLC_INDEX;
  97                break;
  98        default:
  99                pte |= PPAT_CACHED_INDEX;
 100                break;
 101        }
 102
 103        return pte;
 104}
 105
 106static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
 107                                             dma_addr_t addr,
 108                                             enum i915_cache_level level)
 109{
 110        gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
 111        pde |= addr;
 112        if (level != I915_CACHE_NONE)
 113                pde |= PPAT_CACHED_PDE_INDEX;
 114        else
 115                pde |= PPAT_UNCACHED_INDEX;
 116        return pde;
 117}
 118
 119static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
 120                                     enum i915_cache_level level,
 121                                     bool valid, u32 unused)
 122{
 123        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 124        pte |= GEN6_PTE_ADDR_ENCODE(addr);
 125
 126        switch (level) {
 127        case I915_CACHE_L3_LLC:
 128        case I915_CACHE_LLC:
 129                pte |= GEN6_PTE_CACHE_LLC;
 130                break;
 131        case I915_CACHE_NONE:
 132                pte |= GEN6_PTE_UNCACHED;
 133                break;
 134        default:
 135                WARN_ON(1);
 136        }
 137
 138        return pte;
 139}
 140
 141static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
 142                                     enum i915_cache_level level,
 143                                     bool valid, u32 unused)
 144{
 145        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 146        pte |= GEN6_PTE_ADDR_ENCODE(addr);
 147
 148        switch (level) {
 149        case I915_CACHE_L3_LLC:
 150                pte |= GEN7_PTE_CACHE_L3_LLC;
 151                break;
 152        case I915_CACHE_LLC:
 153                pte |= GEN6_PTE_CACHE_LLC;
 154                break;
 155        case I915_CACHE_NONE:
 156                pte |= GEN6_PTE_UNCACHED;
 157                break;
 158        default:
 159                WARN_ON(1);
 160        }
 161
 162        return pte;
 163}
 164
 165static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 166                                     enum i915_cache_level level,
 167                                     bool valid, u32 flags)
 168{
 169        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 170        pte |= GEN6_PTE_ADDR_ENCODE(addr);
 171
 172        if (!(flags & PTE_READ_ONLY))
 173                pte |= BYT_PTE_WRITEABLE;
 174
 175        if (level != I915_CACHE_NONE)
 176                pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 177
 178        return pte;
 179}
 180
 181static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
 182                                     enum i915_cache_level level,
 183                                     bool valid, u32 unused)
 184{
 185        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 186        pte |= HSW_PTE_ADDR_ENCODE(addr);
 187
 188        if (level != I915_CACHE_NONE)
 189                pte |= HSW_WB_LLC_AGE3;
 190
 191        return pte;
 192}
 193
 194static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 195                                      enum i915_cache_level level,
 196                                      bool valid, u32 unused)
 197{
 198        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 199        pte |= HSW_PTE_ADDR_ENCODE(addr);
 200
 201        switch (level) {
 202        case I915_CACHE_NONE:
 203                break;
 204        case I915_CACHE_WT:
 205                pte |= HSW_WT_ELLC_LLC_AGE3;
 206                break;
 207        default:
 208                pte |= HSW_WB_ELLC_LLC_AGE3;
 209                break;
 210        }
 211
 212        return pte;
 213}
 214
 215/* Broadwell Page Directory Pointer Descriptors */
 216static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
 217                           uint64_t val)
 218{
 219        int ret;
 220
 221        BUG_ON(entry >= 4);
 222
 223        ret = intel_ring_begin(ring, 6);
 224        if (ret)
 225                return ret;
 226
 227        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 228        intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
 229        intel_ring_emit(ring, (u32)(val >> 32));
 230        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 231        intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
 232        intel_ring_emit(ring, (u32)(val));
 233        intel_ring_advance(ring);
 234
 235        return 0;
 236}
 237
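/*
 * Each PDP register covers 1GB of the PPGTT address space, so the switch
 * loads one register per used page directory, walking from the last used
 * directory down to PDP0 via LRI writes on the ring.
 */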
 238static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 239                          struct intel_engine_cs *ring)
 240{
 241        int i, ret;
 242
 243        /* bit of a hack to find the actual last used pd */
 244        int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
 245
 246        for (i = used_pd - 1; i >= 0; i--) {
 247                dma_addr_t addr = ppgtt->pd_dma_addr[i];
 248                ret = gen8_write_pdp(ring, i, addr);
 249                if (ret)
 250                        return ret;
 251        }
 252
 253        return 0;
 254}
 255
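/*
 * The pdpe/pde/pte indices below come from slicing the GTT offset at the
 * 30/21/12 bit boundaries, matching 512-entry page directories and page
 * tables. As a purely illustrative example, an offset of 0x80403000 decodes
 * to pdpe=2, pde=2, pte=3.
 */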
 256static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 257                                   uint64_t start,
 258                                   uint64_t length,
 259                                   bool use_scratch)
 260{
 261        struct i915_hw_ppgtt *ppgtt =
 262                container_of(vm, struct i915_hw_ppgtt, base);
 263        gen8_gtt_pte_t *pt_vaddr, scratch_pte;
 264        unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
 265        unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
 266        unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
 267        unsigned num_entries = length >> PAGE_SHIFT;
 268        unsigned last_pte, i;
 269
 270        scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
 271                                      I915_CACHE_LLC, use_scratch);
 272
 273        while (num_entries) {
 274                struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
 275
 276                last_pte = pte + num_entries;
 277                if (last_pte > GEN8_PTES_PER_PAGE)
 278                        last_pte = GEN8_PTES_PER_PAGE;
 279
 280                pt_vaddr = kmap_atomic(page_table);
 281
 282                for (i = pte; i < last_pte; i++) {
 283                        pt_vaddr[i] = scratch_pte;
 284                        num_entries--;
 285                }
 286
 287                if (!HAS_LLC(ppgtt->base.dev))
 288                        drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 289                kunmap_atomic(pt_vaddr);
 290
 291                pte = 0;
 292                if (++pde == GEN8_PDES_PER_PAGE) {
 293                        pdpe++;
 294                        pde = 0;
 295                }
 296        }
 297}
 298
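/*
 * Walks the sg list and writes one PTE per page, rolling the pte index over
 * into the next pde (and the pde into the next pdpe) each time a 512-entry
 * page table fills up. Page tables are kmap_atomic'd one at a time to keep
 * the atomic sections short.
 */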
 299static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 300                                      struct sg_table *pages,
 301                                      uint64_t start,
 302                                      enum i915_cache_level cache_level, u32 unused)
 303{
 304        struct i915_hw_ppgtt *ppgtt =
 305                container_of(vm, struct i915_hw_ppgtt, base);
 306        gen8_gtt_pte_t *pt_vaddr;
 307        unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
 308        unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
 309        unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
 310        struct sg_page_iter sg_iter;
 311
 312        pt_vaddr = NULL;
 313
 314        for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
 315                if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
 316                        break;
 317
 318                if (pt_vaddr == NULL)
 319                        pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);
 320
 321                pt_vaddr[pte] =
 322                        gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
 323                                        cache_level, true);
 324                if (++pte == GEN8_PTES_PER_PAGE) {
 325                        if (!HAS_LLC(ppgtt->base.dev))
 326                                drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 327                        kunmap_atomic(pt_vaddr);
 328                        pt_vaddr = NULL;
 329                        if (++pde == GEN8_PDES_PER_PAGE) {
 330                                pdpe++;
 331                                pde = 0;
 332                        }
 333                        pte = 0;
 334                }
 335        }
 336        if (pt_vaddr) {
 337                if (!HAS_LLC(ppgtt->base.dev))
 338                        drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 339                kunmap_atomic(pt_vaddr);
 340        }
 341}
 342
 343static void gen8_free_page_tables(struct page **pt_pages)
 344{
 345        int i;
 346
 347        if (pt_pages == NULL)
 348                return;
 349
 350        for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
 351                if (pt_pages[i])
 352                        __free_pages(pt_pages[i], 0);
 353}
 354
 355static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
 356{
 357        int i;
 358
 359        for (i = 0; i < ppgtt->num_pd_pages; i++) {
 360                gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
 361                kfree(ppgtt->gen8_pt_pages[i]);
 362                kfree(ppgtt->gen8_pt_dma_addr[i]);
 363        }
 364
 365        __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
 366}
 367
 368static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 369{
 370        struct pci_dev *hwdev = ppgtt->base.dev->pdev;
 371        int i, j;
 372
 373        for (i = 0; i < ppgtt->num_pd_pages; i++) {
 374                /* TODO: In the future we'll support sparse mappings, so this
 375                 * will have to change. */
 376                if (!ppgtt->pd_dma_addr[i])
 377                        continue;
 378
 379                pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
 380                               PCI_DMA_BIDIRECTIONAL);
 381
 382                for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
 383                        dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
 384                        if (addr)
 385                                pci_unmap_page(hwdev, addr, PAGE_SIZE,
 386                                               PCI_DMA_BIDIRECTIONAL);
 387                }
 388        }
 389}
 390
 391static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 392{
 393        struct i915_hw_ppgtt *ppgtt =
 394                container_of(vm, struct i915_hw_ppgtt, base);
 395
 396        gen8_ppgtt_unmap_pages(ppgtt);
 397        gen8_ppgtt_free(ppgtt);
 398}
 399
 400static struct page **__gen8_alloc_page_tables(void)
 401{
 402        struct page **pt_pages;
 403        int i;
 404
 405        pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
 406        if (!pt_pages)
 407                return ERR_PTR(-ENOMEM);
 408
 409        for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
 410                pt_pages[i] = alloc_page(GFP_KERNEL);
 411                if (!pt_pages[i])
 412                        goto bail;
 413        }
 414
 415        return pt_pages;
 416
 417bail:
 418        gen8_free_page_tables(pt_pages);
 419        kfree(pt_pages);
 420        return ERR_PTR(-ENOMEM);
 421}
 422
 423static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
 424                                           const int max_pdp)
 425{
 426        struct page **pt_pages[GEN8_LEGACY_PDPS];
 427        int i, ret;
 428
 429        for (i = 0; i < max_pdp; i++) {
 430                pt_pages[i] = __gen8_alloc_page_tables();
 431                if (IS_ERR(pt_pages[i])) {
 432                        ret = PTR_ERR(pt_pages[i]);
 433                        goto unwind_out;
 434                }
 435        }
 436
  437        /* NB: Avoid touching gen8_pt_pages until last to keep the allocation
  438         * "atomic", for cleanup purposes.
 439         */
 440        for (i = 0; i < max_pdp; i++)
 441                ppgtt->gen8_pt_pages[i] = pt_pages[i];
 442
 443        return 0;
 444
 445unwind_out:
 446        while (i--) {
 447                gen8_free_page_tables(pt_pages[i]);
 448                kfree(pt_pages[i]);
 449        }
 450
 451        return ret;
 452}
 453
 454static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
 455{
 456        int i;
 457
 458        for (i = 0; i < ppgtt->num_pd_pages; i++) {
 459                ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
 460                                                     sizeof(dma_addr_t),
 461                                                     GFP_KERNEL);
 462                if (!ppgtt->gen8_pt_dma_addr[i])
 463                        return -ENOMEM;
 464        }
 465
 466        return 0;
 467}
 468
 469static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
 470                                                const int max_pdp)
 471{
 472        ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
 473        if (!ppgtt->pd_pages)
 474                return -ENOMEM;
 475
 476        ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
 477        BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
 478
 479        return 0;
 480}
 481
 482static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
 483                            const int max_pdp)
 484{
 485        int ret;
 486
 487        ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
 488        if (ret)
 489                return ret;
 490
 491        ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
 492        if (ret) {
 493                __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
 494                return ret;
 495        }
 496
 497        ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
 498
 499        ret = gen8_ppgtt_allocate_dma(ppgtt);
 500        if (ret)
 501                gen8_ppgtt_free(ppgtt);
 502
 503        return ret;
 504}
 505
 506static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
 507                                             const int pd)
 508{
 509        dma_addr_t pd_addr;
 510        int ret;
 511
 512        pd_addr = pci_map_page(ppgtt->base.dev->pdev,
 513                               &ppgtt->pd_pages[pd], 0,
 514                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 515
 516        ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
 517        if (ret)
 518                return ret;
 519
 520        ppgtt->pd_dma_addr[pd] = pd_addr;
 521
 522        return 0;
 523}
 524
 525static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
 526                                        const int pd,
 527                                        const int pt)
 528{
 529        dma_addr_t pt_addr;
 530        struct page *p;
 531        int ret;
 532
 533        p = ppgtt->gen8_pt_pages[pd][pt];
 534        pt_addr = pci_map_page(ppgtt->base.dev->pdev,
 535                               p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 536        ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
 537        if (ret)
 538                return ret;
 539
 540        ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
 541
 542        return 0;
 543}
 544
 545/**
  546 * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
  547 * registers, with a net effect resembling a 2-level page table in normal x86
  548 * terms. Each PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of
  549 * legacy 32b address space.
 550 *
 551 * FIXME: split allocation into smaller pieces. For now we only ever do this
 552 * once, but with full PPGTT, the multiple contiguous allocations will be bad.
 553 * TODO: Do something with the size parameter
 554 */
 555static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 556{
 557        const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
 558        const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
 559        int i, j, ret;
 560
 561        if (size % (1<<30))
 562                DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
 563
 564        /* 1. Do all our allocations for page directories and page tables. */
 565        ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
 566        if (ret)
 567                return ret;
 568
 569        /*
 570         * 2. Create DMA mappings for the page directories and page tables.
 571         */
 572        for (i = 0; i < max_pdp; i++) {
 573                ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
 574                if (ret)
 575                        goto bail;
 576
 577                for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
 578                        ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
 579                        if (ret)
 580                                goto bail;
 581                }
 582        }
 583
 584        /*
  585         * 3. Map all the page directory entries to point to the page tables
 586         * we've allocated.
 587         *
 588         * For now, the PPGTT helper functions all require that the PDEs are
 589         * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
 590         * will never need to touch the PDEs again.
 591         */
 592        for (i = 0; i < max_pdp; i++) {
 593                gen8_ppgtt_pde_t *pd_vaddr;
 594                pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
 595                for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
 596                        dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
 597                        pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
 598                                                      I915_CACHE_LLC);
 599                }
 600                if (!HAS_LLC(ppgtt->base.dev))
 601                        drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
 602                kunmap_atomic(pd_vaddr);
 603        }
 604
 605        ppgtt->switch_mm = gen8_mm_switch;
 606        ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 607        ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 608        ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 609        ppgtt->base.start = 0;
 610        ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
 611
 612        ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
 613
 614        DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
 615                         ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
 616        DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
 617                         ppgtt->num_pd_entries,
 618                         (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
 619        return 0;
 620
 621bail:
 622        gen8_ppgtt_unmap_pages(ppgtt);
 623        gen8_ppgtt_free(ppgtt);
 624        return ret;
 625}
 626
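/*
 * Debug dumper (hooked up as ppgtt->debug_dump further down): reads each PDE
 * back through the GGTT, compares it with the value we expect to have
 * written, then prints any PTEs that differ from the scratch PTE, four
 * entries per line.
 */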
 627static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 628{
 629        struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
 630        struct i915_address_space *vm = &ppgtt->base;
 631        gen6_gtt_pte_t __iomem *pd_addr;
 632        gen6_gtt_pte_t scratch_pte;
 633        uint32_t pd_entry;
 634        int pte, pde;
 635
 636        scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
 637
 638        pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
 639                ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
 640
 641        seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
 642                   ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
 643        for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
 644                u32 expected;
 645                gen6_gtt_pte_t *pt_vaddr;
 646                dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
 647                pd_entry = readl(pd_addr + pde);
 648                expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
 649
 650                if (pd_entry != expected)
 651                        seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
 652                                   pde,
 653                                   pd_entry,
 654                                   expected);
 655                seq_printf(m, "\tPDE: %x\n", pd_entry);
 656
 657                pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
 658                for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
 659                        unsigned long va =
 660                                (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
 661                                (pte * PAGE_SIZE);
 662                        int i;
 663                        bool found = false;
 664                        for (i = 0; i < 4; i++)
 665                                if (pt_vaddr[pte + i] != scratch_pte)
 666                                        found = true;
 667                        if (!found)
 668                                continue;
 669
 670                        seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
 671                        for (i = 0; i < 4; i++) {
 672                                if (pt_vaddr[pte + i] != scratch_pte)
 673                                        seq_printf(m, " %08x", pt_vaddr[pte + i]);
 674                                else
 675                                        seq_puts(m, "  SCRATCH ");
 676                        }
 677                        seq_puts(m, "\n");
 678                }
 679                kunmap_atomic(pt_vaddr);
 680        }
 681}
 682
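/*
 * The gen6/7 page directory lives inside the GGTT, so PDEs are written
 * through the same ioremapped gsm window as GGTT PTEs: one 32-bit entry per
 * page table (DMA address | valid), followed by a readback to post the
 * writes.
 */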
 683static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
 684{
 685        struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
 686        gen6_gtt_pte_t __iomem *pd_addr;
 687        uint32_t pd_entry;
 688        int i;
 689
 690        WARN_ON(ppgtt->pd_offset & 0x3f);
 691        pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
 692                ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
 693        for (i = 0; i < ppgtt->num_pd_entries; i++) {
 694                dma_addr_t pt_addr;
 695
 696                pt_addr = ppgtt->pt_dma_addr[i];
 697                pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
 698                pd_entry |= GEN6_PDE_VALID;
 699
 700                writel(pd_entry, pd_addr + i);
 701        }
 702        readl(pd_addr);
 703}
 704
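/*
 * PP_DIR_BASE appears to take the page directory's offset within the GGTT in
 * 64-byte units, placed in the upper 16 bits of the register; the arithmetic
 * below converts the byte offset pd_offset into that format.
 */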
 705static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 706{
 707        BUG_ON(ppgtt->pd_offset & 0x3f);
 708
 709        return (ppgtt->pd_offset / 64) << 16;
 710}
 711
 712static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 713                         struct intel_engine_cs *ring)
 714{
 715        int ret;
 716
 717        /* NB: TLBs must be flushed and invalidated before a switch */
 718        ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 719        if (ret)
 720                return ret;
 721
 722        ret = intel_ring_begin(ring, 6);
 723        if (ret)
 724                return ret;
 725
 726        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
 727        intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
 728        intel_ring_emit(ring, PP_DIR_DCLV_2G);
 729        intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
 730        intel_ring_emit(ring, get_pd_offset(ppgtt));
 731        intel_ring_emit(ring, MI_NOOP);
 732        intel_ring_advance(ring);
 733
 734        return 0;
 735}
 736
 737static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 738                          struct intel_engine_cs *ring)
 739{
 740        int ret;
 741
 742        /* NB: TLBs must be flushed and invalidated before a switch */
 743        ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 744        if (ret)
 745                return ret;
 746
 747        ret = intel_ring_begin(ring, 6);
 748        if (ret)
 749                return ret;
 750
 751        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
 752        intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
 753        intel_ring_emit(ring, PP_DIR_DCLV_2G);
 754        intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
 755        intel_ring_emit(ring, get_pd_offset(ppgtt));
 756        intel_ring_emit(ring, MI_NOOP);
 757        intel_ring_advance(ring);
 758
 759        /* XXX: RCS is the only one to auto invalidate the TLBs? */
 760        if (ring->id != RCS) {
 761                ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 762                if (ret)
 763                        return ret;
 764        }
 765
 766        return 0;
 767}
 768
 769static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
 770                          struct intel_engine_cs *ring)
 771{
 772        struct drm_device *dev = ppgtt->base.dev;
 773        struct drm_i915_private *dev_priv = dev->dev_private;
 774
 775
 776        I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
 777        I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
 778
 779        POSTING_READ(RING_PP_DIR_DCLV(ring));
 780
 781        return 0;
 782}
 783
 784static void gen8_ppgtt_enable(struct drm_device *dev)
 785{
 786        struct drm_i915_private *dev_priv = dev->dev_private;
 787        struct intel_engine_cs *ring;
 788        int j;
 789
 790        for_each_ring(ring, dev_priv, j) {
 791                I915_WRITE(RING_MODE_GEN7(ring),
 792                           _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 793        }
 794}
 795
 796static void gen7_ppgtt_enable(struct drm_device *dev)
 797{
 798        struct drm_i915_private *dev_priv = dev->dev_private;
 799        struct intel_engine_cs *ring;
 800        uint32_t ecochk, ecobits;
 801        int i;
 802
 803        ecobits = I915_READ(GAC_ECO_BITS);
 804        I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
 805
 806        ecochk = I915_READ(GAM_ECOCHK);
 807        if (IS_HASWELL(dev)) {
 808                ecochk |= ECOCHK_PPGTT_WB_HSW;
 809        } else {
 810                ecochk |= ECOCHK_PPGTT_LLC_IVB;
 811                ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
 812        }
 813        I915_WRITE(GAM_ECOCHK, ecochk);
 814
 815        for_each_ring(ring, dev_priv, i) {
 816                /* GFX_MODE is per-ring on gen7+ */
 817                I915_WRITE(RING_MODE_GEN7(ring),
 818                           _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 819        }
 820}
 821
 822static void gen6_ppgtt_enable(struct drm_device *dev)
 823{
 824        struct drm_i915_private *dev_priv = dev->dev_private;
 825        uint32_t ecochk, gab_ctl, ecobits;
 826
 827        ecobits = I915_READ(GAC_ECO_BITS);
 828        I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
 829                   ECOBITS_PPGTT_CACHE64B);
 830
 831        gab_ctl = I915_READ(GAB_CTL);
 832        I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
 833
 834        ecochk = I915_READ(GAM_ECOCHK);
 835        I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
 836
 837        I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 838}
 839
  840/* PPGTT support for Sandybridge/Gen6 and later */
 841static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 842                                   uint64_t start,
 843                                   uint64_t length,
 844                                   bool use_scratch)
 845{
 846        struct i915_hw_ppgtt *ppgtt =
 847                container_of(vm, struct i915_hw_ppgtt, base);
 848        gen6_gtt_pte_t *pt_vaddr, scratch_pte;
 849        unsigned first_entry = start >> PAGE_SHIFT;
 850        unsigned num_entries = length >> PAGE_SHIFT;
 851        unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 852        unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 853        unsigned last_pte, i;
 854
 855        scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
 856
 857        while (num_entries) {
 858                last_pte = first_pte + num_entries;
 859                if (last_pte > I915_PPGTT_PT_ENTRIES)
 860                        last_pte = I915_PPGTT_PT_ENTRIES;
 861
 862                pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
 863
 864                for (i = first_pte; i < last_pte; i++)
 865                        pt_vaddr[i] = scratch_pte;
 866
 867                kunmap_atomic(pt_vaddr);
 868
 869                num_entries -= last_pte - first_pte;
 870                first_pte = 0;
 871                act_pt++;
 872        }
 873}
 874
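/*
 * act_pt/act_pte index into 1024-entry gen6 page tables (4-byte PTEs). As an
 * illustrative example only: a start offset of 5MB gives first_entry = 1280,
 * i.e. page table 1, entry 256.
 */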
 875static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 876                                      struct sg_table *pages,
 877                                      uint64_t start,
 878                                      enum i915_cache_level cache_level, u32 flags)
 879{
 880        struct i915_hw_ppgtt *ppgtt =
 881                container_of(vm, struct i915_hw_ppgtt, base);
 882        gen6_gtt_pte_t *pt_vaddr;
 883        unsigned first_entry = start >> PAGE_SHIFT;
 884        unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 885        unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 886        struct sg_page_iter sg_iter;
 887
 888        pt_vaddr = NULL;
 889        for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
 890                if (pt_vaddr == NULL)
 891                        pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
 892
 893                pt_vaddr[act_pte] =
 894                        vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
 895                                       cache_level, true, flags);
 896
 897                if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 898                        kunmap_atomic(pt_vaddr);
 899                        pt_vaddr = NULL;
 900                        act_pt++;
 901                        act_pte = 0;
 902                }
 903        }
 904        if (pt_vaddr)
 905                kunmap_atomic(pt_vaddr);
 906}
 907
 908static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 909{
 910        int i;
 911
 912        if (ppgtt->pt_dma_addr) {
 913                for (i = 0; i < ppgtt->num_pd_entries; i++)
 914                        pci_unmap_page(ppgtt->base.dev->pdev,
 915                                       ppgtt->pt_dma_addr[i],
 916                                       4096, PCI_DMA_BIDIRECTIONAL);
 917        }
 918}
 919
 920static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 921{
 922        int i;
 923
 924        kfree(ppgtt->pt_dma_addr);
 925        for (i = 0; i < ppgtt->num_pd_entries; i++)
 926                __free_page(ppgtt->pt_pages[i]);
 927        kfree(ppgtt->pt_pages);
 928}
 929
 930static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 931{
 932        struct i915_hw_ppgtt *ppgtt =
 933                container_of(vm, struct i915_hw_ppgtt, base);
 934
 935        drm_mm_remove_node(&ppgtt->node);
 936
 937        gen6_ppgtt_unmap_pages(ppgtt);
 938        gen6_ppgtt_free(ppgtt);
 939}
 940
 941static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 942{
 943        struct drm_device *dev = ppgtt->base.dev;
 944        struct drm_i915_private *dev_priv = dev->dev_private;
 945        bool retried = false;
 946        int ret;
 947
  948        /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
 949         * allocator works in address space sizes, so it's multiplied by page
 950         * size. We allocate at the top of the GTT to avoid fragmentation.
 951         */
 952        BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
 953alloc:
 954        ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
 955                                                  &ppgtt->node, GEN6_PD_SIZE,
 956                                                  GEN6_PD_ALIGN, 0,
 957                                                  0, dev_priv->gtt.base.total,
 958                                                  DRM_MM_TOPDOWN);
 959        if (ret == -ENOSPC && !retried) {
 960                ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
 961                                               GEN6_PD_SIZE, GEN6_PD_ALIGN,
 962                                               I915_CACHE_NONE,
 963                                               0, dev_priv->gtt.base.total,
 964                                               0);
 965                if (ret)
 966                        return ret;
 967
 968                retried = true;
 969                goto alloc;
 970        }
 971
 972        if (ppgtt->node.start < dev_priv->gtt.mappable_end)
 973                DRM_DEBUG("Forced to use aperture for PDEs\n");
 974
 975        ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
 976        return ret;
 977}
 978
 979static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
 980{
 981        int i;
 982
 983        ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
 984                                  GFP_KERNEL);
 985
 986        if (!ppgtt->pt_pages)
 987                return -ENOMEM;
 988
 989        for (i = 0; i < ppgtt->num_pd_entries; i++) {
 990                ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
 991                if (!ppgtt->pt_pages[i]) {
 992                        gen6_ppgtt_free(ppgtt);
 993                        return -ENOMEM;
 994                }
 995        }
 996
 997        return 0;
 998}
 999
1000static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
1001{
1002        int ret;
1003
1004        ret = gen6_ppgtt_allocate_page_directories(ppgtt);
1005        if (ret)
1006                return ret;
1007
1008        ret = gen6_ppgtt_allocate_page_tables(ppgtt);
1009        if (ret) {
1010                drm_mm_remove_node(&ppgtt->node);
1011                return ret;
1012        }
1013
1014        ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
1015                                     GFP_KERNEL);
1016        if (!ppgtt->pt_dma_addr) {
1017                drm_mm_remove_node(&ppgtt->node);
1018                gen6_ppgtt_free(ppgtt);
1019                return -ENOMEM;
1020        }
1021
1022        return 0;
1023}
1024
1025static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
1026{
1027        struct drm_device *dev = ppgtt->base.dev;
1028        int i;
1029
1030        for (i = 0; i < ppgtt->num_pd_entries; i++) {
1031                dma_addr_t pt_addr;
1032
1033                pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
1034                                       PCI_DMA_BIDIRECTIONAL);
1035
1036                if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
1037                        gen6_ppgtt_unmap_pages(ppgtt);
1038                        return -EIO;
1039                }
1040
1041                ppgtt->pt_dma_addr[i] = pt_addr;
1042        }
1043
1044        return 0;
1045}
1046
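/*
 * Setup order: pick a per-platform switch_mm, reserve GGTT space for the
 * 512-entry page directory, allocate and DMA-map the backing page tables,
 * then derive pd_offset from wherever the directory landed (GGTT entry index
 * node.start / PAGE_SIZE, times 4 bytes per entry).
 */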
1047static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1048{
1049        struct drm_device *dev = ppgtt->base.dev;
1050        struct drm_i915_private *dev_priv = dev->dev_private;
1051        int ret;
1052
1053        ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
1054        if (IS_GEN6(dev)) {
1055                ppgtt->switch_mm = gen6_mm_switch;
1056        } else if (IS_HASWELL(dev)) {
1057                ppgtt->switch_mm = hsw_mm_switch;
1058        } else if (IS_GEN7(dev)) {
1059                ppgtt->switch_mm = gen7_mm_switch;
1060        } else
1061                BUG();
1062
1063        ret = gen6_ppgtt_alloc(ppgtt);
1064        if (ret)
1065                return ret;
1066
1067        ret = gen6_ppgtt_setup_page_tables(ppgtt);
1068        if (ret) {
1069                gen6_ppgtt_free(ppgtt);
1070                return ret;
1071        }
1072
1073        ppgtt->base.clear_range = gen6_ppgtt_clear_range;
1074        ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
1075        ppgtt->base.cleanup = gen6_ppgtt_cleanup;
1076        ppgtt->base.start = 0;
1077        ppgtt->base.total =  ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
1078        ppgtt->debug_dump = gen6_dump_ppgtt;
1079
1080        ppgtt->pd_offset =
1081                ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
1082
1083        ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
1084
1085        DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
1086                         ppgtt->node.size >> 20,
1087                         ppgtt->node.start / PAGE_SIZE);
1088
1089        gen6_write_pdes(ppgtt);
1090        DRM_DEBUG("Adding PPGTT at offset %x\n",
1091                  ppgtt->pd_offset << 10);
1092
1093        return 0;
1094}
1095
1096static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
1097{
1098        struct drm_i915_private *dev_priv = dev->dev_private;
1099
1100        ppgtt->base.dev = dev;
1101        ppgtt->base.scratch = dev_priv->gtt.base.scratch;
1102
1103        if (INTEL_INFO(dev)->gen < 8)
1104                return gen6_ppgtt_init(ppgtt);
1105        else if (IS_GEN8(dev) || IS_GEN9(dev))
1106                return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
1107        else
1108                BUG();
1109}
1110int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
1111{
1112        struct drm_i915_private *dev_priv = dev->dev_private;
1113        int ret = 0;
1114
1115        ret = __hw_ppgtt_init(dev, ppgtt);
1116        if (ret == 0) {
1117                kref_init(&ppgtt->ref);
1118                drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
1119                            ppgtt->base.total);
1120                i915_init_vm(dev_priv, &ppgtt->base);
1121        }
1122
1123        return ret;
1124}
1125
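/*
 * Hardware-side enabling: set the per-generation PPGTT enable bits and, if an
 * aliasing PPGTT exists, do an initial switch_mm on every ring so the rings
 * pick up its page directory. Execlists skip all of this since the context
 * descriptor carries the PDPs.
 */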
1126int i915_ppgtt_init_hw(struct drm_device *dev)
1127{
1128        struct drm_i915_private *dev_priv = dev->dev_private;
1129        struct intel_engine_cs *ring;
1130        struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
1131        int i, ret = 0;
1132
1133        /* In the case of execlists, PPGTT is enabled by the context descriptor
1134         * and the PDPs are contained within the context itself.  We don't
1135         * need to do anything here. */
1136        if (i915.enable_execlists)
1137                return 0;
1138
1139        if (!USES_PPGTT(dev))
1140                return 0;
1141
1142        if (IS_GEN6(dev))
1143                gen6_ppgtt_enable(dev);
1144        else if (IS_GEN7(dev))
1145                gen7_ppgtt_enable(dev);
1146        else if (INTEL_INFO(dev)->gen >= 8)
1147                gen8_ppgtt_enable(dev);
1148        else
1149                WARN_ON(1);
1150
1151        if (ppgtt) {
1152                for_each_ring(ring, dev_priv, i) {
1153                        ret = ppgtt->switch_mm(ppgtt, ring);
1154                        if (ret != 0)
1155                                return ret;
1156                }
1157        }
1158
1159        return ret;
1160}
1161struct i915_hw_ppgtt *
1162i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
1163{
1164        struct i915_hw_ppgtt *ppgtt;
1165        int ret;
1166
1167        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1168        if (!ppgtt)
1169                return ERR_PTR(-ENOMEM);
1170
1171        ret = i915_ppgtt_init(dev, ppgtt);
1172        if (ret) {
1173                kfree(ppgtt);
1174                return ERR_PTR(ret);
1175        }
1176
1177        ppgtt->file_priv = fpriv;
1178
1179        trace_i915_ppgtt_create(&ppgtt->base);
1180
1181        return ppgtt;
1182}
1183
 1184void i915_ppgtt_release(struct kref *kref)
1185{
1186        struct i915_hw_ppgtt *ppgtt =
1187                container_of(kref, struct i915_hw_ppgtt, ref);
1188
1189        trace_i915_ppgtt_release(&ppgtt->base);
1190
1191        /* vmas should already be unbound */
1192        WARN_ON(!list_empty(&ppgtt->base.active_list));
1193        WARN_ON(!list_empty(&ppgtt->base.inactive_list));
1194
1195        list_del(&ppgtt->base.global_link);
1196        drm_mm_takedown(&ppgtt->base.mm);
1197
1198        ppgtt->base.cleanup(&ppgtt->base);
1199        kfree(ppgtt);
1200}
1201
1202static void
1203ppgtt_bind_vma(struct i915_vma *vma,
1204               enum i915_cache_level cache_level,
1205               u32 flags)
1206{
1207        /* Currently applicable only to VLV */
1208        if (vma->obj->gt_ro)
1209                flags |= PTE_READ_ONLY;
1210
1211        vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
1212                                cache_level, flags);
1213}
1214
1215static void ppgtt_unbind_vma(struct i915_vma *vma)
1216{
1217        vma->vm->clear_range(vma->vm,
1218                             vma->node.start,
1219                             vma->obj->base.size,
1220                             true);
1221}
1222
1223extern int intel_iommu_gfx_mapped;
 1224/* Certain Gen5 chipsets require idling the GPU before
1225 * unmapping anything from the GTT when VT-d is enabled.
1226 */
1227static inline bool needs_idle_maps(struct drm_device *dev)
1228{
1229#ifdef CONFIG_INTEL_IOMMU
1230        /* Query intel_iommu to see if we need the workaround. Presumably that
1231         * was loaded first.
1232         */
1233        if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
1234                return true;
1235#endif
1236        return false;
1237}
1238
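/*
 * do_idling()/undo_idling() bracket GTT updates on the VT-d-affected Ironlake
 * systems flagged above: switch to non-interruptible mode, idle the GPU, and
 * return the previous interruptible state so it can be restored afterwards.
 */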
1239static bool do_idling(struct drm_i915_private *dev_priv)
1240{
1241        bool ret = dev_priv->mm.interruptible;
1242
1243        if (unlikely(dev_priv->gtt.do_idle_maps)) {
1244                dev_priv->mm.interruptible = false;
1245                if (i915_gpu_idle(dev_priv->dev)) {
1246                        DRM_ERROR("Couldn't idle GPU\n");
1247                        /* Wait a bit, in hopes it avoids the hang */
1248                        udelay(10);
1249                }
1250        }
1251
1252        return ret;
1253}
1254
1255static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
1256{
1257        if (unlikely(dev_priv->gtt.do_idle_maps))
1258                dev_priv->mm.interruptible = interruptible;
1259}
1260
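/*
 * Scans each ring's fault register, logs any valid fault (address, address
 * space, source ID, fault type) and clears the valid bit. Called from the
 * suspend/restore paths below.
 */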
1261void i915_check_and_clear_faults(struct drm_device *dev)
1262{
1263        struct drm_i915_private *dev_priv = dev->dev_private;
1264        struct intel_engine_cs *ring;
1265        int i;
1266
1267        if (INTEL_INFO(dev)->gen < 6)
1268                return;
1269
1270        for_each_ring(ring, dev_priv, i) {
1271                u32 fault_reg;
1272                fault_reg = I915_READ(RING_FAULT_REG(ring));
1273                if (fault_reg & RING_FAULT_VALID) {
1274                        DRM_DEBUG_DRIVER("Unexpected fault\n"
1275                                         "\tAddr: 0x%08lx\n"
1276                                         "\tAddress space: %s\n"
1277                                         "\tSource ID: %d\n"
1278                                         "\tType: %d\n",
1279                                         fault_reg & PAGE_MASK,
1280                                         fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
1281                                         RING_FAULT_SRCID(fault_reg),
1282                                         RING_FAULT_FAULT_TYPE(fault_reg));
1283                        I915_WRITE(RING_FAULT_REG(ring),
1284                                   fault_reg & ~RING_FAULT_VALID);
1285                }
1286        }
1287        POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
1288}
1289
1290static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
1291{
1292        if (INTEL_INFO(dev_priv->dev)->gen < 6) {
1293                intel_gtt_chipset_flush();
1294        } else {
1295                I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1296                POSTING_READ(GFX_FLSH_CNTL_GEN6);
1297        }
1298}
1299
1300void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
1301{
1302        struct drm_i915_private *dev_priv = dev->dev_private;
1303
1304        /* Don't bother messing with faults pre GEN6 as we have little
1305         * documentation supporting that it's a good idea.
1306         */
1307        if (INTEL_INFO(dev)->gen < 6)
1308                return;
1309
1310        i915_check_and_clear_faults(dev);
1311
1312        dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1313                                       dev_priv->gtt.base.start,
1314                                       dev_priv->gtt.base.total,
1315                                       true);
1316
1317        i915_ggtt_flush(dev_priv);
1318}
1319
1320void i915_gem_restore_gtt_mappings(struct drm_device *dev)
1321{
1322        struct drm_i915_private *dev_priv = dev->dev_private;
1323        struct drm_i915_gem_object *obj;
1324        struct i915_address_space *vm;
1325
1326        i915_check_and_clear_faults(dev);
1327
1328        /* First fill our portion of the GTT with scratch pages */
1329        dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1330                                       dev_priv->gtt.base.start,
1331                                       dev_priv->gtt.base.total,
1332                                       true);
1333
1334        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
1335                struct i915_vma *vma = i915_gem_obj_to_vma(obj,
1336                                                           &dev_priv->gtt.base);
1337                if (!vma)
1338                        continue;
1339
1340                i915_gem_clflush_object(obj, obj->pin_display);
1341                /* The bind_vma code tries to be smart about tracking mappings.
1342                 * Unfortunately above, we've just wiped out the mappings
1343                 * without telling our object about it. So we need to fake it.
1344                 */
1345                vma->bound &= ~GLOBAL_BIND;
1346                vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
1347        }
1348
1349
1350        if (INTEL_INFO(dev)->gen >= 8) {
1351                if (IS_CHERRYVIEW(dev))
1352                        chv_setup_private_ppat(dev_priv);
1353                else
1354                        bdw_setup_private_ppat(dev_priv);
1355
1356                return;
1357        }
1358
1359        list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
1360                /* TODO: Perhaps it shouldn't be gen6 specific */
1361                if (i915_is_ggtt(vm)) {
1362                        if (dev_priv->mm.aliasing_ppgtt)
1363                                gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
1364                        continue;
1365                }
1366
1367                gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
1368        }
1369
1370        i915_ggtt_flush(dev_priv);
1371}
1372
1373int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
1374{
1375        if (obj->has_dma_mapping)
1376                return 0;
1377
1378        if (!dma_map_sg(&obj->base.dev->pdev->dev,
1379                        obj->pages->sgl, obj->pages->nents,
1380                        PCI_DMA_BIDIRECTIONAL))
1381                return -ENOSPC;
1382
1383        return 0;
1384}
1385
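/* 64-bit GGTT PTE write: use writeq where the architecture provides it,
 * otherwise fall back to two 32-bit writes (low dword, then high).
 */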
1386static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
1387{
1388#ifdef writeq
1389        writeq(pte, addr);
1390#else
1391        iowrite32((u32)pte, addr);
1392        iowrite32(pte >> 32, addr + 4);
1393#endif
1394}
1395
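/*
 * GGTT variant of the gen8 insert path: PTEs are written directly through the
 * ioremapped gsm aperture rather than via kmap'd page table pages, and the
 * update is finished off with a posting read plus a GFX_FLSH_CNTL write to
 * flush TLBs.
 */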
1396static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
1397                                     struct sg_table *st,
1398                                     uint64_t start,
1399                                     enum i915_cache_level level, u32 unused)
1400{
1401        struct drm_i915_private *dev_priv = vm->dev->dev_private;
1402        unsigned first_entry = start >> PAGE_SHIFT;
1403        gen8_gtt_pte_t __iomem *gtt_entries =
1404                (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1405        int i = 0;
1406        struct sg_page_iter sg_iter;
1407        dma_addr_t addr = 0; /* shut up gcc */
1408
1409        for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
1410                addr = sg_dma_address(sg_iter.sg) +
1411                        (sg_iter.sg_pgoffset << PAGE_SHIFT);
1412                gen8_set_pte(&gtt_entries[i],
1413                             gen8_pte_encode(addr, level, true));
1414                i++;
1415        }
1416
1417        /*
1418         * XXX: This serves as a posting read to make sure that the PTE has
1419         * actually been updated. There is some concern that even though
 1420         * registers and PTEs are within the same BAR, they may be subject to
 1421         * different (NUMA-like) access patterns. Therefore, even with the way we
 1422         * assume hardware should work, we must keep this posting read for paranoia.
1423         */
1424        if (i != 0)
1425                WARN_ON(readq(&gtt_entries[i-1])
1426                        != gen8_pte_encode(addr, level, true));
1427
1428        /* This next bit makes the above posting read even more important. We
1429         * want to flush the TLBs only after we're certain all the PTE updates
1430         * have finished.
1431         */
1432        I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1433        POSTING_READ(GFX_FLSH_CNTL_GEN6);
1434}
1435
1436/*
1437 * Binds an object into the global gtt with the specified cache level. The object
1438 * will be accessible to the GPU via commands whose operands reference offsets
1439 * within the global GTT as well as accessible by the GPU through the GMADR
1440 * mapped BAR (dev_priv->mm.gtt->gtt).
1441 */
1442static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
1443                                     struct sg_table *st,
1444                                     uint64_t start,
1445                                     enum i915_cache_level level, u32 flags)
1446{
1447        struct drm_i915_private *dev_priv = vm->dev->dev_private;
1448        unsigned first_entry = start >> PAGE_SHIFT;
1449        gen6_gtt_pte_t __iomem *gtt_entries =
1450                (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1451        int i = 0;
1452        struct sg_page_iter sg_iter;
1453        dma_addr_t addr = 0;
1454
1455        for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
1456                addr = sg_page_iter_dma_address(&sg_iter);
1457                iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
1458                i++;
1459        }
1460
1461        /* XXX: This serves as a posting read to make sure that the PTE has
1462         * actually been updated. There is some concern that even though
 1463         * registers and PTEs are within the same BAR, they may be subject to
 1464         * different (NUMA-like) access patterns. Therefore, even with the way we
 1465         * assume hardware should work, we must keep this posting read for paranoia.
1466         */
1467        if (i != 0) {
1468                unsigned long gtt = readl(&gtt_entries[i-1]);
1469                WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
1470        }
1471
1472        /* This next bit makes the above posting read even more important. We
1473         * want to flush the TLBs only after we're certain all the PTE updates
1474         * have finished.
1475         */
1476        I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1477        POSTING_READ(GFX_FLSH_CNTL_GEN6);
1478}
1479
1480static void gen8_ggtt_clear_range(struct i915_address_space *vm,
1481                                  uint64_t start,
1482                                  uint64_t length,
1483                                  bool use_scratch)
1484{
1485        struct drm_i915_private *dev_priv = vm->dev->dev_private;
1486        unsigned first_entry = start >> PAGE_SHIFT;
1487        unsigned num_entries = length >> PAGE_SHIFT;
1488        gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
1489                (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
1490        const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
1491        int i;
1492
1493        if (WARN(num_entries > max_entries,
1494                 "First entry = %d; Num entries = %d (max=%d)\n",
1495                 first_entry, num_entries, max_entries))
1496                num_entries = max_entries;
1497
1498        scratch_pte = gen8_pte_encode(vm->scratch.addr,
1499                                      I915_CACHE_LLC,
1500                                      use_scratch);
1501        for (i = 0; i < num_entries; i++)
1502                gen8_set_pte(&gtt_base[i], scratch_pte);
1503        readl(gtt_base);
1504}
1505
1506static void gen6_ggtt_clear_range(struct i915_address_space *vm,
1507                                  uint64_t start,
1508                                  uint64_t length,
1509                                  bool use_scratch)
1510{
1511        struct drm_i915_private *dev_priv = vm->dev->dev_private;
1512        unsigned first_entry = start >> PAGE_SHIFT;
1513        unsigned num_entries = length >> PAGE_SHIFT;
1514        gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
1515                (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
1516        const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
1517        int i;
1518
1519        if (WARN(num_entries > max_entries,
1520                 "First entry = %d; Num entries = %d (max=%d)\n",
1521                 first_entry, num_entries, max_entries))
1522                num_entries = max_entries;
1523
1524        scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
1525
1526        for (i = 0; i < num_entries; i++)
1527                iowrite32(scratch_pte, &gtt_base[i]);
1528        readl(gtt_base);
1529}
1530
1531
1532static void i915_ggtt_bind_vma(struct i915_vma *vma,
1533                               enum i915_cache_level cache_level,
1534                               u32 unused)
1535{
1536        const unsigned long entry = vma->node.start >> PAGE_SHIFT;
1537        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
1538                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
1539
1540        BUG_ON(!i915_is_ggtt(vma->vm));
1541        intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
1542        vma->bound = GLOBAL_BIND;
1543}
1544
1545static void i915_ggtt_clear_range(struct i915_address_space *vm,
1546                                  uint64_t start,
1547                                  uint64_t length,
1548                                  bool unused)
1549{
1550        unsigned first_entry = start >> PAGE_SHIFT;
1551        unsigned num_entries = length >> PAGE_SHIFT;
1552        intel_gtt_clear_range(first_entry, num_entries);
1553}
1554
1555static void i915_ggtt_unbind_vma(struct i915_vma *vma)
1556{
1557        const unsigned int first = vma->node.start >> PAGE_SHIFT;
1558        const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
1559
1560        BUG_ON(!i915_is_ggtt(vma->vm));
1561        vma->bound = 0;
1562        intel_gtt_clear_range(first, size);
1563}
1564
1565static void ggtt_bind_vma(struct i915_vma *vma,
1566                          enum i915_cache_level cache_level,
1567                          u32 flags)
1568{
1569        struct drm_device *dev = vma->vm->dev;
1570        struct drm_i915_private *dev_priv = dev->dev_private;
1571        struct drm_i915_gem_object *obj = vma->obj;
1572
1573        /* Currently applicable only to VLV */
1574        if (obj->gt_ro)
1575                flags |= PTE_READ_ONLY;
1576
1577        /* If there is no aliasing PPGTT, or the caller needs a global mapping,
1578         * or we have a global mapping already but the cacheability flags have
1579         * changed, set the global PTEs.
1580         *
1581         * If there is an aliasing PPGTT it is anecdotally faster, so use that
1582         * instead if none of the above hold true.
1583         *
1584         * NB: A global mapping should only be needed for special regions like
1585         * "gtt mappable", SNB errata, or if specified via special execbuf
1586         * flags. At all other times, the GPU will use the aliasing PPGTT.
1587         */
1588        if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
1589                if (!(vma->bound & GLOBAL_BIND) ||
1590                    (cache_level != obj->cache_level)) {
1591                        vma->vm->insert_entries(vma->vm, obj->pages,
1592                                                vma->node.start,
1593                                                cache_level, flags);
1594                        vma->bound |= GLOBAL_BIND;
1595                }
1596        }
1597
1598        if (dev_priv->mm.aliasing_ppgtt &&
1599            (!(vma->bound & LOCAL_BIND) ||
1600             (cache_level != obj->cache_level))) {
1601                struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
1602                appgtt->base.insert_entries(&appgtt->base,
1603                                            vma->obj->pages,
1604                                            vma->node.start,
1605                                            cache_level, flags);
1606                vma->bound |= LOCAL_BIND;
1607        }
1608}
1609
1610static void ggtt_unbind_vma(struct i915_vma *vma)
1611{
1612        struct drm_device *dev = vma->vm->dev;
1613        struct drm_i915_private *dev_priv = dev->dev_private;
1614        struct drm_i915_gem_object *obj = vma->obj;
1615
1616        if (vma->bound & GLOBAL_BIND) {
1617                vma->vm->clear_range(vma->vm,
1618                                     vma->node.start,
1619                                     obj->base.size,
1620                                     true);
1621                vma->bound &= ~GLOBAL_BIND;
1622        }
1623
1624        if (vma->bound & LOCAL_BIND) {
1625                struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
1626                appgtt->base.clear_range(&appgtt->base,
1627                                         vma->node.start,
1628                                         obj->base.size,
1629                                         true);
1630                vma->bound &= ~LOCAL_BIND;
1631        }
1632}
1633
1634void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
1635{
1636        struct drm_device *dev = obj->base.dev;
1637        struct drm_i915_private *dev_priv = dev->dev_private;
1638        bool interruptible;
1639
1640        interruptible = do_idling(dev_priv);
1641
1642        if (!obj->has_dma_mapping)
1643                dma_unmap_sg(&dev->pdev->dev,
1644                             obj->pages->sgl, obj->pages->nents,
1645                             PCI_DMA_BIDIRECTIONAL);
1646
1647        undo_idling(dev_priv, interruptible);
1648}
1649
1650static void i915_gtt_color_adjust(struct drm_mm_node *node,
1651                                  unsigned long color,
1652                                  unsigned long *start,
1653                                  unsigned long *end)
1654{
1655        if (node->color != color)
1656                *start += 4096;
1657
1658        if (!list_empty(&node->node_list)) {
1659                node = list_entry(node->node_list.next,
1660                                  struct drm_mm_node,
1661                                  node_list);
1662                if (node->allocated && node->color != color)
1663                        *end -= 4096;
1664        }
1665}
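
/*
 * Editor's illustrative aside (not part of the driver): a minimal standalone
 * model of the coloring rule above. The hole being considered is shrunk by one
 * page on each side whose neighboring node carries a different cache color, so
 * differently-cached objects never end up page-adjacent; this callback is only
 * installed on !LLC machines (see i915_gem_setup_global_gtt). The toy_* types
 * and values are hypothetical stand-ins, not i915/drm_mm types.
 */
#if 0
#include <assert.h>

struct toy_node { unsigned long color; int allocated; };

static void toy_color_adjust(const struct toy_node *prev,
			     const struct toy_node *next,
			     unsigned long color,
			     unsigned long *start, unsigned long *end)
{
	if (prev->color != color)
		*start += 4096;			/* guard page below */
	if (next && next->allocated && next->color != color)
		*end -= 4096;			/* guard page above */
}

int main(void)
{
	struct toy_node prev = { .color = 0, .allocated = 1 };
	struct toy_node next = { .color = 1, .allocated = 1 };
	unsigned long start = 0x10000, end = 0x20000;

	/* Placing a color-1 object into this hole: only the lower neighbor
	 * differs, so just the start moves up by one page. */
	toy_color_adjust(&prev, &next, 1, &start, &end);
	assert(start == 0x11000 && end == 0x20000);
	return 0;
}
#endif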
1666
1667static int i915_gem_setup_global_gtt(struct drm_device *dev,
1668                                     unsigned long start,
1669                                     unsigned long mappable_end,
1670                                     unsigned long end)
1671{
1672        /* Let GEM Manage all of the aperture.
1673         *
1674         * However, leave one page at the end still bound to the scratch page.
1675         * There are a number of places where the hardware apparently prefetches
1676         * past the end of the object, and we've seen multiple hangs with the
1677         * GPU head pointer stuck in a batchbuffer bound at the last page of the
1678         * aperture.  One page should be enough to keep any prefetching inside
1679         * of the aperture.
1680         */
1681        struct drm_i915_private *dev_priv = dev->dev_private;
1682        struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
1683        struct drm_mm_node *entry;
1684        struct drm_i915_gem_object *obj;
1685        unsigned long hole_start, hole_end;
1686        int ret;
1687
1688        BUG_ON(mappable_end > end);
1689
1690        /* Subtract the guard page ... */
1691        drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
1692        if (!HAS_LLC(dev))
1693                dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
1694
1695        /* Mark any preallocated objects as occupied */
1696        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
1697                struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
1698
1699                DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
1700                              i915_gem_obj_ggtt_offset(obj), obj->base.size);
1701
1702                WARN_ON(i915_gem_obj_ggtt_bound(obj));
1703                ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
1704                if (ret) {
1705                        DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
1706                        return ret;
1707                }
1708                vma->bound |= GLOBAL_BIND;
1709        }
1710
1711        dev_priv->gtt.base.start = start;
1712        dev_priv->gtt.base.total = end - start;
1713
1714        /* Clear any non-preallocated blocks */
1715        drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
1716                DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
1717                              hole_start, hole_end);
1718                ggtt_vm->clear_range(ggtt_vm, hole_start,
1719                                     hole_end - hole_start, true);
1720        }
1721
1722        /* And finally clear the reserved guard page */
1723        ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
1724
1725        if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
1726                struct i915_hw_ppgtt *ppgtt;
1727
1728                ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1729                if (!ppgtt)
1730                        return -ENOMEM;
1731
1732                ret = __hw_ppgtt_init(dev, ppgtt);
1733                if (ret != 0) {
                            kfree(ppgtt); /* don't leak the ppgtt on init failure */
1734                        return ret;
                    }
1735
1736                dev_priv->mm.aliasing_ppgtt = ppgtt;
1737        }
1738
1739        return 0;
1740}
1741
1742void i915_gem_init_global_gtt(struct drm_device *dev)
1743{
1744        struct drm_i915_private *dev_priv = dev->dev_private;
1745        unsigned long gtt_size, mappable_size;
1746
1747        gtt_size = dev_priv->gtt.base.total;
1748        mappable_size = dev_priv->gtt.mappable_end;
1749
1750        i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
1751}
1752
1753void i915_global_gtt_cleanup(struct drm_device *dev)
1754{
1755        struct drm_i915_private *dev_priv = dev->dev_private;
1756        struct i915_address_space *vm = &dev_priv->gtt.base;
1757
1758        if (dev_priv->mm.aliasing_ppgtt) {
1759                struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
1760
1761                ppgtt->base.cleanup(&ppgtt->base);
1762        }
1763
1764        if (drm_mm_initialized(&vm->mm)) {
1765                drm_mm_takedown(&vm->mm);
1766                list_del(&vm->global_link);
1767        }
1768
1769        vm->cleanup(vm);
1770}
1771
1772static int setup_scratch_page(struct drm_device *dev)
1773{
1774        struct drm_i915_private *dev_priv = dev->dev_private;
1775        struct page *page;
1776        dma_addr_t dma_addr;
1777
1778        page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
1779        if (page == NULL)
1780                return -ENOMEM;
1781        set_pages_uc(page, 1);
1782
1783#ifdef CONFIG_INTEL_IOMMU
1784        dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
1785                                PCI_DMA_BIDIRECTIONAL);
1786        if (pci_dma_mapping_error(dev->pdev, dma_addr)) {
                    __free_page(page); /* don't leak the scratch page on error */
1787                return -EINVAL;
            }
1788#else
1789        dma_addr = page_to_phys(page);
1790#endif
1791        dev_priv->gtt.base.scratch.page = page;
1792        dev_priv->gtt.base.scratch.addr = dma_addr;
1793
1794        return 0;
1795}
1796
1797static void teardown_scratch_page(struct drm_device *dev)
1798{
1799        struct drm_i915_private *dev_priv = dev->dev_private;
1800        struct page *page = dev_priv->gtt.base.scratch.page;
1801
1802        set_pages_wb(page, 1);
1803        pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
1804                       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
1805        __free_page(page);
1806}
1807
1808static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
1809{
1810        snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
1811        snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
1812        return snb_gmch_ctl << 20;
1813}
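
/*
 * Editor's worked example (illustrative, not compiled into the driver): the
 * GGMS field decoded above gives how many MB of the BAR hold global PTEs, and
 * gen6_gmch_probe() later converts that into addressable GTT space as
 * (bytes of PTEs / bytes per PTE) << PAGE_SHIFT. Assuming a 4-byte gen6 PTE
 * and 4KiB pages, 2MiB of PTEs maps 2GiB of GTT address space. The helper
 * below is a hypothetical standalone check of that arithmetic.
 */
#if 0
#include <assert.h>
#include <stddef.h>

static unsigned long long gtt_space_from_pte_bytes(size_t pte_bytes,
						   size_t bytes_per_pte)
{
	return (unsigned long long)(pte_bytes / bytes_per_pte) * 4096ULL;
}

int main(void)
{
	/* GGMS field value 2 -> 2 << 20 bytes of PTEs (see above). */
	size_t gtt_size = 2u << 20;

	assert(gtt_space_from_pte_bytes(gtt_size, 4) == 2ULL << 30); /* gen6 */
	return 0;
}
#endif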
1814
1815static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
1816{
1817        bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
1818        bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
1819        if (bdw_gmch_ctl)
1820                bdw_gmch_ctl = 1 << bdw_gmch_ctl;
1821
1822#ifdef CONFIG_X86_32
1823        /* Limit 32b platforms to a 2GB GGTT: 4MB of PTEs / 8B per PTE * PAGE_SIZE = 2GB */
1824        if (bdw_gmch_ctl > 4)
1825                bdw_gmch_ctl = 4;
1826#endif
1827
1828        return bdw_gmch_ctl << 20;
1829}
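
/*
 * Editor's worked example for the 32-bit cap above (illustrative only):
 * assuming 8-byte gen8 PTEs, capping the PTE table at 4MiB limits the GGTT to
 * 4MiB / 8 * 4KiB = 2GiB, which is as much as a 32-bit kernel can sensibly
 * address. The sketch below just re-derives that figure.
 */
#if 0
#include <assert.h>

int main(void)
{
	unsigned long long pte_bytes = 4ULL << 20;	/* capped PTE space */
	unsigned long long ggtt = pte_bytes / 8 * 4096;	/* 8 bytes per PTE  */

	assert(ggtt == 2ULL << 30);			/* 2GiB GGTT        */
	return 0;
}
#endif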
1830
1831static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
1832{
1833        gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
1834        gmch_ctrl &= SNB_GMCH_GGMS_MASK;
1835
1836        if (gmch_ctrl)
1837                return 1 << (20 + gmch_ctrl);
1838
1839        return 0;
1840}
1841
1842static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
1843{
1844        snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
1845        snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
1846        return snb_gmch_ctl << 25; /* 32 MB units */
1847}
1848
1849static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
1850{
1851        bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
1852        bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
1853        return bdw_gmch_ctl << 25; /* 32 MB units */
1854}
1855
1856static size_t chv_get_stolen_size(u16 gmch_ctrl)
1857{
1858        gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
1859        gmch_ctrl &= SNB_GMCH_GMS_MASK;
1860
1861        /*
1862         * 0x0  to 0x10: 32MB increments starting at 0MB
1863         * 0x11 to 0x16: 4MB increments starting at 8MB
1864         * 0x17 to 0x1d: 4MB increments starting at 36MB
1865         */
1866        if (gmch_ctrl < 0x11)
1867                return gmch_ctrl << 25;
1868        else if (gmch_ctrl < 0x17)
1869                return (gmch_ctrl - 0x11 + 2) << 22;
1870        else
1871                return (gmch_ctrl - 0x17 + 9) << 22;
1872}
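
/*
 * Editor's worked decode of a few CHV GMS values (illustrative, mirroring the
 * ranges in the comment above): 0x10 -> 0x10 << 25 = 512MB, 0x11 -> 2 << 22 =
 * 8MB, 0x17 -> 9 << 22 = 36MB. The hypothetical helper below re-runs the same
 * formula outside the driver, operating on the already-extracted GMS field.
 */
#if 0
#include <assert.h>
#include <stddef.h>

static size_t toy_chv_stolen(unsigned gms)
{
	if (gms < 0x11)
		return (size_t)gms << 25;
	else if (gms < 0x17)
		return (size_t)(gms - 0x11 + 2) << 22;
	else
		return (size_t)(gms - 0x17 + 9) << 22;
}

int main(void)
{
	assert(toy_chv_stolen(0x10) == 512u << 20);
	assert(toy_chv_stolen(0x11) ==   8u << 20);
	assert(toy_chv_stolen(0x17) ==  36u << 20);
	return 0;
}
#endif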
1873
1874static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
1875{
1876        gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
1877        gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
1878
1879        if (gen9_gmch_ctl < 0xf0)
1880                return gen9_gmch_ctl << 25; /* 32 MB units */
1881        else
1882                 /* 0xf0 and above: 4MB increments, starting at 4MB for 0xf0 */
1883                return (gen9_gmch_ctl - 0xf0 + 1) << 22;
1884}
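
/*
 * Editor's worked decode of a few gen9 GMS values (illustrative): below 0xf0
 * the field counts 32MB units (0x08 -> 256MB), while 0xf0 and above count 4MB
 * units starting at 4MB (0xf0 -> 4MB, 0xf1 -> 8MB). The hypothetical helper
 * below re-runs the formula on the already-extracted field value.
 */
#if 0
#include <assert.h>
#include <stddef.h>

static size_t toy_gen9_stolen(unsigned gms)
{
	if (gms < 0xf0)
		return (size_t)gms << 25;		/* 32MB units */
	else
		return (size_t)(gms - 0xf0 + 1) << 22;	/* 4MB units  */
}

int main(void)
{
	assert(toy_gen9_stolen(0x08) == 256u << 20);
	assert(toy_gen9_stolen(0xf0) ==   4u << 20);
	assert(toy_gen9_stolen(0xf1) ==   8u << 20);
	return 0;
}
#endif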
1885
1886static int ggtt_probe_common(struct drm_device *dev,
1887                             size_t gtt_size)
1888{
1889        struct drm_i915_private *dev_priv = dev->dev_private;
1890        phys_addr_t gtt_phys_addr;
1891        int ret;
1892
1893        /* On modern gens the BAR is split in half: registers first, then the PTEs (GSM) */
1894        gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
1895                (pci_resource_len(dev->pdev, 0) / 2);
1896
1897        dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
1898        if (!dev_priv->gtt.gsm) {
1899                DRM_ERROR("Failed to map the gtt page table\n");
1900                return -ENOMEM;
1901        }
1902
1903        ret = setup_scratch_page(dev);
1904        if (ret) {
1905                DRM_ERROR("Scratch setup failed\n");
1906                /* iounmap will also get called at remove, but meh */
1907                iounmap(dev_priv->gtt.gsm);
1908        }
1909
1910        return ret;
1911}
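
/*
 * Editor's illustrative aside on the split-BAR math above (hypothetical
 * numbers, not read from real hardware): if BAR 0 started at 0xf0000000 and
 * were 4MiB long, the registers would occupy the first 2MiB and the GSM (the
 * global PTE array that gets ioremap_wc'd) the second, i.e. 0xf0200000.
 */
#if 0
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t bar_start = 0xf0000000ULL;	/* hypothetical BAR 0 base */
	uint64_t bar_len   = 4ULL << 20;	/* hypothetical BAR 0 size */
	uint64_t gsm_phys  = bar_start + bar_len / 2;

	assert(gsm_phys == 0xf0200000ULL);
	return 0;
}
#endif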
1912
1913/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
1914 * bits. When using advanced contexts each context stores its own PAT, but
1915 * writing this data shouldn't be harmful even in those cases. */
1916static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
1917{
1918        uint64_t pat;
1919
1920        pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
1921              GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
1922              GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
1923              GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
1924              GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
1925              GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
1926              GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
1927              GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
1928
1929        if (!USES_PPGTT(dev_priv->dev))
1930                /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
1931                 * so RTL will always use the value corresponding to
1932                 * pat_sel = 000".
1933                 * So let's disable cache for GGTT to avoid screen corruptions.
1934                 * MOCS still can be used though.
1935                 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
1936                 * before this patch, i.e. the same uncached + snooping access
1937                 * as on gen6/7 seems to be in effect.
1938                 * - So this just fixes blitter/render access. Again it looks
1939                 * like it's not just uncached access, but uncached + snooping.
1940                 * So we can still hold onto all our assumptions wrt cpu
1941                 * clflushing on LLC machines.
1942                 */
1943                pat = GEN8_PPAT(0, GEN8_PPAT_UC);
1944
1945        /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
1946         * write would work. */
1947        I915_WRITE(GEN8_PRIVATE_PAT, pat);
1948        I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
1949}
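
/*
 * Editor's illustrative aside on the register writes above (TOY_PPAT is a
 * hypothetical helper, not the real GEN8_PPAT macro): assuming each of the
 * eight PPAT entries occupies one byte of a 64-bit value, entries 0-3 land in
 * the low 32-bit write and entries 4-7 in the high one, which is why the value
 * is split across GEN8_PRIVATE_PAT and GEN8_PRIVATE_PAT + 4.
 */
#if 0
#include <assert.h>
#include <stdint.h>

#define TOY_PPAT(i, x)	((uint64_t)(x) << ((i) * 8))

int main(void)
{
	uint64_t pat = TOY_PPAT(0, 0x0c) | TOY_PPAT(3, 0x00) |
		       TOY_PPAT(4, 0x06) | TOY_PPAT(7, 0x0f);
	uint32_t lo = (uint32_t)pat;		/* GEN8_PRIVATE_PAT     */
	uint32_t hi = (uint32_t)(pat >> 32);	/* GEN8_PRIVATE_PAT + 4 */

	assert((lo & 0xff) == 0x0c);		/* entry 0 */
	assert((hi & 0xff) == 0x06);		/* entry 4 */
	assert((hi >> 24)  == 0x0f);		/* entry 7 */
	return 0;
}
#endif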
1950
1951static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
1952{
1953        uint64_t pat;
1954
1955        /*
1956         * Map WB on BDW to snooped on CHV.
1957         *
1958         * Only the snoop bit has meaning for CHV, the rest is
1959         * ignored.
1960         *
1961         * The hardware will never snoop for certain types of accesses:
1962         * - CPU GTT (GMADR->GGTT->no snoop->memory)
1963         * - PPGTT page tables
1964         * - some other special cycles
1965         *
1966         * As with BDW, we also need to consider the following for GT accesses:
1967         * "For GGTT, there is NO pat_sel[2:0] from the entry,
1968         * so RTL will always use the value corresponding to
1969         * pat_sel = 000".
1970         * Which means we must set the snoop bit in PAT entry 0
1971         * in order to keep the global status page working.
1972         */
1973        pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
1974              GEN8_PPAT(1, 0) |
1975              GEN8_PPAT(2, 0) |
1976              GEN8_PPAT(3, 0) |
1977              GEN8_PPAT(4, CHV_PPAT_SNOOP) |
1978              GEN8_PPAT(5, CHV_PPAT_SNOOP) |
1979              GEN8_PPAT(6, CHV_PPAT_SNOOP) |
1980              GEN8_PPAT(7, CHV_PPAT_SNOOP);
1981
1982        I915_WRITE(GEN8_PRIVATE_PAT, pat);
1983        I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
1984}
1985
1986static int gen8_gmch_probe(struct drm_device *dev,
1987                           size_t *gtt_total,
1988                           size_t *stolen,
1989                           phys_addr_t *mappable_base,
1990                           unsigned long *mappable_end)
1991{
1992        struct drm_i915_private *dev_priv = dev->dev_private;
1993        unsigned int gtt_size;
1994        u16 snb_gmch_ctl;
1995        int ret;
1996
1997        /* TODO: We're not aware of mappable constraints on gen8 yet */
1998        *mappable_base = pci_resource_start(dev->pdev, 2);
1999        *mappable_end = pci_resource_len(dev->pdev, 2);
2000
2001        if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
2002                pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
2003
2004        pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2005
2006        if (INTEL_INFO(dev)->gen >= 9) {
2007                *stolen = gen9_get_stolen_size(snb_gmch_ctl);
2008                gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2009        } else if (IS_CHERRYVIEW(dev)) {
2010                *stolen = chv_get_stolen_size(snb_gmch_ctl);
2011                gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
2012        } else {
2013                *stolen = gen8_get_stolen_size(snb_gmch_ctl);
2014                gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2015        }
2016
2017        *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
2018
2019        if (IS_CHERRYVIEW(dev))
2020                chv_setup_private_ppat(dev_priv);
2021        else
2022                bdw_setup_private_ppat(dev_priv);
2023
2024        ret = ggtt_probe_common(dev, gtt_size);
2025
2026        dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
2027        dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
2028
2029        return ret;
2030}
2031
2032static int gen6_gmch_probe(struct drm_device *dev,
2033                           size_t *gtt_total,
2034                           size_t *stolen,
2035                           phys_addr_t *mappable_base,
2036                           unsigned long *mappable_end)
2037{
2038        struct drm_i915_private *dev_priv = dev->dev_private;
2039        unsigned int gtt_size;
2040        u16 snb_gmch_ctl;
2041        int ret;
2042
2043        *mappable_base = pci_resource_start(dev->pdev, 2);
2044        *mappable_end = pci_resource_len(dev->pdev, 2);
2045
2046        /* 64/512MB is the current min/max we actually know of, but this is just
2047         * a coarse sanity check.
2048         */
2049        if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
2050                DRM_ERROR("Unknown GMADR size (%lx)\n",
2051                          dev_priv->gtt.mappable_end);
2052                return -ENXIO;
2053        }
2054
2055        if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
2056                pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
2057        pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2058
2059        *stolen = gen6_get_stolen_size(snb_gmch_ctl);
2060
2061        gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
2062        *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
2063
2064        ret = ggtt_probe_common(dev, gtt_size);
2065
2066        dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
2067        dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
2068
2069        return ret;
2070}
2071
2072static void gen6_gmch_remove(struct i915_address_space *vm)
2073{
2075        struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
2076
2077        iounmap(gtt->gsm);
2078        teardown_scratch_page(vm->dev);
2079}
2080
2081static int i915_gmch_probe(struct drm_device *dev,
2082                           size_t *gtt_total,
2083                           size_t *stolen,
2084                           phys_addr_t *mappable_base,
2085                           unsigned long *mappable_end)
2086{
2087        struct drm_i915_private *dev_priv = dev->dev_private;
2088        int ret;
2089
2090        ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
2091        if (!ret) {
2092                DRM_ERROR("failed to set up gmch\n");
2093                return -EIO;
2094        }
2095
2096        intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
2097
2098        dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
2099        dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
2100
2101        if (unlikely(dev_priv->gtt.do_idle_maps))
2102                DRM_INFO("applying Ironlake quirks for intel_iommu\n");
2103
2104        return 0;
2105}
2106
2107static void i915_gmch_remove(struct i915_address_space *vm)
2108{
2109        intel_gmch_remove();
2110}
2111
2112int i915_gem_gtt_init(struct drm_device *dev)
2113{
2114        struct drm_i915_private *dev_priv = dev->dev_private;
2115        struct i915_gtt *gtt = &dev_priv->gtt;
2116        int ret;
2117
2118        if (INTEL_INFO(dev)->gen <= 5) {
2119                gtt->gtt_probe = i915_gmch_probe;
2120                gtt->base.cleanup = i915_gmch_remove;
2121        } else if (INTEL_INFO(dev)->gen < 8) {
2122                gtt->gtt_probe = gen6_gmch_probe;
2123                gtt->base.cleanup = gen6_gmch_remove;
2124                if (IS_HASWELL(dev) && dev_priv->ellc_size)
2125                        gtt->base.pte_encode = iris_pte_encode;
2126                else if (IS_HASWELL(dev))
2127                        gtt->base.pte_encode = hsw_pte_encode;
2128                else if (IS_VALLEYVIEW(dev))
2129                        gtt->base.pte_encode = byt_pte_encode;
2130                else if (INTEL_INFO(dev)->gen >= 7)
2131                        gtt->base.pte_encode = ivb_pte_encode;
2132                else
2133                        gtt->base.pte_encode = snb_pte_encode;
2134        } else {
2135                dev_priv->gtt.gtt_probe = gen8_gmch_probe;
2136                dev_priv->gtt.base.cleanup = gen6_gmch_remove;
2137        }
2138
2139        ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
2140                             &gtt->mappable_base, &gtt->mappable_end);
2141        if (ret)
2142                return ret;
2143
2144        gtt->base.dev = dev;
2145
2146        /* GMADR is the PCI mmio aperture into the global GTT. */
2147        DRM_INFO("Memory usable by graphics device = %zdM\n",
2148                 gtt->base.total >> 20);
2149        DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
2150        DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
2151#ifdef CONFIG_INTEL_IOMMU
2152        if (intel_iommu_gfx_mapped)
2153                DRM_INFO("VT-d active for gfx access\n");
2154#endif
2155        /*
2156         * i915.enable_ppgtt is read-only, so do an early pass to validate the
2157         * user's requested state against the hardware/driver capabilities.  We
2158         * do this now so that we can print out any log messages once rather
2159         * than every time we check intel_enable_ppgtt().
2160         */
2161        i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
2162        DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
2163
2164        return 0;
2165}
2166
2167static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
2168                                              struct i915_address_space *vm)
2169{
2170        struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
2171        if (vma == NULL)
2172                return ERR_PTR(-ENOMEM);
2173
2174        INIT_LIST_HEAD(&vma->vma_link);
2175        INIT_LIST_HEAD(&vma->mm_list);
2176        INIT_LIST_HEAD(&vma->exec_list);
2177        vma->vm = vm;
2178        vma->obj = obj;
2179
2180        switch (INTEL_INFO(vm->dev)->gen) {
2181        case 9:
2182        case 8:
2183        case 7:
2184        case 6:
2185                if (i915_is_ggtt(vm)) {
2186                        vma->unbind_vma = ggtt_unbind_vma;
2187                        vma->bind_vma = ggtt_bind_vma;
2188                } else {
2189                        vma->unbind_vma = ppgtt_unbind_vma;
2190                        vma->bind_vma = ppgtt_bind_vma;
2191                }
2192                break;
2193        case 5:
2194        case 4:
2195        case 3:
2196        case 2:
2197                BUG_ON(!i915_is_ggtt(vm));
2198                vma->unbind_vma = i915_ggtt_unbind_vma;
2199                vma->bind_vma = i915_ggtt_bind_vma;
2200                break;
2201        default:
2202                BUG();
2203        }
2204
2205        /* Keep GGTT vmas first to make debug easier */
2206        if (i915_is_ggtt(vm))
2207                list_add(&vma->vma_link, &obj->vma_list);
2208        else {
2209                list_add_tail(&vma->vma_link, &obj->vma_list);
2210                i915_ppgtt_get(i915_vm_to_ppgtt(vm));
2211        }
2212
2213        return vma;
2214}
2215
2216struct i915_vma *
2217i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
2218                                  struct i915_address_space *vm)
2219{
2220        struct i915_vma *vma;
2221
2222        vma = i915_gem_obj_to_vma(obj, vm);
2223        if (!vma)
2224                vma = __i915_gem_vma_create(obj, vm);
2225
2226        return vma;
2227}
2228