linux/drivers/gpu/drm/i915/i915_gem_gtt.c
   1/*
   2 * Copyright © 2010 Daniel Vetter
   3 * Copyright © 2011-2014 Intel Corporation
   4 *
   5 * Permission is hereby granted, free of charge, to any person obtaining a
   6 * copy of this software and associated documentation files (the "Software"),
   7 * to deal in the Software without restriction, including without limitation
   8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 * and/or sell copies of the Software, and to permit persons to whom the
  10 * Software is furnished to do so, subject to the following conditions:
  11 *
  12 * The above copyright notice and this permission notice (including the next
  13 * paragraph) shall be included in all copies or substantial portions of the
  14 * Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22 * IN THE SOFTWARE.
  23 *
  24 */
  25
  26#include <linux/seq_file.h>
  27#include <linux/stop_machine.h>
  28#include <drm/drmP.h>
  29#include <drm/i915_drm.h>
  30#include "i915_drv.h"
  31#include "i915_vgpu.h"
  32#include "i915_trace.h"
  33#include "intel_drv.h"
  34
  35/**
  36 * DOC: Global GTT views
  37 *
  38 * Background and previous state
  39 *
   40 * Historically objects could exist (be bound) in global GTT space only as
  41 * singular instances with a view representing all of the object's backing pages
  42 * in a linear fashion. This view will be called a normal view.
  43 *
  44 * To support multiple views of the same object, where the number of mapped
  45 * pages is not equal to the backing store, or where the layout of the pages
   46 * is not linear, the concept of a GGTT view was added.
  47 *
  48 * One example of an alternative view is a stereo display driven by a single
  49 * image. In this case we would have a framebuffer looking like this
  50 * (2x2 pages):
  51 *
  52 *    12
  53 *    34
  54 *
  55 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
  56 * rendering. In contrast, fed to the display engine would be an alternative
  57 * view which could look something like this:
  58 *
  59 *   1212
  60 *   3434
  61 *
   62 * In this example both the size and layout of pages in the alternative view
   63 * are different from the normal view.
  64 *
  65 * Implementation and usage
  66 *
  67 * GGTT views are implemented using VMAs and are distinguished via enum
  68 * i915_ggtt_view_type and struct i915_ggtt_view.
  69 *
   70 * A new flavour of core GEM functions which work with GGTT bound objects was
   71 * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
   72 * renaming large amounts of code. They take the struct i915_ggtt_view
  73 * parameter encapsulating all metadata required to implement a view.
  74 *
   75 * As a helper for callers which are only interested in the normal view, a
   76 * globally const i915_ggtt_view_normal singleton instance exists. All old core
   77 * GEM API functions, the ones not taking the view parameter, operate on the
   78 * normal GGTT view.
  79 *
  80 * Code wanting to add or use a new GGTT view needs to:
  81 *
  82 * 1. Add a new enum with a suitable name.
  83 * 2. Extend the metadata in the i915_ggtt_view structure if required.
  84 * 3. Add support to i915_get_vma_pages().
  85 *
  86 * New views are required to build a scatter-gather table from within the
  87 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
   88 * exists for the lifetime of a VMA.
  89 *
   90 * The core API is designed to have copy semantics, which means that a passed-in
   91 * struct i915_ggtt_view does not need to be persistent (kept around after
   92 * calling the core API functions).
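 *
 * As an illustration of the copy semantics only (a minimal sketch, assuming the
 * i915_gem_object_ggtt_pin() prototype of this kernel version; obj, ret and the
 * flags stand in for the caller's context):
 *
 *	struct i915_ggtt_view view = i915_ggtt_view_normal;
 *
 *	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
 *	if (ret)
 *		return ret;
 *
 * The local view may go out of scope as soon as the call returns, because the
 * core functions copy whatever view metadata they need.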
  93 *
  94 */
  95
  96static int
  97i915_get_ggtt_vma_pages(struct i915_vma *vma);
  98
  99const struct i915_ggtt_view i915_ggtt_view_normal = {
 100        .type = I915_GGTT_VIEW_NORMAL,
 101};
 102const struct i915_ggtt_view i915_ggtt_view_rotated = {
 103        .type = I915_GGTT_VIEW_ROTATED,
 104};
 105
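/* The value computed below mirrors the i915.enable_ppgtt module parameter
 * convention: 0 means no PPGTT (global GTT only), 1 means aliasing PPGTT,
 * 2 means full PPGTT and 3 means full PPGTT with a 48bit (4-level) address
 * space.
 */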
 106static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
 107{
 108        bool has_aliasing_ppgtt;
 109        bool has_full_ppgtt;
 110        bool has_full_48bit_ppgtt;
 111
 112        has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
 113        has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
 114        has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;
 115
 116        if (intel_vgpu_active(dev))
 117                has_full_ppgtt = false; /* emulation is too hard */
 118
 119        /*
 120         * We don't allow disabling PPGTT for gen9+ as it's a requirement for
 121         * execlists, the sole mechanism available to submit work.
 122         */
 123        if (INTEL_INFO(dev)->gen < 9 &&
 124            (enable_ppgtt == 0 || !has_aliasing_ppgtt))
 125                return 0;
 126
 127        if (enable_ppgtt == 1)
 128                return 1;
 129
 130        if (enable_ppgtt == 2 && has_full_ppgtt)
 131                return 2;
 132
 133        if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
 134                return 3;
 135
 136#ifdef CONFIG_INTEL_IOMMU
 137        /* Disable ppgtt on SNB if VT-d is on. */
 138        if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
 139                DRM_INFO("Disabling PPGTT because VT-d is on\n");
 140                return 0;
 141        }
 142#endif
 143
  144        /* Early VLV (pre-B3 steppings) doesn't support PPGTT */
 145        if (IS_VALLEYVIEW(dev) && dev->pdev->revision < 0xb) {
 146                DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
 147                return 0;
 148        }
 149
 150        if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
 151                return has_full_48bit_ppgtt ? 3 : 2;
 152        else
 153                return has_aliasing_ppgtt ? 1 : 0;
 154}
 155
 156static int ppgtt_bind_vma(struct i915_vma *vma,
 157                          enum i915_cache_level cache_level,
 158                          u32 unused)
 159{
 160        u32 pte_flags = 0;
 161
 162        /* Currently applicable only to VLV */
 163        if (vma->obj->gt_ro)
 164                pte_flags |= PTE_READ_ONLY;
 165
 166        vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
 167                                cache_level, pte_flags);
 168
 169        return 0;
 170}
 171
 172static void ppgtt_unbind_vma(struct i915_vma *vma)
 173{
 174        vma->vm->clear_range(vma->vm,
 175                             vma->node.start,
 176                             vma->obj->base.size,
 177                             true);
 178}
 179
 180static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
 181                                  enum i915_cache_level level,
 182                                  bool valid)
 183{
 184        gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
 185        pte |= addr;
 186
 187        switch (level) {
 188        case I915_CACHE_NONE:
 189                pte |= PPAT_UNCACHED_INDEX;
 190                break;
 191        case I915_CACHE_WT:
 192                pte |= PPAT_DISPLAY_ELLC_INDEX;
 193                break;
 194        default:
 195                pte |= PPAT_CACHED_INDEX;
 196                break;
 197        }
 198
 199        return pte;
 200}
 201
 202static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
 203                                  const enum i915_cache_level level)
 204{
 205        gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
 206        pde |= addr;
 207        if (level != I915_CACHE_NONE)
 208                pde |= PPAT_CACHED_PDE_INDEX;
 209        else
 210                pde |= PPAT_UNCACHED_INDEX;
 211        return pde;
 212}
 213
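/* The driver encodes gen8 PDPEs and PML4Es exactly like PDEs (present and RW
 * bits, PPAT index and the address), so the PDE helper is reused via the
 * aliases below.
 */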
 214#define gen8_pdpe_encode gen8_pde_encode
 215#define gen8_pml4e_encode gen8_pde_encode
 216
 217static gen6_pte_t snb_pte_encode(dma_addr_t addr,
 218                                 enum i915_cache_level level,
 219                                 bool valid, u32 unused)
 220{
 221        gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 222        pte |= GEN6_PTE_ADDR_ENCODE(addr);
 223
 224        switch (level) {
 225        case I915_CACHE_L3_LLC:
 226        case I915_CACHE_LLC:
 227                pte |= GEN6_PTE_CACHE_LLC;
 228                break;
 229        case I915_CACHE_NONE:
 230                pte |= GEN6_PTE_UNCACHED;
 231                break;
 232        default:
 233                MISSING_CASE(level);
 234        }
 235
 236        return pte;
 237}
 238
 239static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
 240                                 enum i915_cache_level level,
 241                                 bool valid, u32 unused)
 242{
 243        gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 244        pte |= GEN6_PTE_ADDR_ENCODE(addr);
 245
 246        switch (level) {
 247        case I915_CACHE_L3_LLC:
 248                pte |= GEN7_PTE_CACHE_L3_LLC;
 249                break;
 250        case I915_CACHE_LLC:
 251                pte |= GEN6_PTE_CACHE_LLC;
 252                break;
 253        case I915_CACHE_NONE:
 254                pte |= GEN6_PTE_UNCACHED;
 255                break;
 256        default:
 257                MISSING_CASE(level);
 258        }
 259
 260        return pte;
 261}
 262
 263static gen6_pte_t byt_pte_encode(dma_addr_t addr,
 264                                 enum i915_cache_level level,
 265                                 bool valid, u32 flags)
 266{
 267        gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 268        pte |= GEN6_PTE_ADDR_ENCODE(addr);
 269
 270        if (!(flags & PTE_READ_ONLY))
 271                pte |= BYT_PTE_WRITEABLE;
 272
 273        if (level != I915_CACHE_NONE)
 274                pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
 275
 276        return pte;
 277}
 278
 279static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
 280                                 enum i915_cache_level level,
 281                                 bool valid, u32 unused)
 282{
 283        gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 284        pte |= HSW_PTE_ADDR_ENCODE(addr);
 285
 286        if (level != I915_CACHE_NONE)
 287                pte |= HSW_WB_LLC_AGE3;
 288
 289        return pte;
 290}
 291
 292static gen6_pte_t iris_pte_encode(dma_addr_t addr,
 293                                  enum i915_cache_level level,
 294                                  bool valid, u32 unused)
 295{
 296        gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
 297        pte |= HSW_PTE_ADDR_ENCODE(addr);
 298
 299        switch (level) {
 300        case I915_CACHE_NONE:
 301                break;
 302        case I915_CACHE_WT:
 303                pte |= HSW_WT_ELLC_LLC_AGE3;
 304                break;
 305        default:
 306                pte |= HSW_WB_ELLC_LLC_AGE3;
 307                break;
 308        }
 309
 310        return pte;
 311}
 312
 313static int __setup_page_dma(struct drm_device *dev,
 314                            struct i915_page_dma *p, gfp_t flags)
 315{
 316        struct device *device = &dev->pdev->dev;
 317
 318        p->page = alloc_page(flags);
 319        if (!p->page)
 320                return -ENOMEM;
 321
 322        p->daddr = dma_map_page(device,
 323                                p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
 324
 325        if (dma_mapping_error(device, p->daddr)) {
 326                __free_page(p->page);
 327                return -EINVAL;
 328        }
 329
 330        return 0;
 331}
 332
 333static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
 334{
 335        return __setup_page_dma(dev, p, GFP_KERNEL);
 336}
 337
 338static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
 339{
 340        if (WARN_ON(!p->page))
 341                return;
 342
 343        dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
 344        __free_page(p->page);
 345        memset(p, 0, sizeof(*p));
 346}
 347
 348static void *kmap_page_dma(struct i915_page_dma *p)
 349{
 350        return kmap_atomic(p->page);
 351}
 352
 353/* We use the flushing unmap only with ppgtt structures:
 354 * page directories, page tables and scratch pages.
 355 */
 356static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
 357{
  358        /* There are only a few exceptions for gen >= 6: chv and bxt.
  359         * And we are not sure about the latter, so play safe for now.
 360         */
 361        if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
 362                drm_clflush_virt_range(vaddr, PAGE_SIZE);
 363
 364        kunmap_atomic(vaddr);
 365}
 366
 367#define kmap_px(px) kmap_page_dma(px_base(px))
 368#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
 369
 370#define setup_px(dev, px) setup_page_dma((dev), px_base(px))
 371#define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
 372#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
 373#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
 374
 375static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
 376                          const uint64_t val)
 377{
 378        int i;
 379        uint64_t * const vaddr = kmap_page_dma(p);
 380
 381        for (i = 0; i < 512; i++)
 382                vaddr[i] = val;
 383
 384        kunmap_page_dma(dev, vaddr);
 385}
 386
 387static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
 388                             const uint32_t val32)
 389{
 390        uint64_t v = val32;
 391
 392        v = v << 32 | val32;
 393
 394        fill_page_dma(dev, p, v);
 395}
 396
 397static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
 398{
 399        struct i915_page_scratch *sp;
 400        int ret;
 401
 402        sp = kzalloc(sizeof(*sp), GFP_KERNEL);
 403        if (sp == NULL)
 404                return ERR_PTR(-ENOMEM);
 405
 406        ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
 407        if (ret) {
 408                kfree(sp);
 409                return ERR_PTR(ret);
 410        }
 411
 412        set_pages_uc(px_page(sp), 1);
 413
 414        return sp;
 415}
 416
 417static void free_scratch_page(struct drm_device *dev,
 418                              struct i915_page_scratch *sp)
 419{
 420        set_pages_wb(px_page(sp), 1);
 421
 422        cleanup_px(dev, sp);
 423        kfree(sp);
 424}
 425
 426static struct i915_page_table *alloc_pt(struct drm_device *dev)
 427{
 428        struct i915_page_table *pt;
 429        const size_t count = INTEL_INFO(dev)->gen >= 8 ?
 430                GEN8_PTES : GEN6_PTES;
 431        int ret = -ENOMEM;
 432
 433        pt = kzalloc(sizeof(*pt), GFP_KERNEL);
 434        if (!pt)
 435                return ERR_PTR(-ENOMEM);
 436
 437        pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
 438                                GFP_KERNEL);
 439
 440        if (!pt->used_ptes)
 441                goto fail_bitmap;
 442
 443        ret = setup_px(dev, pt);
 444        if (ret)
 445                goto fail_page_m;
 446
 447        return pt;
 448
 449fail_page_m:
 450        kfree(pt->used_ptes);
 451fail_bitmap:
 452        kfree(pt);
 453
 454        return ERR_PTR(ret);
 455}
 456
 457static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
 458{
 459        cleanup_px(dev, pt);
 460        kfree(pt->used_ptes);
 461        kfree(pt);
 462}
 463
 464static void gen8_initialize_pt(struct i915_address_space *vm,
 465                               struct i915_page_table *pt)
 466{
 467        gen8_pte_t scratch_pte;
 468
 469        scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
 470                                      I915_CACHE_LLC, true);
 471
 472        fill_px(vm->dev, pt, scratch_pte);
 473}
 474
 475static void gen6_initialize_pt(struct i915_address_space *vm,
 476                               struct i915_page_table *pt)
 477{
 478        gen6_pte_t scratch_pte;
 479
 480        WARN_ON(px_dma(vm->scratch_page) == 0);
 481
 482        scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
 483                                     I915_CACHE_LLC, true, 0);
 484
 485        fill32_px(vm->dev, pt, scratch_pte);
 486}
 487
 488static struct i915_page_directory *alloc_pd(struct drm_device *dev)
 489{
 490        struct i915_page_directory *pd;
 491        int ret = -ENOMEM;
 492
 493        pd = kzalloc(sizeof(*pd), GFP_KERNEL);
 494        if (!pd)
 495                return ERR_PTR(-ENOMEM);
 496
 497        pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
 498                                sizeof(*pd->used_pdes), GFP_KERNEL);
 499        if (!pd->used_pdes)
 500                goto fail_bitmap;
 501
 502        ret = setup_px(dev, pd);
 503        if (ret)
 504                goto fail_page_m;
 505
 506        return pd;
 507
 508fail_page_m:
 509        kfree(pd->used_pdes);
 510fail_bitmap:
 511        kfree(pd);
 512
 513        return ERR_PTR(ret);
 514}
 515
 516static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
 517{
 518        if (px_page(pd)) {
 519                cleanup_px(dev, pd);
 520                kfree(pd->used_pdes);
 521                kfree(pd);
 522        }
 523}
 524
 525static void gen8_initialize_pd(struct i915_address_space *vm,
 526                               struct i915_page_directory *pd)
 527{
 528        gen8_pde_t scratch_pde;
 529
 530        scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
 531
 532        fill_px(vm->dev, pd, scratch_pde);
 533}
 534
 535static int __pdp_init(struct drm_device *dev,
 536                      struct i915_page_directory_pointer *pdp)
 537{
 538        size_t pdpes = I915_PDPES_PER_PDP(dev);
 539
 540        pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
 541                                  sizeof(unsigned long),
 542                                  GFP_KERNEL);
 543        if (!pdp->used_pdpes)
 544                return -ENOMEM;
 545
 546        pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
 547                                      GFP_KERNEL);
 548        if (!pdp->page_directory) {
 549                kfree(pdp->used_pdpes);
 550                /* the PDP might be the statically allocated top level. Keep it
 551                 * as clean as possible */
 552                pdp->used_pdpes = NULL;
 553                return -ENOMEM;
 554        }
 555
 556        return 0;
 557}
 558
 559static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 560{
 561        kfree(pdp->used_pdpes);
 562        kfree(pdp->page_directory);
 563        pdp->page_directory = NULL;
 564}
 565
 566static struct
 567i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
 568{
 569        struct i915_page_directory_pointer *pdp;
 570        int ret = -ENOMEM;
 571
 572        WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
 573
 574        pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
 575        if (!pdp)
 576                return ERR_PTR(-ENOMEM);
 577
 578        ret = __pdp_init(dev, pdp);
 579        if (ret)
 580                goto fail_bitmap;
 581
 582        ret = setup_px(dev, pdp);
 583        if (ret)
 584                goto fail_page_m;
 585
 586        return pdp;
 587
 588fail_page_m:
 589        __pdp_fini(pdp);
 590fail_bitmap:
 591        kfree(pdp);
 592
 593        return ERR_PTR(ret);
 594}
 595
 596static void free_pdp(struct drm_device *dev,
 597                     struct i915_page_directory_pointer *pdp)
 598{
 599        __pdp_fini(pdp);
 600        if (USES_FULL_48BIT_PPGTT(dev)) {
 601                cleanup_px(dev, pdp);
 602                kfree(pdp);
 603        }
 604}
 605
 606static void gen8_initialize_pdp(struct i915_address_space *vm,
 607                                struct i915_page_directory_pointer *pdp)
 608{
 609        gen8_ppgtt_pdpe_t scratch_pdpe;
 610
 611        scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
 612
 613        fill_px(vm->dev, pdp, scratch_pdpe);
 614}
 615
 616static void gen8_initialize_pml4(struct i915_address_space *vm,
 617                                 struct i915_pml4 *pml4)
 618{
 619        gen8_ppgtt_pml4e_t scratch_pml4e;
 620
 621        scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
 622                                          I915_CACHE_LLC);
 623
 624        fill_px(vm->dev, pml4, scratch_pml4e);
 625}
 626
 627static void
 628gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
 629                          struct i915_page_directory_pointer *pdp,
 630                          struct i915_page_directory *pd,
 631                          int index)
 632{
 633        gen8_ppgtt_pdpe_t *page_directorypo;
 634
 635        if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
 636                return;
 637
 638        page_directorypo = kmap_px(pdp);
 639        page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
 640        kunmap_px(ppgtt, page_directorypo);
 641}
 642
 643static void
 644gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
 645                                  struct i915_pml4 *pml4,
 646                                  struct i915_page_directory_pointer *pdp,
 647                                  int index)
 648{
 649        gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
 650
 651        WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
 652        pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
 653        kunmap_px(ppgtt, pagemap);
 654}
 655
 656/* Broadwell Page Directory Pointer Descriptors */
 657static int gen8_write_pdp(struct drm_i915_gem_request *req,
 658                          unsigned entry,
 659                          dma_addr_t addr)
 660{
 661        struct intel_engine_cs *ring = req->ring;
 662        int ret;
 663
 664        BUG_ON(entry >= 4);
 665
 666        ret = intel_ring_begin(req, 6);
 667        if (ret)
 668                return ret;
 669
 670        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 671        intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(ring, entry));
 672        intel_ring_emit(ring, upper_32_bits(addr));
 673        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 674        intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(ring, entry));
 675        intel_ring_emit(ring, lower_32_bits(addr));
 676        intel_ring_advance(ring);
 677
 678        return 0;
 679}
 680
 681static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
 682                                 struct drm_i915_gem_request *req)
 683{
 684        int i, ret;
 685
 686        for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
 687                const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
 688
 689                ret = gen8_write_pdp(req, i, pd_daddr);
 690                if (ret)
 691                        return ret;
 692        }
 693
 694        return 0;
 695}
 696
 697static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
 698                              struct drm_i915_gem_request *req)
 699{
 700        return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
 701}
 702
 703static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
 704                                       struct i915_page_directory_pointer *pdp,
 705                                       uint64_t start,
 706                                       uint64_t length,
 707                                       gen8_pte_t scratch_pte)
 708{
 709        struct i915_hw_ppgtt *ppgtt =
 710                container_of(vm, struct i915_hw_ppgtt, base);
 711        gen8_pte_t *pt_vaddr;
 712        unsigned pdpe = gen8_pdpe_index(start);
 713        unsigned pde = gen8_pde_index(start);
 714        unsigned pte = gen8_pte_index(start);
 715        unsigned num_entries = length >> PAGE_SHIFT;
 716        unsigned last_pte, i;
 717
 718        if (WARN_ON(!pdp))
 719                return;
 720
 721        while (num_entries) {
 722                struct i915_page_directory *pd;
 723                struct i915_page_table *pt;
 724
 725                if (WARN_ON(!pdp->page_directory[pdpe]))
 726                        break;
 727
 728                pd = pdp->page_directory[pdpe];
 729
 730                if (WARN_ON(!pd->page_table[pde]))
 731                        break;
 732
 733                pt = pd->page_table[pde];
 734
 735                if (WARN_ON(!px_page(pt)))
 736                        break;
 737
 738                last_pte = pte + num_entries;
 739                if (last_pte > GEN8_PTES)
 740                        last_pte = GEN8_PTES;
 741
 742                pt_vaddr = kmap_px(pt);
 743
 744                for (i = pte; i < last_pte; i++) {
 745                        pt_vaddr[i] = scratch_pte;
 746                        num_entries--;
 747                }
 748
 749                kunmap_px(ppgtt, pt);
 750
 751                pte = 0;
 752                if (++pde == I915_PDES) {
 753                        if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
 754                                break;
 755                        pde = 0;
 756                }
 757        }
 758}
 759
 760static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 761                                   uint64_t start,
 762                                   uint64_t length,
 763                                   bool use_scratch)
 764{
 765        struct i915_hw_ppgtt *ppgtt =
 766                container_of(vm, struct i915_hw_ppgtt, base);
 767        gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
 768                                                 I915_CACHE_LLC, use_scratch);
 769
 770        if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
 771                gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
 772                                           scratch_pte);
 773        } else {
 774                uint64_t pml4e;
 775                struct i915_page_directory_pointer *pdp;
 776
 777                gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
 778                        gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
 779                                                   scratch_pte);
 780                }
 781        }
 782}
 783
 784static void
 785gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
 786                              struct i915_page_directory_pointer *pdp,
 787                              struct sg_page_iter *sg_iter,
 788                              uint64_t start,
 789                              enum i915_cache_level cache_level)
 790{
 791        struct i915_hw_ppgtt *ppgtt =
 792                container_of(vm, struct i915_hw_ppgtt, base);
 793        gen8_pte_t *pt_vaddr;
 794        unsigned pdpe = gen8_pdpe_index(start);
 795        unsigned pde = gen8_pde_index(start);
 796        unsigned pte = gen8_pte_index(start);
 797
 798        pt_vaddr = NULL;
 799
 800        while (__sg_page_iter_next(sg_iter)) {
 801                if (pt_vaddr == NULL) {
 802                        struct i915_page_directory *pd = pdp->page_directory[pdpe];
 803                        struct i915_page_table *pt = pd->page_table[pde];
 804                        pt_vaddr = kmap_px(pt);
 805                }
 806
 807                pt_vaddr[pte] =
 808                        gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
 809                                        cache_level, true);
 810                if (++pte == GEN8_PTES) {
 811                        kunmap_px(ppgtt, pt_vaddr);
 812                        pt_vaddr = NULL;
 813                        if (++pde == I915_PDES) {
 814                                if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
 815                                        break;
 816                                pde = 0;
 817                        }
 818                        pte = 0;
 819                }
 820        }
 821
 822        if (pt_vaddr)
 823                kunmap_px(ppgtt, pt_vaddr);
 824}
 825
 826static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 827                                      struct sg_table *pages,
 828                                      uint64_t start,
 829                                      enum i915_cache_level cache_level,
 830                                      u32 unused)
 831{
 832        struct i915_hw_ppgtt *ppgtt =
 833                container_of(vm, struct i915_hw_ppgtt, base);
 834        struct sg_page_iter sg_iter;
 835
 836        __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
 837
 838        if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
 839                gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
 840                                              cache_level);
 841        } else {
 842                struct i915_page_directory_pointer *pdp;
 843                uint64_t pml4e;
 844                uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
 845
 846                gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
 847                        gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
 848                                                      start, cache_level);
 849                }
 850        }
 851}
 852
 853static void gen8_free_page_tables(struct drm_device *dev,
 854                                  struct i915_page_directory *pd)
 855{
 856        int i;
 857
 858        if (!px_page(pd))
 859                return;
 860
 861        for_each_set_bit(i, pd->used_pdes, I915_PDES) {
 862                if (WARN_ON(!pd->page_table[i]))
 863                        continue;
 864
 865                free_pt(dev, pd->page_table[i]);
 866                pd->page_table[i] = NULL;
 867        }
 868}
 869
 870static int gen8_init_scratch(struct i915_address_space *vm)
 871{
 872        struct drm_device *dev = vm->dev;
 873
 874        vm->scratch_page = alloc_scratch_page(dev);
 875        if (IS_ERR(vm->scratch_page))
 876                return PTR_ERR(vm->scratch_page);
 877
 878        vm->scratch_pt = alloc_pt(dev);
 879        if (IS_ERR(vm->scratch_pt)) {
 880                free_scratch_page(dev, vm->scratch_page);
 881                return PTR_ERR(vm->scratch_pt);
 882        }
 883
 884        vm->scratch_pd = alloc_pd(dev);
 885        if (IS_ERR(vm->scratch_pd)) {
 886                free_pt(dev, vm->scratch_pt);
 887                free_scratch_page(dev, vm->scratch_page);
 888                return PTR_ERR(vm->scratch_pd);
 889        }
 890
 891        if (USES_FULL_48BIT_PPGTT(dev)) {
 892                vm->scratch_pdp = alloc_pdp(dev);
 893                if (IS_ERR(vm->scratch_pdp)) {
 894                        free_pd(dev, vm->scratch_pd);
 895                        free_pt(dev, vm->scratch_pt);
 896                        free_scratch_page(dev, vm->scratch_page);
 897                        return PTR_ERR(vm->scratch_pdp);
 898                }
 899        }
 900
 901        gen8_initialize_pt(vm, vm->scratch_pt);
 902        gen8_initialize_pd(vm, vm->scratch_pd);
 903        if (USES_FULL_48BIT_PPGTT(dev))
 904                gen8_initialize_pdp(vm, vm->scratch_pdp);
 905
 906        return 0;
 907}
 908
 909static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
 910{
 911        enum vgt_g2v_type msg;
 912        struct drm_device *dev = ppgtt->base.dev;
 913        struct drm_i915_private *dev_priv = dev->dev_private;
 914        int i;
 915
 916        if (USES_FULL_48BIT_PPGTT(dev)) {
 917                u64 daddr = px_dma(&ppgtt->pml4);
 918
 919                I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
 920                I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
 921
 922                msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
 923                                VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
 924        } else {
 925                for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
 926                        u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
 927
 928                        I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
 929                        I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
 930                }
 931
 932                msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
 933                                VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
 934        }
 935
 936        I915_WRITE(vgtif_reg(g2v_notify), msg);
 937
 938        return 0;
 939}
 940
 941static void gen8_free_scratch(struct i915_address_space *vm)
 942{
 943        struct drm_device *dev = vm->dev;
 944
 945        if (USES_FULL_48BIT_PPGTT(dev))
 946                free_pdp(dev, vm->scratch_pdp);
 947        free_pd(dev, vm->scratch_pd);
 948        free_pt(dev, vm->scratch_pt);
 949        free_scratch_page(dev, vm->scratch_page);
 950}
 951
 952static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
 953                                    struct i915_page_directory_pointer *pdp)
 954{
 955        int i;
 956
 957        for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
 958                if (WARN_ON(!pdp->page_directory[i]))
 959                        continue;
 960
 961                gen8_free_page_tables(dev, pdp->page_directory[i]);
 962                free_pd(dev, pdp->page_directory[i]);
 963        }
 964
 965        free_pdp(dev, pdp);
 966}
 967
 968static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
 969{
 970        int i;
 971
 972        for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
 973                if (WARN_ON(!ppgtt->pml4.pdps[i]))
 974                        continue;
 975
 976                gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
 977        }
 978
 979        cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
 980}
 981
 982static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 983{
 984        struct i915_hw_ppgtt *ppgtt =
 985                container_of(vm, struct i915_hw_ppgtt, base);
 986
 987        if (intel_vgpu_active(vm->dev))
 988                gen8_ppgtt_notify_vgt(ppgtt, false);
 989
 990        if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
 991                gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
 992        else
 993                gen8_ppgtt_cleanup_4lvl(ppgtt);
 994
 995        gen8_free_scratch(vm);
 996}
 997
 998/**
 999 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
1000 * @vm: Master vm structure.
1001 * @pd: Page directory for this address range.
1002 * @start:      Starting virtual address to begin allocations.
1003 * @length:     Size of the allocations.
1004 * @new_pts:    Bitmap set by function with new allocations. Likely used by the
1005 *              caller to free on error.
1006 *
1007 * Allocate the required number of page tables. Extremely similar to
1008 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1009 * the page directory boundary (instead of the page directory pointer). That
1010 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
 1011 * possible, and likely, that the caller will need to make multiple calls to this
 1012 * function to achieve the appropriate allocation.
1013 *
1014 * Return: 0 if success; negative error code otherwise.
1015 */
1016static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1017                                     struct i915_page_directory *pd,
1018                                     uint64_t start,
1019                                     uint64_t length,
1020                                     unsigned long *new_pts)
1021{
1022        struct drm_device *dev = vm->dev;
1023        struct i915_page_table *pt;
1024        uint32_t pde;
1025
1026        gen8_for_each_pde(pt, pd, start, length, pde) {
1027                /* Don't reallocate page tables */
1028                if (test_bit(pde, pd->used_pdes)) {
1029                        /* Scratch is never allocated this way */
1030                        WARN_ON(pt == vm->scratch_pt);
1031                        continue;
1032                }
1033
1034                pt = alloc_pt(dev);
1035                if (IS_ERR(pt))
1036                        goto unwind_out;
1037
1038                gen8_initialize_pt(vm, pt);
1039                pd->page_table[pde] = pt;
1040                __set_bit(pde, new_pts);
1041                trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1042        }
1043
1044        return 0;
1045
1046unwind_out:
1047        for_each_set_bit(pde, new_pts, I915_PDES)
1048                free_pt(dev, pd->page_table[pde]);
1049
1050        return -ENOMEM;
1051}
1052
1053/**
1054 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1055 * @vm: Master vm structure.
1056 * @pdp:        Page directory pointer for this address range.
1057 * @start:      Starting virtual address to begin allocations.
1058 * @length:     Size of the allocations.
1059 * @new_pds:    Bitmap set by function with new allocations. Likely used by the
1060 *              caller to free on error.
1061 *
 1062 * Allocate the required number of page directories starting at the pdpe index of
 1063 * @start, and ending at the pdpe index of @start + @length. This function will skip
1064 * over already allocated page directories within the range, and only allocate
1065 * new ones, setting the appropriate pointer within the pdp as well as the
1066 * correct position in the bitmap @new_pds.
1067 *
 1068 * The function will only allocate the pages within the range for a given page
1069 * directory pointer. In other words, if @start + @length straddles a virtually
1070 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
 1071 * required by the caller. This is not currently possible, and the BUG in the
1072 * code will prevent it.
1073 *
1074 * Return: 0 if success; negative error code otherwise.
1075 */
1076static int
1077gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1078                                  struct i915_page_directory_pointer *pdp,
1079                                  uint64_t start,
1080                                  uint64_t length,
1081                                  unsigned long *new_pds)
1082{
1083        struct drm_device *dev = vm->dev;
1084        struct i915_page_directory *pd;
1085        uint32_t pdpe;
1086        uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1087
1088        WARN_ON(!bitmap_empty(new_pds, pdpes));
1089
1090        gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1091                if (test_bit(pdpe, pdp->used_pdpes))
1092                        continue;
1093
1094                pd = alloc_pd(dev);
1095                if (IS_ERR(pd))
1096                        goto unwind_out;
1097
1098                gen8_initialize_pd(vm, pd);
1099                pdp->page_directory[pdpe] = pd;
1100                __set_bit(pdpe, new_pds);
1101                trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
1102        }
1103
1104        return 0;
1105
1106unwind_out:
1107        for_each_set_bit(pdpe, new_pds, pdpes)
1108                free_pd(dev, pdp->page_directory[pdpe]);
1109
1110        return -ENOMEM;
1111}
1112
1113/**
1114 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1115 * @vm: Master vm structure.
1116 * @pml4:       Page map level 4 for this address range.
1117 * @start:      Starting virtual address to begin allocations.
1118 * @length:     Size of the allocations.
1119 * @new_pdps:   Bitmap set by function with new allocations. Likely used by the
1120 *              caller to free on error.
1121 *
1122 * Allocate the required number of page directory pointers. Extremely similar to
1123 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1124 * The main difference is here we are limited by the pml4 boundary (instead of
1125 * the page directory pointer).
1126 *
1127 * Return: 0 if success; negative error code otherwise.
1128 */
1129static int
1130gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1131                                  struct i915_pml4 *pml4,
1132                                  uint64_t start,
1133                                  uint64_t length,
1134                                  unsigned long *new_pdps)
1135{
1136        struct drm_device *dev = vm->dev;
1137        struct i915_page_directory_pointer *pdp;
1138        uint32_t pml4e;
1139
1140        WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1141
1142        gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1143                if (!test_bit(pml4e, pml4->used_pml4es)) {
1144                        pdp = alloc_pdp(dev);
1145                        if (IS_ERR(pdp))
1146                                goto unwind_out;
1147
1148                        gen8_initialize_pdp(vm, pdp);
1149                        pml4->pdps[pml4e] = pdp;
1150                        __set_bit(pml4e, new_pdps);
1151                        trace_i915_page_directory_pointer_entry_alloc(vm,
1152                                                                      pml4e,
1153                                                                      start,
1154                                                                      GEN8_PML4E_SHIFT);
1155                }
1156        }
1157
1158        return 0;
1159
1160unwind_out:
1161        for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1162                free_pdp(dev, pml4->pdps[pml4e]);
1163
1164        return -ENOMEM;
1165}
1166
1167static void
1168free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
1169{
1170        kfree(new_pts);
1171        kfree(new_pds);
1172}
1173
 1174/* Allocates the page directory bitmap, and the array of page table bitmaps. Both
 1175 * of these are sized based on the number of PDPEs in the system.
1176 */
1177static
1178int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1179                                         unsigned long **new_pts,
1180                                         uint32_t pdpes)
1181{
1182        unsigned long *pds;
1183        unsigned long *pts;
1184
1185        pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1186        if (!pds)
1187                return -ENOMEM;
1188
1189        pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1190                      GFP_TEMPORARY);
1191        if (!pts)
1192                goto err_out;
1193
1194        *new_pds = pds;
1195        *new_pts = pts;
1196
1197        return 0;
1198
1199err_out:
1200        free_gen8_temp_bitmaps(pds, pts);
1201        return -ENOMEM;
1202}
1203
1204/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1205 * the page table structures, we mark them dirty so that
1206 * context switching/execlist queuing code takes extra steps
1207 * to ensure that tlbs are flushed.
1208 */
1209static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1210{
1211        ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1212}
1213
1214static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1215                                    struct i915_page_directory_pointer *pdp,
1216                                    uint64_t start,
1217                                    uint64_t length)
1218{
1219        struct i915_hw_ppgtt *ppgtt =
1220                container_of(vm, struct i915_hw_ppgtt, base);
1221        unsigned long *new_page_dirs, *new_page_tables;
1222        struct drm_device *dev = vm->dev;
1223        struct i915_page_directory *pd;
1224        const uint64_t orig_start = start;
1225        const uint64_t orig_length = length;
1226        uint32_t pdpe;
1227        uint32_t pdpes = I915_PDPES_PER_PDP(dev);
1228        int ret;
1229
1230        /* Wrap is never okay since we can only represent 48b, and we don't
1231         * actually use the other side of the canonical address space.
1232         */
1233        if (WARN_ON(start + length < start))
1234                return -ENODEV;
1235
1236        if (WARN_ON(start + length > vm->total))
1237                return -ENODEV;
1238
1239        ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1240        if (ret)
1241                return ret;
1242
1243        /* Do the allocations first so we can easily bail out */
1244        ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1245                                                new_page_dirs);
1246        if (ret) {
1247                free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1248                return ret;
1249        }
1250
1251        /* For every page directory referenced, allocate page tables */
1252        gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1253                ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1254                                                new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1255                if (ret)
1256                        goto err_out;
1257        }
1258
1259        start = orig_start;
1260        length = orig_length;
1261
1262        /* Allocations have completed successfully, so set the bitmaps, and do
1263         * the mappings. */
1264        gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1265                gen8_pde_t *const page_directory = kmap_px(pd);
1266                struct i915_page_table *pt;
1267                uint64_t pd_len = length;
1268                uint64_t pd_start = start;
1269                uint32_t pde;
1270
1271                /* Every pd should be allocated, we just did that above. */
1272                WARN_ON(!pd);
1273
1274                gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1275                        /* Same reasoning as pd */
1276                        WARN_ON(!pt);
1277                        WARN_ON(!pd_len);
1278                        WARN_ON(!gen8_pte_count(pd_start, pd_len));
1279
1280                        /* Set our used ptes within the page table */
1281                        bitmap_set(pt->used_ptes,
1282                                   gen8_pte_index(pd_start),
1283                                   gen8_pte_count(pd_start, pd_len));
1284
1285                        /* Our pde is now pointing to the pagetable, pt */
1286                        __set_bit(pde, pd->used_pdes);
1287
1288                        /* Map the PDE to the page table */
1289                        page_directory[pde] = gen8_pde_encode(px_dma(pt),
1290                                                              I915_CACHE_LLC);
1291                        trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1292                                                        gen8_pte_index(start),
1293                                                        gen8_pte_count(start, length),
1294                                                        GEN8_PTES);
1295
1296                        /* NB: We haven't yet mapped ptes to pages. At this
1297                         * point we're still relying on insert_entries() */
1298                }
1299
1300                kunmap_px(ppgtt, page_directory);
1301                __set_bit(pdpe, pdp->used_pdpes);
1302                gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1303        }
1304
1305        free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1306        mark_tlbs_dirty(ppgtt);
1307        return 0;
1308
1309err_out:
1310        while (pdpe--) {
1311                unsigned long temp;
1312
1313                for_each_set_bit(temp, new_page_tables + pdpe *
1314                                BITS_TO_LONGS(I915_PDES), I915_PDES)
1315                        free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1316        }
1317
1318        for_each_set_bit(pdpe, new_page_dirs, pdpes)
1319                free_pd(dev, pdp->page_directory[pdpe]);
1320
1321        free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1322        mark_tlbs_dirty(ppgtt);
1323        return ret;
1324}
1325
1326static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1327                                    struct i915_pml4 *pml4,
1328                                    uint64_t start,
1329                                    uint64_t length)
1330{
1331        DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1332        struct i915_hw_ppgtt *ppgtt =
1333                        container_of(vm, struct i915_hw_ppgtt, base);
1334        struct i915_page_directory_pointer *pdp;
1335        uint64_t pml4e;
1336        int ret = 0;
1337
1338        /* Do the pml4 allocations first, so we don't need to track the newly
1339         * allocated tables below the pdp */
1340        bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
1341
 1342        /* The page directory and page table allocations are done in the shared 3
 1343         * and 4 level code. Just allocate the pdps.
1344         */
1345        ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1346                                                new_pdps);
1347        if (ret)
1348                return ret;
1349
1350        WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1351             "The allocation has spanned more than 512GB. "
1352             "It is highly likely this is incorrect.");
1353
1354        gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1355                WARN_ON(!pdp);
1356
1357                ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1358                if (ret)
1359                        goto err_out;
1360
1361                gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1362        }
1363
1364        bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1365                  GEN8_PML4ES_PER_PML4);
1366
1367        return 0;
1368
1369err_out:
1370        for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1371                gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1372
1373        return ret;
1374}
1375
1376static int gen8_alloc_va_range(struct i915_address_space *vm,
1377                               uint64_t start, uint64_t length)
1378{
1379        struct i915_hw_ppgtt *ppgtt =
1380                container_of(vm, struct i915_hw_ppgtt, base);
1381
1382        if (USES_FULL_48BIT_PPGTT(vm->dev))
1383                return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1384        else
1385                return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1386}
1387
1388static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1389                          uint64_t start, uint64_t length,
1390                          gen8_pte_t scratch_pte,
1391                          struct seq_file *m)
1392{
1393        struct i915_page_directory *pd;
1394        uint32_t pdpe;
1395
1396        gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1397                struct i915_page_table *pt;
1398                uint64_t pd_len = length;
1399                uint64_t pd_start = start;
1400                uint32_t pde;
1401
1402                if (!test_bit(pdpe, pdp->used_pdpes))
1403                        continue;
1404
1405                seq_printf(m, "\tPDPE #%d\n", pdpe);
1406                gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1407                        uint32_t  pte;
1408                        gen8_pte_t *pt_vaddr;
1409
1410                        if (!test_bit(pde, pd->used_pdes))
1411                                continue;
1412
1413                        pt_vaddr = kmap_px(pt);
1414                        for (pte = 0; pte < GEN8_PTES; pte += 4) {
1415                                uint64_t va =
1416                                        (pdpe << GEN8_PDPE_SHIFT) |
1417                                        (pde << GEN8_PDE_SHIFT) |
1418                                        (pte << GEN8_PTE_SHIFT);
1419                                int i;
1420                                bool found = false;
1421
1422                                for (i = 0; i < 4; i++)
1423                                        if (pt_vaddr[pte + i] != scratch_pte)
1424                                                found = true;
1425                                if (!found)
1426                                        continue;
1427
1428                                seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1429                                for (i = 0; i < 4; i++) {
1430                                        if (pt_vaddr[pte + i] != scratch_pte)
1431                                                seq_printf(m, " %llx", pt_vaddr[pte + i]);
1432                                        else
1433                                                seq_puts(m, "  SCRATCH ");
1434                                }
1435                                seq_puts(m, "\n");
1436                        }
1437                        /* don't use kunmap_px, it could trigger
1438                         * an unnecessary flush.
1439                         */
1440                        kunmap_atomic(pt_vaddr);
1441                }
1442        }
1443}
1444
1445static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1446{
1447        struct i915_address_space *vm = &ppgtt->base;
1448        uint64_t start = ppgtt->base.start;
1449        uint64_t length = ppgtt->base.total;
1450        gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1451                                                 I915_CACHE_LLC, true);
1452
1453        if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1454                gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1455        } else {
1456                uint64_t pml4e;
1457                struct i915_pml4 *pml4 = &ppgtt->pml4;
1458                struct i915_page_directory_pointer *pdp;
1459
1460                gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1461                        if (!test_bit(pml4e, pml4->used_pml4es))
1462                                continue;
1463
1464                        seq_printf(m, "    PML4E #%llu\n", pml4e);
1465                        gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1466                }
1467        }
1468}
1469
1470static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1471{
1472        unsigned long *new_page_dirs, *new_page_tables;
 1473        uint32_t pdpes = I915_PDPES_PER_PDP(ppgtt->base.dev);
1474        int ret;
1475
 1476        /* We allocate temp bitmaps for page tables for no gain,
 1477         * but as this is for init only, let's keep things simple.
1478         */
1479        ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1480        if (ret)
1481                return ret;
1482
1483        /* Allocate for all pdps regardless of how the ppgtt
1484         * was defined.
1485         */
1486        ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1487                                                0, 1ULL << 32,
1488                                                new_page_dirs);
1489        if (!ret)
1490                *ppgtt->pdp.used_pdpes = *new_page_dirs;
1491
1492        free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1493
1494        return ret;
1495}
1496
1497/*
 1498 * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP registers
 1499 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 1500 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 bytes = 4GB of legacy 32b
 1501 * address space.
1502 *
1503 */
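/*
 * Worked out for reference (illustrative; derived from the shift constants
 * used in this file): a legacy 32b address decomposes into
 * PDPE | PDE | PTE | page offset = 2 + 9 + 9 + 12 bits, i.e.
 * 4 * 512 * 512 * 4096 bytes = 4GB. The 4-level (48b) layout prepends a 9-bit
 * PML4E index: 9 + 9 + 9 + 9 + 12 = 48 bits, i.e.
 * 512 * 512 * 512 * 512 * 4096 bytes = 256TB, which is why gen8_ppgtt_init()
 * below sets base.total to 1ULL << 48 in that mode.
 */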
1504static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1505{
1506        int ret;
1507
1508        ret = gen8_init_scratch(&ppgtt->base);
1509        if (ret)
1510                return ret;
1511
1512        ppgtt->base.start = 0;
1513        ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1514        ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1515        ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1516        ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1517        ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1518        ppgtt->base.bind_vma = ppgtt_bind_vma;
1519        ppgtt->debug_dump = gen8_dump_ppgtt;
1520
1521        if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1522                ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
1523                if (ret)
1524                        goto free_scratch;
1525
1526                gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1527
1528                ppgtt->base.total = 1ULL << 48;
1529                ppgtt->switch_mm = gen8_48b_mm_switch;
1530        } else {
1531                ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1532                if (ret)
1533                        goto free_scratch;
1534
1535                ppgtt->base.total = 1ULL << 32;
1536                ppgtt->switch_mm = gen8_legacy_mm_switch;
1537                trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1538                                                              0, 0,
1539                                                              GEN8_PML4E_SHIFT);
1540
1541                if (intel_vgpu_active(ppgtt->base.dev)) {
1542                        ret = gen8_preallocate_top_level_pdps(ppgtt);
1543                        if (ret)
1544                                goto free_scratch;
1545                }
1546        }
1547
1548        if (intel_vgpu_active(ppgtt->base.dev))
1549                gen8_ppgtt_notify_vgt(ppgtt, true);
1550
1551        return 0;
1552
1553free_scratch:
1554        gen8_free_scratch(&ppgtt->base);
1555        return ret;
1556}
1557
1558static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1559{
1560        struct i915_address_space *vm = &ppgtt->base;
1561        struct i915_page_table *unused;
1562        gen6_pte_t scratch_pte;
1563        uint32_t pd_entry;
1564        uint32_t  pte, pde, temp;
1565        uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
1566
1567        scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1568                                     I915_CACHE_LLC, true, 0);
1569
1570        gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
1571                u32 expected;
1572                gen6_pte_t *pt_vaddr;
1573                const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1574                pd_entry = readl(ppgtt->pd_addr + pde);
1575                expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1576
1577                if (pd_entry != expected)
1578                        seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1579                                   pde,
1580                                   pd_entry,
1581                                   expected);
1582                seq_printf(m, "\tPDE: %x\n", pd_entry);
1583
1584                pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1585
1586                for (pte = 0; pte < GEN6_PTES; pte += 4) {
1587                        unsigned long va =
1588                                (pde * PAGE_SIZE * GEN6_PTES) +
1589                                (pte * PAGE_SIZE);
1590                        int i;
1591                        bool found = false;
1592                        for (i = 0; i < 4; i++)
1593                                if (pt_vaddr[pte + i] != scratch_pte)
1594                                        found = true;
1595                        if (!found)
1596                                continue;
1597
1598                        seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1599                        for (i = 0; i < 4; i++) {
1600                                if (pt_vaddr[pte + i] != scratch_pte)
1601                                        seq_printf(m, " %08x", pt_vaddr[pte + i]);
1602                                else
1603                                        seq_puts(m, "  SCRATCH ");
1604                        }
1605                        seq_puts(m, "\n");
1606                }
1607                kunmap_px(ppgtt, pt_vaddr);
1608        }
1609}
1610
1611/* Write the PDE at index @pde in page directory @pd to point at page table @pt */
1612static void gen6_write_pde(struct i915_page_directory *pd,
1613                            const int pde, struct i915_page_table *pt)
1614{
1615        /* Caller needs to make sure the write completes if necessary */
1616        struct i915_hw_ppgtt *ppgtt =
1617                container_of(pd, struct i915_hw_ppgtt, pd);
1618        u32 pd_entry;
1619
1620        pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1621        pd_entry |= GEN6_PDE_VALID;
1622
1623        writel(pd_entry, ppgtt->pd_addr + pde);
1624}
1625
1626/* Write all the page tables found in the ppgtt structure out to their
1627 * incrementing page directory entries. */
1628static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1629                                  struct i915_page_directory *pd,
1630                                  uint32_t start, uint32_t length)
1631{
1632        struct i915_page_table *pt;
1633        uint32_t pde, temp;
1634
1635        gen6_for_each_pde(pt, pd, start, length, temp, pde)
1636                gen6_write_pde(pd, pde, pt);
1637
1638        /* Make sure the write is complete before other code can use this page
1639         * table. This is also required for WC mapped PTEs. */
1640        readl(dev_priv->gtt.gsm);
1641}
1642
1643static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1644{
1645        BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1646
1647        return (ppgtt->pd.base.ggtt_offset / 64) << 16;
1648}
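/*
 * Worked example, illustrative only: get_pd_offset() packs the page
 * directory's GGTT offset (which must be 64-byte aligned, hence the
 * BUG_ON above) into the form written to RING_PP_DIR_BASE by the
 * mm_switch implementations below. For a hypothetical directory placed
 * at GGTT offset 0x00080000:
 *
 *	(0x00080000 / 64) << 16 == 0x2000 << 16 == 0x20000000
 */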
1649
1650static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1651                         struct drm_i915_gem_request *req)
1652{
1653        struct intel_engine_cs *ring = req->ring;
1654        int ret;
1655
1656        /* NB: TLBs must be flushed and invalidated before a switch */
1657        ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1658        if (ret)
1659                return ret;
1660
1661        ret = intel_ring_begin(req, 6);
1662        if (ret)
1663                return ret;
1664
1665        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1666        intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
1667        intel_ring_emit(ring, PP_DIR_DCLV_2G);
1668        intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
1669        intel_ring_emit(ring, get_pd_offset(ppgtt));
1670        intel_ring_emit(ring, MI_NOOP);
1671        intel_ring_advance(ring);
1672
1673        return 0;
1674}
1675
1676static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1677                          struct drm_i915_gem_request *req)
1678{
1679        struct intel_engine_cs *ring = req->ring;
1680        struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1681
1682        I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1683        I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1684        return 0;
1685}
1686
1687static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1688                          struct drm_i915_gem_request *req)
1689{
1690        struct intel_engine_cs *ring = req->ring;
1691        int ret;
1692
1693        /* NB: TLBs must be flushed and invalidated before a switch */
1694        ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1695        if (ret)
1696                return ret;
1697
1698        ret = intel_ring_begin(req, 6);
1699        if (ret)
1700                return ret;
1701
1702        intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1703        intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
1704        intel_ring_emit(ring, PP_DIR_DCLV_2G);
1705        intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
1706        intel_ring_emit(ring, get_pd_offset(ppgtt));
1707        intel_ring_emit(ring, MI_NOOP);
1708        intel_ring_advance(ring);
1709
1710        /* XXX: RCS is the only one to auto invalidate the TLBs? */
1711        if (ring->id != RCS) {
1712                ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1713                if (ret)
1714                        return ret;
1715        }
1716
1717        return 0;
1718}
1719
1720static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1721                          struct drm_i915_gem_request *req)
1722{
1723        struct intel_engine_cs *ring = req->ring;
1724        struct drm_device *dev = ppgtt->base.dev;
1725        struct drm_i915_private *dev_priv = dev->dev_private;
1726
1727
1728        I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1729        I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1730
1731        POSTING_READ(RING_PP_DIR_DCLV(ring));
1732
1733        return 0;
1734}
1735
1736static void gen8_ppgtt_enable(struct drm_device *dev)
1737{
1738        struct drm_i915_private *dev_priv = dev->dev_private;
1739        struct intel_engine_cs *ring;
1740        int j;
1741
1742        for_each_ring(ring, dev_priv, j) {
1743                u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
1744                I915_WRITE(RING_MODE_GEN7(ring),
1745                           _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1746        }
1747}
1748
1749static void gen7_ppgtt_enable(struct drm_device *dev)
1750{
1751        struct drm_i915_private *dev_priv = dev->dev_private;
1752        struct intel_engine_cs *ring;
1753        uint32_t ecochk, ecobits;
1754        int i;
1755
1756        ecobits = I915_READ(GAC_ECO_BITS);
1757        I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1758
1759        ecochk = I915_READ(GAM_ECOCHK);
1760        if (IS_HASWELL(dev)) {
1761                ecochk |= ECOCHK_PPGTT_WB_HSW;
1762        } else {
1763                ecochk |= ECOCHK_PPGTT_LLC_IVB;
1764                ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1765        }
1766        I915_WRITE(GAM_ECOCHK, ecochk);
1767
1768        for_each_ring(ring, dev_priv, i) {
1769                /* GFX_MODE is per-ring on gen7+ */
1770                I915_WRITE(RING_MODE_GEN7(ring),
1771                           _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1772        }
1773}
1774
1775static void gen6_ppgtt_enable(struct drm_device *dev)
1776{
1777        struct drm_i915_private *dev_priv = dev->dev_private;
1778        uint32_t ecochk, gab_ctl, ecobits;
1779
1780        ecobits = I915_READ(GAC_ECO_BITS);
1781        I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1782                   ECOBITS_PPGTT_CACHE64B);
1783
1784        gab_ctl = I915_READ(GAB_CTL);
1785        I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1786
1787        ecochk = I915_READ(GAM_ECOCHK);
1788        I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1789
1790        I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1791}
1792
1793/* PPGTT support for Sandybridge/Gen6 and later */
1794static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1795                                   uint64_t start,
1796                                   uint64_t length,
1797                                   bool use_scratch)
1798{
1799        struct i915_hw_ppgtt *ppgtt =
1800                container_of(vm, struct i915_hw_ppgtt, base);
1801        gen6_pte_t *pt_vaddr, scratch_pte;
1802        unsigned first_entry = start >> PAGE_SHIFT;
1803        unsigned num_entries = length >> PAGE_SHIFT;
1804        unsigned act_pt = first_entry / GEN6_PTES;
1805        unsigned first_pte = first_entry % GEN6_PTES;
1806        unsigned last_pte, i;
1807
1808        scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1809                                     I915_CACHE_LLC, true, 0);
1810
1811        while (num_entries) {
1812                last_pte = first_pte + num_entries;
1813                if (last_pte > GEN6_PTES)
1814                        last_pte = GEN6_PTES;
1815
1816                pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1817
1818                for (i = first_pte; i < last_pte; i++)
1819                        pt_vaddr[i] = scratch_pte;
1820
1821                kunmap_px(ppgtt, pt_vaddr);
1822
1823                num_entries -= last_pte - first_pte;
1824                first_pte = 0;
1825                act_pt++;
1826        }
1827}
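/*
 * Worked example, illustrative only: how gen6_ppgtt_clear_range() above
 * (and gen6_ppgtt_insert_entries() below) locate a PTE, assuming 4KiB
 * pages (PAGE_SHIFT == 12) and GEN6_PTES == 512. For a made-up GTT
 * offset of 0x00345000:
 *
 *	first_entry = 0x345000 >> 12 = 837
 *	act_pt      = 837 / 512      = 1	(second page table)
 *	first_pte   = 837 % 512      = 325	(index within that table)
 */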
1828
1829static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1830                                      struct sg_table *pages,
1831                                      uint64_t start,
1832                                      enum i915_cache_level cache_level, u32 flags)
1833{
1834        struct i915_hw_ppgtt *ppgtt =
1835                container_of(vm, struct i915_hw_ppgtt, base);
1836        gen6_pte_t *pt_vaddr;
1837        unsigned first_entry = start >> PAGE_SHIFT;
1838        unsigned act_pt = first_entry / GEN6_PTES;
1839        unsigned act_pte = first_entry % GEN6_PTES;
1840        struct sg_page_iter sg_iter;
1841
1842        pt_vaddr = NULL;
1843        for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
1844                if (pt_vaddr == NULL)
1845                        pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
1846
1847                pt_vaddr[act_pte] =
1848                        vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
1849                                       cache_level, true, flags);
1850
1851                if (++act_pte == GEN6_PTES) {
1852                        kunmap_px(ppgtt, pt_vaddr);
1853                        pt_vaddr = NULL;
1854                        act_pt++;
1855                        act_pte = 0;
1856                }
1857        }
1858        if (pt_vaddr)
1859                kunmap_px(ppgtt, pt_vaddr);
1860}
1861
1862static int gen6_alloc_va_range(struct i915_address_space *vm,
1863                               uint64_t start_in, uint64_t length_in)
1864{
1865        DECLARE_BITMAP(new_page_tables, I915_PDES);
1866        struct drm_device *dev = vm->dev;
1867        struct drm_i915_private *dev_priv = dev->dev_private;
1868        struct i915_hw_ppgtt *ppgtt =
1869                                container_of(vm, struct i915_hw_ppgtt, base);
1870        struct i915_page_table *pt;
1871        uint32_t start, length, start_save, length_save;
1872        uint32_t pde, temp;
1873        int ret;
1874
1875        if (WARN_ON(start_in + length_in > ppgtt->base.total))
1876                return -ENODEV;
1877
1878        start = start_save = start_in;
1879        length = length_save = length_in;
1880
1881        bitmap_zero(new_page_tables, I915_PDES);
1882
1883        /* The allocation is done in two stages so that we can bail out with a
1884         * minimal amount of pain. The first stage finds new page tables that
1885         * need allocation. The second stage marks the PTEs in use within those
1886         * page tables.
1887         */
1888        gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1889                if (pt != vm->scratch_pt) {
1890                        WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1891                        continue;
1892                }
1893
1894                /* We've already allocated a page table */
1895                WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1896
1897                pt = alloc_pt(dev);
1898                if (IS_ERR(pt)) {
1899                        ret = PTR_ERR(pt);
1900                        goto unwind_out;
1901                }
1902
1903                gen6_initialize_pt(vm, pt);
1904
1905                ppgtt->pd.page_table[pde] = pt;
1906                __set_bit(pde, new_page_tables);
1907                trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1908        }
1909
1910        start = start_save;
1911        length = length_save;
1912
1913        gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1914                DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1915
1916                bitmap_zero(tmp_bitmap, GEN6_PTES);
1917                bitmap_set(tmp_bitmap, gen6_pte_index(start),
1918                           gen6_pte_count(start, length));
1919
1920                if (__test_and_clear_bit(pde, new_page_tables))
1921                        gen6_write_pde(&ppgtt->pd, pde, pt);
1922
1923                trace_i915_page_table_entry_map(vm, pde, pt,
1924                                         gen6_pte_index(start),
1925                                         gen6_pte_count(start, length),
1926                                         GEN6_PTES);
1927                bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1928                                GEN6_PTES);
1929        }
1930
1931        WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1932
1933        /* Make sure the write is complete before other code can use this page
1934         * table. This is also required for WC mapped PTEs. */
1935        readl(dev_priv->gtt.gsm);
1936
1937        mark_tlbs_dirty(ppgtt);
1938        return 0;
1939
1940unwind_out:
1941        for_each_set_bit(pde, new_page_tables, I915_PDES) {
1942                struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1943
1944                ppgtt->pd.page_table[pde] = vm->scratch_pt;
1945                free_pt(vm->dev, pt);
1946        }
1947
1948        mark_tlbs_dirty(ppgtt);
1949        return ret;
1950}
1951
1952static int gen6_init_scratch(struct i915_address_space *vm)
1953{
1954        struct drm_device *dev = vm->dev;
1955
1956        vm->scratch_page = alloc_scratch_page(dev);
1957        if (IS_ERR(vm->scratch_page))
1958                return PTR_ERR(vm->scratch_page);
1959
1960        vm->scratch_pt = alloc_pt(dev);
1961        if (IS_ERR(vm->scratch_pt)) {
1962                free_scratch_page(dev, vm->scratch_page);
1963                return PTR_ERR(vm->scratch_pt);
1964        }
1965
1966        gen6_initialize_pt(vm, vm->scratch_pt);
1967
1968        return 0;
1969}
1970
1971static void gen6_free_scratch(struct i915_address_space *vm)
1972{
1973        struct drm_device *dev = vm->dev;
1974
1975        free_pt(dev, vm->scratch_pt);
1976        free_scratch_page(dev, vm->scratch_page);
1977}
1978
1979static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1980{
1981        struct i915_hw_ppgtt *ppgtt =
1982                container_of(vm, struct i915_hw_ppgtt, base);
1983        struct i915_page_table *pt;
1984        uint32_t pde;
1985
1986        drm_mm_remove_node(&ppgtt->node);
1987
1988        gen6_for_all_pdes(pt, ppgtt, pde) {
1989                if (pt != vm->scratch_pt)
1990                        free_pt(ppgtt->base.dev, pt);
1991        }
1992
1993        gen6_free_scratch(vm);
1994}
1995
1996static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1997{
1998        struct i915_address_space *vm = &ppgtt->base;
1999        struct drm_device *dev = ppgtt->base.dev;
2000        struct drm_i915_private *dev_priv = dev->dev_private;
2001        bool retried = false;
2002        int ret;
2003
2004        /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
2005         * allocator works in address space sizes, so it's multiplied by page
2006         * size. We allocate at the top of the GTT to avoid fragmentation.
2007         */
2008        BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
2009
2010        ret = gen6_init_scratch(vm);
2011        if (ret)
2012                return ret;
2013
2014alloc:
2015        ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
2016                                                  &ppgtt->node, GEN6_PD_SIZE,
2017                                                  GEN6_PD_ALIGN, 0,
2018                                                  0, dev_priv->gtt.base.total,
2019                                                  DRM_MM_TOPDOWN);
2020        if (ret == -ENOSPC && !retried) {
2021                ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
2022                                               GEN6_PD_SIZE, GEN6_PD_ALIGN,
2023                                               I915_CACHE_NONE,
2024                                               0, dev_priv->gtt.base.total,
2025                                               0);
2026                if (ret)
2027                        goto err_out;
2028
2029                retried = true;
2030                goto alloc;
2031        }
2032
2033        if (ret)
2034                goto err_out;
2035
2036
2037        if (ppgtt->node.start < dev_priv->gtt.mappable_end)
2038                DRM_DEBUG("Forced to use aperture for PDEs\n");
2039
2040        return 0;
2041
2042err_out:
2043        gen6_free_scratch(vm);
2044        return ret;
2045}
2046
2047static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2048{
2049        return gen6_ppgtt_allocate_page_directories(ppgtt);
2050}
2051
2052static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2053                                  uint64_t start, uint64_t length)
2054{
2055        struct i915_page_table *unused;
2056        uint32_t pde, temp;
2057
2058        gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
2059                ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
2060}
2061
2062static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2063{
2064        struct drm_device *dev = ppgtt->base.dev;
2065        struct drm_i915_private *dev_priv = dev->dev_private;
2066        int ret;
2067
2068        ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
2069        if (IS_GEN6(dev)) {
2070                ppgtt->switch_mm = gen6_mm_switch;
2071        } else if (IS_HASWELL(dev)) {
2072                ppgtt->switch_mm = hsw_mm_switch;
2073        } else if (IS_GEN7(dev)) {
2074                ppgtt->switch_mm = gen7_mm_switch;
2075        } else
2076                BUG();
2077
2078        if (intel_vgpu_active(dev))
2079                ppgtt->switch_mm = vgpu_mm_switch;
2080
2081        ret = gen6_ppgtt_alloc(ppgtt);
2082        if (ret)
2083                return ret;
2084
2085        ppgtt->base.allocate_va_range = gen6_alloc_va_range;
2086        ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2087        ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2088        ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2089        ppgtt->base.bind_vma = ppgtt_bind_vma;
2090        ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2091        ppgtt->base.start = 0;
2092        ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2093        ppgtt->debug_dump = gen6_dump_ppgtt;
2094
2095        ppgtt->pd.base.ggtt_offset =
2096                ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2097
2098        ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
2099                ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2100
2101        gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2102
2103        gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2104
2105        DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2106                         ppgtt->node.size >> 20,
2107                         ppgtt->node.start / PAGE_SIZE);
2108
2109        DRM_DEBUG("Adding PPGTT at offset %x\n",
2110                  ppgtt->pd.base.ggtt_offset << 10);
2111
2112        return 0;
2113}
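/*
 * Worked example, illustrative only, assuming I915_PDES == 512,
 * GEN6_PTES == 512, PAGE_SIZE == 4096 and sizeof(gen6_pte_t) == 4:
 *
 *	ppgtt->base.total = 512 * 512 * 4096 = 1GiB of PPGTT address space
 *
 * The PDEs themselves live in the GGTT (see the comment in
 * gen6_ppgtt_allocate_page_directories()). For a node hypothetically
 * placed at GGTT address 0x00100000:
 *
 *	ggtt_offset = 0x00100000 / 4096 * 4 = 0x400 bytes into the GTT
 *	pd_addr     = gsm + 0x400 / 4       = PTE slot 256 of the GTT
 *
 * which is where gen6_write_pde() then writes the directory entries.
 */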
2114
2115static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2116{
2117        ppgtt->base.dev = dev;
2118
2119        if (INTEL_INFO(dev)->gen < 8)
2120                return gen6_ppgtt_init(ppgtt);
2121        else
2122                return gen8_ppgtt_init(ppgtt);
2123}
2124
2125static void i915_address_space_init(struct i915_address_space *vm,
2126                                    struct drm_i915_private *dev_priv)
2127{
2128        drm_mm_init(&vm->mm, vm->start, vm->total);
2129        vm->dev = dev_priv->dev;
2130        INIT_LIST_HEAD(&vm->active_list);
2131        INIT_LIST_HEAD(&vm->inactive_list);
2132        list_add_tail(&vm->global_link, &dev_priv->vm_list);
2133}
2134
2135static void gtt_write_workarounds(struct drm_device *dev)
2136{
2137        struct drm_i915_private *dev_priv = dev->dev_private;
2138
2139        /* This function is for GTT related workarounds. It is called on driver
2140         * load and after a GPU reset, so you can place workarounds here even if
2141         * they get overwritten by a GPU reset.
2142         */
2143        /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
2144        if (IS_BROADWELL(dev))
2145                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2146        else if (IS_CHERRYVIEW(dev))
2147                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2148        else if (IS_SKYLAKE(dev))
2149                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2150        else if (IS_BROXTON(dev))
2151                I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2152}
2153
2154int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2155{
2156        struct drm_i915_private *dev_priv = dev->dev_private;
2157        int ret = 0;
2158
2159        ret = __hw_ppgtt_init(dev, ppgtt);
2160        if (ret == 0) {
2161                kref_init(&ppgtt->ref);
2162                i915_address_space_init(&ppgtt->base, dev_priv);
2163        }
2164
2165        return ret;
2166}
2167
2168int i915_ppgtt_init_hw(struct drm_device *dev)
2169{
2170        gtt_write_workarounds(dev);
2171
2172        /* In the case of execlists, PPGTT is enabled by the context descriptor
2173         * and the PDPs are contained within the context itself.  We don't
2174         * need to do anything here. */
2175        if (i915.enable_execlists)
2176                return 0;
2177
2178        if (!USES_PPGTT(dev))
2179                return 0;
2180
2181        if (IS_GEN6(dev))
2182                gen6_ppgtt_enable(dev);
2183        else if (IS_GEN7(dev))
2184                gen7_ppgtt_enable(dev);
2185        else if (INTEL_INFO(dev)->gen >= 8)
2186                gen8_ppgtt_enable(dev);
2187        else
2188                MISSING_CASE(INTEL_INFO(dev)->gen);
2189
2190        return 0;
2191}
2192
2193int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
2194{
2195        struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
2196        struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2197
2198        if (i915.enable_execlists)
2199                return 0;
2200
2201        if (!ppgtt)
2202                return 0;
2203
2204        return ppgtt->switch_mm(ppgtt, req);
2205}
2206
2207struct i915_hw_ppgtt *
2208i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2209{
2210        struct i915_hw_ppgtt *ppgtt;
2211        int ret;
2212
2213        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2214        if (!ppgtt)
2215                return ERR_PTR(-ENOMEM);
2216
2217        ret = i915_ppgtt_init(dev, ppgtt);
2218        if (ret) {
2219                kfree(ppgtt);
2220                return ERR_PTR(ret);
2221        }
2222
2223        ppgtt->file_priv = fpriv;
2224
2225        trace_i915_ppgtt_create(&ppgtt->base);
2226
2227        return ppgtt;
2228}
2229
2230void i915_ppgtt_release(struct kref *kref)
2231{
2232        struct i915_hw_ppgtt *ppgtt =
2233                container_of(kref, struct i915_hw_ppgtt, ref);
2234
2235        trace_i915_ppgtt_release(&ppgtt->base);
2236
2237        /* vmas should already be unbound */
2238        WARN_ON(!list_empty(&ppgtt->base.active_list));
2239        WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2240
2241        list_del(&ppgtt->base.global_link);
2242        drm_mm_takedown(&ppgtt->base.mm);
2243
2244        ppgtt->base.cleanup(&ppgtt->base);
2245        kfree(ppgtt);
2246}
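/*
 * Illustrative sketch, not driver code: the PPGTT lifecycle as exposed by
 * i915_ppgtt_create() and i915_ppgtt_release() above. The reference taken
 * via kref_init() in i915_ppgtt_init() is dropped with kref_put(), which
 * funnels into i915_ppgtt_release(). Hypothetical caller, locking and
 * error paths trimmed.
 */
static int example_ppgtt_lifecycle(struct drm_device *dev,
                                   struct drm_i915_file_private *file_priv)
{
        struct i915_hw_ppgtt *ppgtt;

        ppgtt = i915_ppgtt_create(dev, file_priv);
        if (IS_ERR(ppgtt))
                return PTR_ERR(ppgtt);

        /* ... bind VMAs into ppgtt->base and submit work ... */

        kref_put(&ppgtt->ref, i915_ppgtt_release);
        return 0;
}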
2247
2248extern int intel_iommu_gfx_mapped;
2249/* Certain Gen5 chipsets require idling the GPU before
2250 * unmapping anything from the GTT when VT-d is enabled.
2251 */
2252static bool needs_idle_maps(struct drm_device *dev)
2253{
2254#ifdef CONFIG_INTEL_IOMMU
2255        /* Query intel_iommu to see if we need the workaround. Presumably that
2256         * was loaded first.
2257         */
2258        if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2259                return true;
2260#endif
2261        return false;
2262}
2263
2264static bool do_idling(struct drm_i915_private *dev_priv)
2265{
2266        bool ret = dev_priv->mm.interruptible;
2267
2268        if (unlikely(dev_priv->gtt.do_idle_maps)) {
2269                dev_priv->mm.interruptible = false;
2270                if (i915_gpu_idle(dev_priv->dev)) {
2271                        DRM_ERROR("Couldn't idle GPU\n");
2272                        /* Wait a bit, in hopes it avoids the hang */
2273                        udelay(10);
2274                }
2275        }
2276
2277        return ret;
2278}
2279
2280static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2281{
2282        if (unlikely(dev_priv->gtt.do_idle_maps))
2283                dev_priv->mm.interruptible = interruptible;
2284}
2285
2286void i915_check_and_clear_faults(struct drm_device *dev)
2287{
2288        struct drm_i915_private *dev_priv = dev->dev_private;
2289        struct intel_engine_cs *ring;
2290        int i;
2291
2292        if (INTEL_INFO(dev)->gen < 6)
2293                return;
2294
2295        for_each_ring(ring, dev_priv, i) {
2296                u32 fault_reg;
2297                fault_reg = I915_READ(RING_FAULT_REG(ring));
2298                if (fault_reg & RING_FAULT_VALID) {
2299                        DRM_DEBUG_DRIVER("Unexpected fault\n"
2300                                         "\tAddr: 0x%08lx\n"
2301                                         "\tAddress space: %s\n"
2302                                         "\tSource ID: %d\n"
2303                                         "\tType: %d\n",
2304                                         fault_reg & PAGE_MASK,
2305                                         fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2306                                         RING_FAULT_SRCID(fault_reg),
2307                                         RING_FAULT_FAULT_TYPE(fault_reg));
2308                        I915_WRITE(RING_FAULT_REG(ring),
2309                                   fault_reg & ~RING_FAULT_VALID);
2310                }
2311        }
2312        POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
2313}
2314
2315static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2316{
2317        if (INTEL_INFO(dev_priv->dev)->gen < 6) {
2318                intel_gtt_chipset_flush();
2319        } else {
2320                I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2321                POSTING_READ(GFX_FLSH_CNTL_GEN6);
2322        }
2323}
2324
2325void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2326{
2327        struct drm_i915_private *dev_priv = dev->dev_private;
2328
2329        /* Don't bother messing with faults pre GEN6 as we have little
2330         * documentation supporting that it's a good idea.
2331         */
2332        if (INTEL_INFO(dev)->gen < 6)
2333                return;
2334
2335        i915_check_and_clear_faults(dev);
2336
2337        dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
2338                                       dev_priv->gtt.base.start,
2339                                       dev_priv->gtt.base.total,
2340                                       true);
2341
2342        i915_ggtt_flush(dev_priv);
2343}
2344
2345int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2346{
2347        if (!dma_map_sg(&obj->base.dev->pdev->dev,
2348                        obj->pages->sgl, obj->pages->nents,
2349                        PCI_DMA_BIDIRECTIONAL))
2350                return -ENOSPC;
2351
2352        return 0;
2353}
2354
2355static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2356{
2357#ifdef writeq
2358        writeq(pte, addr);
2359#else
2360        iowrite32((u32)pte, addr);
2361        iowrite32(pte >> 32, addr + 4);
2362#endif
2363}
2364
2365static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2366                                     struct sg_table *st,
2367                                     uint64_t start,
2368                                     enum i915_cache_level level, u32 unused)
2369{
2370        struct drm_i915_private *dev_priv = vm->dev->dev_private;
2371        unsigned first_entry = start >> PAGE_SHIFT;
2372        gen8_pte_t __iomem *gtt_entries =
2373                (gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2374        int i = 0;
2375        struct sg_page_iter sg_iter;
2376        dma_addr_t addr = 0; /* shut up gcc */
2377        int rpm_atomic_seq;
2378
2379        rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2380
2381        for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2382                addr = sg_dma_address(sg_iter.sg) +
2383                        (sg_iter.sg_pgoffset << PAGE_SHIFT);
2384                gen8_set_pte(&gtt_entries[i],
2385                             gen8_pte_encode(addr, level, true));
2386                i++;
2387        }
2388
2389        /*
2390         * XXX: This serves as a posting read to make sure that the PTE has
2391         * actually been updated. There is some concern that even though
2392         * registers and PTEs are within the same BAR, they could still be subject
2393         * to NUMA-like access patterns. Therefore, even with the way we assume the
2394         * hardware should work, we must keep this posting read for paranoia.
2395         */
2396        if (i != 0)
2397                WARN_ON(readq(&gtt_entries[i-1])
2398                        != gen8_pte_encode(addr, level, true));
2399
2400        /* This next bit makes the above posting read even more important. We
2401         * want to flush the TLBs only after we're certain all the PTE updates
2402         * have finished.
2403         */
2404        I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2405        POSTING_READ(GFX_FLSH_CNTL_GEN6);
2406
2407        assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2408}
2409
2410struct insert_entries {
2411        struct i915_address_space *vm;
2412        struct sg_table *st;
2413        uint64_t start;
2414        enum i915_cache_level level;
2415        u32 flags;
2416};
2417
2418static int gen8_ggtt_insert_entries__cb(void *_arg)
2419{
2420        struct insert_entries *arg = _arg;
2421        gen8_ggtt_insert_entries(arg->vm, arg->st,
2422                                 arg->start, arg->level, arg->flags);
2423        return 0;
2424}
2425
2426static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2427                                          struct sg_table *st,
2428                                          uint64_t start,
2429                                          enum i915_cache_level level,
2430                                          u32 flags)
2431{
2432        struct insert_entries arg = { vm, st, start, level, flags };
2433        stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
2434}
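/*
 * Illustrative sketch, not driver code: the __BKL variant above runs the
 * plain PTE writer under stop_machine() so that no other CPU can race
 * with the GGTT update. A probe function could install it roughly like
 * this; the boolean below is a placeholder, not the driver's actual
 * platform check.
 */
static void example_pick_ggtt_insert_entries(struct drm_i915_private *dev_priv,
                                             bool needs_serialised_updates)
{
        if (needs_serialised_updates)
                dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries__BKL;
        else
                dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
}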
2435
2436/*
2437 * Binds an object into the global gtt with the specified cache level. The object
2438 * will be accessible to the GPU via commands whose operands reference offsets
2439 * within the global GTT as well as accessible by the GPU through the GMADR
2440 * mapped BAR (dev_priv->mm.gtt->gtt).
2441 */
2442static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2443                                     struct sg_table *st,
2444                                     uint64_t start,
2445                                     enum i915_cache_level level, u32 flags)
2446{
2447        struct drm_i915_private *dev_priv = vm->dev->dev_private;
2448        unsigned first_entry = start >> PAGE_SHIFT;
2449        gen6_pte_t __iomem *gtt_entries =
2450                (gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2451        int i = 0;
2452        struct sg_page_iter sg_iter;
2453        dma_addr_t addr = 0;
2454        int rpm_atomic_seq;
2455
2456        rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2457
2458        for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2459                addr = sg_page_iter_dma_address(&sg_iter);
2460                iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
2461                i++;
2462        }
2463
2464        /* XXX: This serves as a posting read to make sure that the PTE has
2465         * actually been updated. There is some concern that even though
2466         * registers and PTEs are within the same BAR, they could still be subject
2467         * to NUMA-like access patterns. Therefore, even with the way we assume the
2468         * hardware should work, we must keep this posting read for paranoia.
2469         */
2470        if (i != 0) {
2471                unsigned long gtt = readl(&gtt_entries[i-1]);
2472                WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
2473        }
2474
2475        /* This next bit makes the above posting read even more important. We
2476         * want to flush the TLBs only after we're certain all the PTE updates
2477         * have finished.
2478         */
2479        I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2480        POSTING_READ(GFX_FLSH_CNTL_GEN6);
2481
2482        assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2483}
2484
2485static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2486                                  uint64_t start,
2487                                  uint64_t length,
2488                                  bool use_scratch)
2489{
2490        struct drm_i915_private *dev_priv = vm->dev->dev_private;
2491        unsigned first_entry = start >> PAGE_SHIFT;
2492        unsigned num_entries = length >> PAGE_SHIFT;
2493        gen8_pte_t scratch_pte, __iomem *gtt_base =
2494                (gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2495        const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2496        int i;
2497        int rpm_atomic_seq;
2498
2499        rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2500
2501        if (WARN(num_entries > max_entries,
2502                 "First entry = %d; Num entries = %d (max=%d)\n",
2503                 first_entry, num_entries, max_entries))
2504                num_entries = max_entries;
2505
2506        scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
2507                                      I915_CACHE_LLC,
2508                                      use_scratch);
2509        for (i = 0; i < num_entries; i++)
2510                gen8_set_pte(&gtt_base[i], scratch_pte);
2511        readl(gtt_base);
2512
2513        assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2514}
2515
2516static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2517                                  uint64_t start,
2518                                  uint64_t length,
2519                                  bool use_scratch)
2520{
2521        struct drm_i915_private *dev_priv = vm->dev->dev_private;
2522        unsigned first_entry = start >> PAGE_SHIFT;
2523        unsigned num_entries = length >> PAGE_SHIFT;
2524        gen6_pte_t scratch_pte, __iomem *gtt_base =
2525                (gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2526        const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2527        int i;
2528        int rpm_atomic_seq;
2529
2530        rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2531
2532        if (WARN(num_entries > max_entries,
2533                 "First entry = %d; Num entries = %d (max=%d)\n",
2534                 first_entry, num_entries, max_entries))
2535                num_entries = max_entries;
2536
2537        scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2538                                     I915_CACHE_LLC, use_scratch, 0);
2539
2540        for (i = 0; i < num_entries; i++)
2541                iowrite32(scratch_pte, &gtt_base[i]);
2542        readl(gtt_base);
2543
2544        assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2545}
2546
2547static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2548                                     struct sg_table *pages,
2549                                     uint64_t start,
2550                                     enum i915_cache_level cache_level, u32 unused)
2551{
2552        struct drm_i915_private *dev_priv = vm->dev->dev_private;
2553        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2554                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2555        int rpm_atomic_seq;
2556
2557        rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2558
2559        intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2560
2561        assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2562
2563}
2564
2565static void i915_ggtt_clear_range(struct i915_address_space *vm,
2566                                  uint64_t start,
2567                                  uint64_t length,
2568                                  bool unused)
2569{
2570        struct drm_i915_private *dev_priv = vm->dev->dev_private;
2571        unsigned first_entry = start >> PAGE_SHIFT;
2572        unsigned num_entries = length >> PAGE_SHIFT;
2573        int rpm_atomic_seq;
2574
2575        rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2576
2577        intel_gtt_clear_range(first_entry, num_entries);
2578
2579        assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2580}
2581
2582static int ggtt_bind_vma(struct i915_vma *vma,
2583                         enum i915_cache_level cache_level,
2584                         u32 flags)
2585{
2586        struct drm_i915_gem_object *obj = vma->obj;
2587        u32 pte_flags = 0;
2588        int ret;
2589
2590        ret = i915_get_ggtt_vma_pages(vma);
2591        if (ret)
2592                return ret;
2593
2594        /* Currently applicable only to VLV */
2595        if (obj->gt_ro)
2596                pte_flags |= PTE_READ_ONLY;
2597
2598        vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2599                                vma->node.start,
2600                                cache_level, pte_flags);
2601
2602        /*
2603         * Without aliasing PPGTT there's no difference between
2604         * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2605         * upgrade to both bound if we bind either to avoid double-binding.
2606         */
2607        vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2608
2609        return 0;
2610}
2611
2612static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2613                                 enum i915_cache_level cache_level,
2614                                 u32 flags)
2615{
2616        struct drm_device *dev = vma->vm->dev;
2617        struct drm_i915_private *dev_priv = dev->dev_private;
2618        struct drm_i915_gem_object *obj = vma->obj;
2619        struct sg_table *pages = obj->pages;
2620        u32 pte_flags = 0;
2621        int ret;
2622
2623        ret = i915_get_ggtt_vma_pages(vma);
2624        if (ret)
2625                return ret;
2626        pages = vma->ggtt_view.pages;
2627
2628        /* Currently applicable only to VLV */
2629        if (obj->gt_ro)
2630                pte_flags |= PTE_READ_ONLY;
2631
2632
2633        if (flags & GLOBAL_BIND) {
2634                vma->vm->insert_entries(vma->vm, pages,
2635                                        vma->node.start,
2636                                        cache_level, pte_flags);
2637        }
2638
2639        if (flags & LOCAL_BIND) {
2640                struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2641                appgtt->base.insert_entries(&appgtt->base, pages,
2642                                            vma->node.start,
2643                                            cache_level, pte_flags);
2644        }
2645
2646        return 0;
2647}
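/*
 * Illustrative sketch, not driver code: with aliasing_gtt_bind_vma()
 * installed as dev_priv->gtt.base.bind_vma (see i915_gem_setup_global_gtt()
 * below), a single bind call can populate the GGTT, the aliasing PPGTT, or
 * both, depending on the flags. Hypothetical caller:
 */
static int example_bind_into_both(struct i915_vma *vma,
                                  enum i915_cache_level cache_level)
{
        return vma->vm->bind_vma(vma, cache_level, GLOBAL_BIND | LOCAL_BIND);
}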
2648
2649static void ggtt_unbind_vma(struct i915_vma *vma)
2650{
2651        struct drm_device *dev = vma->vm->dev;
2652        struct drm_i915_private *dev_priv = dev->dev_private;
2653        struct drm_i915_gem_object *obj = vma->obj;
2654        const uint64_t size = min_t(uint64_t,
2655                                    obj->base.size,
2656                                    vma->node.size);
2657
2658        if (vma->bound & GLOBAL_BIND) {
2659                vma->vm->clear_range(vma->vm,
2660                                     vma->node.start,
2661                                     size,
2662                                     true);
2663        }
2664
2665        if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
2666                struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2667
2668                appgtt->base.clear_range(&appgtt->base,
2669                                         vma->node.start,
2670                                         size,
2671                                         true);
2672        }
2673}
2674
2675void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2676{
2677        struct drm_device *dev = obj->base.dev;
2678        struct drm_i915_private *dev_priv = dev->dev_private;
2679        bool interruptible;
2680
2681        interruptible = do_idling(dev_priv);
2682
2683        dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2684                     PCI_DMA_BIDIRECTIONAL);
2685
2686        undo_idling(dev_priv, interruptible);
2687}
2688
2689static void i915_gtt_color_adjust(struct drm_mm_node *node,
2690                                  unsigned long color,
2691                                  u64 *start,
2692                                  u64 *end)
2693{
2694        if (node->color != color)
2695                *start += 4096;
2696
2697        if (!list_empty(&node->node_list)) {
2698                node = list_entry(node->node_list.next,
2699                                  struct drm_mm_node,
2700                                  node_list);
2701                if (node->allocated && node->color != color)
2702                        *end -= 4096;
2703        }
2704}
2705
2706static int i915_gem_setup_global_gtt(struct drm_device *dev,
2707                                     u64 start,
2708                                     u64 mappable_end,
2709                                     u64 end)
2710{
2711        /* Let GEM manage all of the aperture.
2712         *
2713         * However, leave one page at the end still bound to the scratch page.
2714         * There are a number of places where the hardware apparently prefetches
2715         * past the end of the object, and we've seen multiple hangs with the
2716         * GPU head pointer stuck in a batchbuffer bound at the last page of the
2717         * aperture.  One page should be enough to keep any prefetching inside
2718         * of the aperture.
2719         */
2720        struct drm_i915_private *dev_priv = dev->dev_private;
2721        struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
2722        struct drm_mm_node *entry;
2723        struct drm_i915_gem_object *obj;
2724        unsigned long hole_start, hole_end;
2725        int ret;
2726
2727        BUG_ON(mappable_end > end);
2728
2729        ggtt_vm->start = start;
2730
2731        /* Subtract the guard page before address space initialization to
2732         * shrink the range used by drm_mm */
2733        ggtt_vm->total = end - start - PAGE_SIZE;
2734        i915_address_space_init(ggtt_vm, dev_priv);
2735        ggtt_vm->total += PAGE_SIZE;
2736
2737        if (intel_vgpu_active(dev)) {
2738                ret = intel_vgt_balloon(dev);
2739                if (ret)
2740                        return ret;
2741        }
2742
2743        if (!HAS_LLC(dev))
2744                ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
2745
2746        /* Mark any preallocated objects as occupied */
2747        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2748                struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
2749
2750                DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
2751                              i915_gem_obj_ggtt_offset(obj), obj->base.size);
2752
2753                WARN_ON(i915_gem_obj_ggtt_bound(obj));
2754                ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
2755                if (ret) {
2756                        DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2757                        return ret;
2758                }
2759                vma->bound |= GLOBAL_BIND;
2760                __i915_vma_set_map_and_fenceable(vma);
2761                list_add_tail(&vma->vm_link, &ggtt_vm->inactive_list);
2762        }
2763
2764        /* Clear any non-preallocated blocks */
2765        drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
2766                DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2767                              hole_start, hole_end);
2768                ggtt_vm->clear_range(ggtt_vm, hole_start,
2769                                     hole_end - hole_start, true);
2770        }
2771
2772        /* And finally clear the reserved guard page */
2773        ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
2774
2775        if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2776                struct i915_hw_ppgtt *ppgtt;
2777
2778                ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2779                if (!ppgtt)
2780                        return -ENOMEM;
2781
2782                ret = __hw_ppgtt_init(dev, ppgtt);
2783                if (ret) {
2784                        ppgtt->base.cleanup(&ppgtt->base);
2785                        kfree(ppgtt);
2786                        return ret;
2787                }
2788
2789                if (ppgtt->base.allocate_va_range)
2790                        ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2791                                                            ppgtt->base.total);
2792                if (ret) {
2793                        ppgtt->base.cleanup(&ppgtt->base);
2794                        kfree(ppgtt);
2795                        return ret;
2796                }
2797
2798                ppgtt->base.clear_range(&ppgtt->base,
2799                                        ppgtt->base.start,
2800                                        ppgtt->base.total,
2801                                        true);
2802
2803                dev_priv->mm.aliasing_ppgtt = ppgtt;
2804                WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
2805                dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
2806        }
2807
2808        return 0;
2809}
2810
2811void i915_gem_init_global_gtt(struct drm_device *dev)
2812{
2813        struct drm_i915_private *dev_priv = dev->dev_private;
2814        u64 gtt_size, mappable_size;
2815
2816        gtt_size = dev_priv->gtt.base.total;
2817        mappable_size = dev_priv->gtt.mappable_end;
2818
2819        i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
2820}
2821
2822void i915_global_gtt_cleanup(struct drm_device *dev)
2823{
2824        struct drm_i915_private *dev_priv = dev->dev_private;
2825        struct i915_address_space *vm = &dev_priv->gtt.base;
2826
2827        if (dev_priv->mm.aliasing_ppgtt) {
2828                struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2829
2830                ppgtt->base.cleanup(&ppgtt->base);
2831        }
2832
2833        i915_gem_cleanup_stolen(dev);
2834
2835        if (drm_mm_initialized(&vm->mm)) {
2836                if (intel_vgpu_active(dev))
2837                        intel_vgt_deballoon();
2838
2839                drm_mm_takedown(&vm->mm);
2840                list_del(&vm->global_link);
2841        }
2842
2843        vm->cleanup(vm);
2844}
2845
2846static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2847{
2848        snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2849        snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2850        return snb_gmch_ctl << 20;
2851}
2852
2853static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2854{
2855        bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2856        bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2857        if (bdw_gmch_ctl)
2858                bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2859
2860#ifdef CONFIG_X86_32
2861        /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2862        if (bdw_gmch_ctl > 4)
2863                bdw_gmch_ctl = 4;
2864#endif
2865
2866        return bdw_gmch_ctl << 20;
2867}
2868
2869static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2870{
2871        gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2872        gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2873
2874        if (gmch_ctrl)
2875                return 1 << (20 + gmch_ctrl);
2876
2877        return 0;
2878}
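/*
 * Worked examples, illustrative only, for the three GGTT-size decoders
 * above (the values shown are the GGMS field after shifting and masking):
 *
 *	gen6, GGMS field 2: 2 << 20        = 2MB of PTE space
 *	gen8, GGMS field 3: (1 << 3) << 20 = 8MB of PTE space
 *	chv,  GGMS field 3: 1 << (20 + 3)  = 8MB of PTE space
 *
 * Assuming 8-byte gen8 PTEs, 8MB of PTE space maps 8MB / 8 * 4096 = 4GB
 * of GGTT address space.
 */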
2879
2880static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2881{
2882        snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2883        snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2884        return snb_gmch_ctl << 25; /* 32 MB units */
2885}
2886
2887static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2888{
2889        bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2890        bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2891        return bdw_gmch_ctl << 25; /* 32 MB units */
2892}
2893
2894static size_t chv_get_stolen_size(u16 gmch_ctrl)
2895{
2896        gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2897        gmch_ctrl &= SNB_GMCH_GMS_MASK;
2898
2899        /*
2900         * 0x0  to 0x10: 32MB increments starting at 0MB
2901         * 0x11 to 0x16: 4MB increments starting at 8MB
2902         * 0x17 to 0x1d: 4MB increments starting at 36MB
2903         */
2904        if (gmch_ctrl < 0x11)
2905                return gmch_ctrl << 25;
2906        else if (gmch_ctrl < 0x17)
2907                return (gmch_ctrl - 0x11 + 2) << 22;
2908        else
2909                return (gmch_ctrl - 0x17 + 9) << 22;
2910}
2911
2912static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2913{
2914        gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2915        gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2916
2917        if (gen9_gmch_ctl < 0xf0)
2918                return gen9_gmch_ctl << 25; /* 32 MB units */
2919        else
2920                /* values 0xf0 and up: 4MB increments starting at 4MB */
2921                return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2922}
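/*
 * Worked examples, illustrative only, for the stolen-size decoders above
 * (the values shown are the GMS field after shifting and masking):
 *
 *	gen6/gen8, GMS 0x02: 0x02 << 25              = 64MB
 *	chv,       GMS 0x12: (0x12 - 0x11 + 2) << 22 = 12MB
 *	chv,       GMS 0x18: (0x18 - 0x17 + 9) << 22 = 40MB
 *	gen9,      GMS 0xf1: (0xf1 - 0xf0 + 1) << 22 = 8MB
 */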
2923
2924static int ggtt_probe_common(struct drm_device *dev,
2925                             size_t gtt_size)
2926{
2927        struct drm_i915_private *dev_priv = dev->dev_private;
2928        struct i915_page_scratch *scratch_page;
2929        phys_addr_t gtt_phys_addr;
2930
2931        /* For Modern GENs the PTEs and register space are split in the BAR */
2932        gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2933                (pci_resource_len(dev->pdev, 0) / 2);
2934
2935        /*
2936         * On BXT writes larger than 64 bit to the GTT pagetable range will be
2937         * dropped. For WC mappings in general we have 64 byte burst writes
2938         * when the WC buffer is flushed, so we can't use it, but have to
2939         * resort to an uncached mapping. The WC issue is easily caught by the
2940         * readback check when writing GTT PTE entries.
2941         */
2942        if (IS_BROXTON(dev))
2943                dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
2944        else
2945                dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
2946        if (!dev_priv->gtt.gsm) {
2947                DRM_ERROR("Failed to map the gtt page table\n");
2948                return -ENOMEM;
2949        }
2950
2951        scratch_page = alloc_scratch_page(dev);
2952        if (IS_ERR(scratch_page)) {
2953                DRM_ERROR("Scratch setup failed\n");
2954                /* iounmap will also get called at remove, but meh */
2955                iounmap(dev_priv->gtt.gsm);
2956                return PTR_ERR(scratch_page);
2957        }
2958
2959        dev_priv->gtt.base.scratch_page = scratch_page;
2960
2961        return 0;
2962}
2963
2964/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2965 * bits. When using advanced contexts each context stores its own PAT, but
2966 * writing this data shouldn't be harmful even in those cases. */
2967static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
2968{
2969        uint64_t pat;
2970
2971        pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
2972              GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2973              GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2974              GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
2975              GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
2976              GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
2977              GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
2978              GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
2979
2980        if (!USES_PPGTT(dev_priv->dev))
2981                /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
2982                 * so RTL will always use the value corresponding to
2983                 * pat_sel = 000".
2984                 * So let's disable cache for GGTT to avoid screen corruptions.
2985                 * MOCS still can be used though.
2986                 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
2987                 * before this patch, i.e. the same uncached + snooping access
2988                 * like on gen6/7 seems to be in effect.
2989                 * - So this just fixes blitter/render access. Again it looks
2990                 * like it's not just uncached access, but uncached + snooping.
2991                 * So we can still hold onto all our assumptions wrt cpu
2992                 * clflushing on LLC machines.
2993                 */
2994                pat = GEN8_PPAT(0, GEN8_PPAT_UC);
2995
2996        /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
2997         * write would work. */
2998        I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
2999        I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3000}
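
    /*
     * Note that each GEN8_PPAT(index, value) entry above occupies its own
     * byte of the 64-bit pat value, which is why the result is written out
     * as a LO/HI pair of 32-bit registers.
     */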
3001
3002static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
3003{
3004        uint64_t pat;
3005
3006        /*
3007         * Map WB on BDW to snooped on CHV.
3008         *
3009         * Only the snoop bit has meaning for CHV, the rest is
3010         * ignored.
3011         *
3012         * The hardware will never snoop for certain types of accesses:
3013         * - CPU GTT (GMADR->GGTT->no snoop->memory)
3014         * - PPGTT page tables
3015         * - some other special cycles
3016         *
3017         * As with BDW, we also need to consider the following for GT accesses:
3018         * "For GGTT, there is NO pat_sel[2:0] from the entry,
3019         * so RTL will always use the value corresponding to
3020         * pat_sel = 000".
3021         * Which means we must set the snoop bit in PAT entry 0
3022         * in order to keep the global status page working.
3023         */
3024        pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3025              GEN8_PPAT(1, 0) |
3026              GEN8_PPAT(2, 0) |
3027              GEN8_PPAT(3, 0) |
3028              GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3029              GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3030              GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3031              GEN8_PPAT(7, CHV_PPAT_SNOOP);
3032
3033        I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3034        I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
3035}
3036
3037static int gen8_gmch_probe(struct drm_device *dev,
3038                           u64 *gtt_total,
3039                           size_t *stolen,
3040                           phys_addr_t *mappable_base,
3041                           u64 *mappable_end)
3042{
3043        struct drm_i915_private *dev_priv = dev->dev_private;
3044        u64 gtt_size;
3045        u16 snb_gmch_ctl;
3046        int ret;
3047
3048        /* TODO: We're not aware of mappable constraints on gen8 yet */
3049        *mappable_base = pci_resource_start(dev->pdev, 2);
3050        *mappable_end = pci_resource_len(dev->pdev, 2);
3051
3052        if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
3053                pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
3054
3055        pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3056
3057        if (INTEL_INFO(dev)->gen >= 9) {
3058                *stolen = gen9_get_stolen_size(snb_gmch_ctl);
3059                gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3060        } else if (IS_CHERRYVIEW(dev)) {
3061                *stolen = chv_get_stolen_size(snb_gmch_ctl);
3062                gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
3063        } else {
3064                *stolen = gen8_get_stolen_size(snb_gmch_ctl);
3065                gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3066        }
3067
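            /*
             * gtt_size is the size of the PTE array in bytes; e.g. with
             * 8-byte gen8 PTEs an 8MB table holds 1M entries and therefore
             * maps 1M * 4KB = 4GB of GGTT address space.
             */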
3068        *gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3069
3070        if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3071                chv_setup_private_ppat(dev_priv);
3072        else
3073                bdw_setup_private_ppat(dev_priv);
3074
3075        ret = ggtt_probe_common(dev, gtt_size);
3076
3077        dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
3078        dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
3079        dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3080        dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3081
3082        if (IS_CHERRYVIEW(dev_priv))
3083                dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries__BKL;
3084
3085        return ret;
3086}
3087
3088static int gen6_gmch_probe(struct drm_device *dev,
3089                           u64 *gtt_total,
3090                           size_t *stolen,
3091                           phys_addr_t *mappable_base,
3092                           u64 *mappable_end)
3093{
3094        struct drm_i915_private *dev_priv = dev->dev_private;
3095        unsigned int gtt_size;
3096        u16 snb_gmch_ctl;
3097        int ret;
3098
3099        *mappable_base = pci_resource_start(dev->pdev, 2);
3100        *mappable_end = pci_resource_len(dev->pdev, 2);
3101
3102        /* 64/512MB is the current min/max we actually know of, but this is just
3103         * a coarse sanity check.
3104         */
3105        if (*mappable_end < (64<<20) || *mappable_end > (512<<20)) {
3106                DRM_ERROR("Unknown GMADR size (%llx)\n",
3107                          *mappable_end);
3108                return -ENXIO;
3109        }
3110
3111        if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3112                pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3113        pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3114
3115        *stolen = gen6_get_stolen_size(snb_gmch_ctl);
3116
3117        gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
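            /*
             * Same conversion as in gen8_gmch_probe(), but gen6 PTEs are 4
             * bytes, so e.g. a 2MB PTE table maps 2GB of GGTT address space.
             */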
3118        *gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3119
3120        ret = ggtt_probe_common(dev, gtt_size);
3121
3122        dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
3123        dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
3124        dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3125        dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3126
3127        return ret;
3128}
3129
3130static void gen6_gmch_remove(struct i915_address_space *vm)
3131{
3133        struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
3134
3135        iounmap(gtt->gsm);
3136        free_scratch_page(vm->dev, vm->scratch_page);
3137}
3138
3139static int i915_gmch_probe(struct drm_device *dev,
3140                           u64 *gtt_total,
3141                           size_t *stolen,
3142                           phys_addr_t *mappable_base,
3143                           u64 *mappable_end)
3144{
3145        struct drm_i915_private *dev_priv = dev->dev_private;
3146        int ret;
3147
3148        ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
3149        if (!ret) {
3150                DRM_ERROR("failed to set up gmch\n");
3151                return -EIO;
3152        }
3153
3154        intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
3155
3156        dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
3157        dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
3158        dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
3159        dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3160        dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3161
3162        if (unlikely(dev_priv->gtt.do_idle_maps))
3163                DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3164
3165        return 0;
3166}
3167
3168static void i915_gmch_remove(struct i915_address_space *vm)
3169{
3170        intel_gmch_remove();
3171}
3172
3173int i915_gem_gtt_init(struct drm_device *dev)
3174{
3175        struct drm_i915_private *dev_priv = dev->dev_private;
3176        struct i915_gtt *gtt = &dev_priv->gtt;
3177        int ret;
3178
3179        if (INTEL_INFO(dev)->gen <= 5) {
3180                gtt->gtt_probe = i915_gmch_probe;
3181                gtt->base.cleanup = i915_gmch_remove;
3182        } else if (INTEL_INFO(dev)->gen < 8) {
3183                gtt->gtt_probe = gen6_gmch_probe;
3184                gtt->base.cleanup = gen6_gmch_remove;
3185                if (IS_HASWELL(dev) && dev_priv->ellc_size)
3186                        gtt->base.pte_encode = iris_pte_encode;
3187                else if (IS_HASWELL(dev))
3188                        gtt->base.pte_encode = hsw_pte_encode;
3189                else if (IS_VALLEYVIEW(dev))
3190                        gtt->base.pte_encode = byt_pte_encode;
3191                else if (INTEL_INFO(dev)->gen >= 7)
3192                        gtt->base.pte_encode = ivb_pte_encode;
3193                else
3194                        gtt->base.pte_encode = snb_pte_encode;
3195        } else {
3196                gtt->gtt_probe = gen8_gmch_probe;
3197                gtt->base.cleanup = gen6_gmch_remove;
3198        }
3199
3200        gtt->base.dev = dev;
3201        gtt->base.is_ggtt = true;
3202
3203        ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
3204                             &gtt->mappable_base, &gtt->mappable_end);
3205        if (ret)
3206                return ret;
3207
3208        /*
3209         * Initialise stolen early so that we may reserve preallocated
3210         * objects for the BIOS to KMS transition.
3211         */
3212        ret = i915_gem_init_stolen(dev);
3213        if (ret)
3214                goto out_gtt_cleanup;
3215
3216        /* GMADR is the PCI mmio aperture into the global GTT. */
3217        DRM_INFO("Memory usable by graphics device = %lluM\n",
3218                 gtt->base.total >> 20);
3219        DRM_DEBUG_DRIVER("GMADR size = %lluM\n", gtt->mappable_end >> 20);
3220        DRM_DEBUG_DRIVER("GTT stolen size = %zuM\n", gtt->stolen_size >> 20);
3221#ifdef CONFIG_INTEL_IOMMU
3222        if (intel_iommu_gfx_mapped)
3223                DRM_INFO("VT-d active for gfx access\n");
3224#endif
3225        /*
3226         * i915.enable_ppgtt is read-only, so do an early pass to validate the
3227         * user's requested state against the hardware/driver capabilities.  We
3228         * do this now so that we can print out any log messages once rather
3229         * than every time we check intel_enable_ppgtt().
3230         */
3231        i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
3232        DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
3233
3234        return 0;
3235
3236out_gtt_cleanup:
3237        gtt->base.cleanup(&dev_priv->gtt.base);
3238
3239        return ret;
3240}
3241
3242void i915_gem_restore_gtt_mappings(struct drm_device *dev)
3243{
3244        struct drm_i915_private *dev_priv = dev->dev_private;
3245        struct drm_i915_gem_object *obj;
3246        struct i915_address_space *vm;
3247        struct i915_vma *vma;
3248        bool flush;
3249
3250        i915_check_and_clear_faults(dev);
3251
3252        /* First fill our portion of the GTT with scratch pages */
3253        dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
3254                                       dev_priv->gtt.base.start,
3255                                       dev_priv->gtt.base.total,
3256                                       true);
3257
3258        /* Cache flush objects bound into GGTT and rebind them. */
3259        vm = &dev_priv->gtt.base;
3260        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3261                flush = false;
3262                list_for_each_entry(vma, &obj->vma_list, obj_link) {
3263                        if (vma->vm != vm)
3264                                continue;
3265
3266                        WARN_ON(i915_vma_bind(vma, obj->cache_level,
3267                                              PIN_UPDATE));
3268
3269                        flush = true;
3270                }
3271
3272                if (flush)
3273                        i915_gem_clflush_object(obj, obj->pin_display);
3274        }
3275
3276        if (INTEL_INFO(dev)->gen >= 8) {
3277                if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3278                        chv_setup_private_ppat(dev_priv);
3279                else
3280                        bdw_setup_private_ppat(dev_priv);
3281
3282                return;
3283        }
3284
3285        if (USES_PPGTT(dev)) {
3286                list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3287                        /* TODO: Perhaps it shouldn't be gen6 specific */
3288
3289                        struct i915_hw_ppgtt *ppgtt =
3290                                        container_of(vm, struct i915_hw_ppgtt,
3291                                                     base);
3292
3293                        if (i915_is_ggtt(vm))
3294                                ppgtt = dev_priv->mm.aliasing_ppgtt;
3295
3296                        gen6_write_page_range(dev_priv, &ppgtt->pd,
3297                                              0, ppgtt->base.total);
3298                }
3299        }
3300
3301        i915_ggtt_flush(dev_priv);
3302}
3303
3304static struct i915_vma *
3305__i915_gem_vma_create(struct drm_i915_gem_object *obj,
3306                      struct i915_address_space *vm,
3307                      const struct i915_ggtt_view *ggtt_view)
3308{
3309        struct i915_vma *vma;
3310
3311        if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3312                return ERR_PTR(-EINVAL);
3313
3314        vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3315        if (vma == NULL)
3316                return ERR_PTR(-ENOMEM);
3317
3318        INIT_LIST_HEAD(&vma->vm_link);
3319        INIT_LIST_HEAD(&vma->obj_link);
3320        INIT_LIST_HEAD(&vma->exec_list);
3321        vma->vm = vm;
3322        vma->obj = obj;
3323        vma->is_ggtt = i915_is_ggtt(vm);
3324
3325        if (i915_is_ggtt(vm))
3326                vma->ggtt_view = *ggtt_view;
3327        else
3328                i915_ppgtt_get(i915_vm_to_ppgtt(vm));
3329
3330        list_add_tail(&vma->obj_link, &obj->vma_list);
3331
3332        return vma;
3333}
3334
3335struct i915_vma *
3336i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3337                                  struct i915_address_space *vm)
3338{
3339        struct i915_vma *vma;
3340
3341        vma = i915_gem_obj_to_vma(obj, vm);
3342        if (!vma)
3343                vma = __i915_gem_vma_create(obj, vm,
3344                                            i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
3345
3346        return vma;
3347}
3348
3349struct i915_vma *
3350i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3351                                       const struct i915_ggtt_view *view)
3352{
3353        struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
3354        struct i915_vma *vma;
3355
3356        if (WARN_ON(!view))
3357                return ERR_PTR(-EINVAL);
3358
3359        vma = i915_gem_obj_to_ggtt_view(obj, view);
3360
3361        if (IS_ERR(vma))
3362                return vma;
3363
3364        if (!vma)
3365                vma = __i915_gem_vma_create(obj, ggtt, view);
3366
3367        return vma;
3369}
3370
3371static struct scatterlist *
3372rotate_pages(const dma_addr_t *in, unsigned int offset,
3373             unsigned int width, unsigned int height,
3374             unsigned int stride,
3375             struct sg_table *st, struct scatterlist *sg)
3376{
3377        unsigned int column, row;
3378        unsigned int src_idx;
3379
3380        if (!sg) {
3381                st->nents = 0;
3382                sg = st->sgl;
3383        }
3384
3385        for (column = 0; column < width; column++) {
3386                src_idx = stride * (height - 1) + column;
3387                for (row = 0; row < height; row++) {
3388                        st->nents++;
3389                        /* We don't need the pages, but need to initialize
3390                         * the entries so the sg list can be happily traversed.
3391                         * All we need are the DMA addresses.
3392                         */
3393                        sg_set_page(sg, NULL, PAGE_SIZE, 0);
3394                        sg_dma_address(sg) = in[offset + src_idx];
3395                        sg_dma_len(sg) = PAGE_SIZE;
3396                        sg = sg_next(sg);
3397                        src_idx -= stride;
3398                }
3399        }
3400
3401        return sg;
3402}
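
    /*
     * Example walk of rotate_pages() for width = 2, height = 2, stride = 2:
     * column 0 emits in[2] then in[0], column 1 emits in[3] then in[1],
     * i.e. each source column is read bottom-up, which yields a 90 degree
     * rotation of the page layout.
     */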
3403
3404static struct sg_table *
3405intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
3406                          struct drm_i915_gem_object *obj)
3407{
3408        unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
3409        unsigned int size_pages_uv;
3410        struct sg_page_iter sg_iter;
3411        unsigned long i;
3412        dma_addr_t *page_addr_list;
3413        struct sg_table *st;
3414        unsigned int uv_start_page;
3415        struct scatterlist *sg;
3416        int ret = -ENOMEM;
3417
3418        /* Allocate a temporary list of source pages for random access. */
3419        page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
3420                                       sizeof(dma_addr_t));
3421        if (!page_addr_list)
3422                return ERR_PTR(ret);
3423
3424        /* Account for UV plane with NV12. */
3425        if (rot_info->pixel_format == DRM_FORMAT_NV12)
3426                size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
3427        else
3428                size_pages_uv = 0;
3429
3430        /* Allocate target SG list. */
3431        st = kmalloc(sizeof(*st), GFP_KERNEL);
3432        if (!st)
3433                goto err_st_alloc;
3434
3435        ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3436        if (ret)
3437                goto err_sg_alloc;
3438
3439        /* Populate source page list from the object. */
3440        i = 0;
3441        for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
3442                page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
3443                i++;
3444        }
3445
3446        /* Rotate the pages. */
3447        sg = rotate_pages(page_addr_list, 0,
3448                     rot_info->width_pages, rot_info->height_pages,
3449                     rot_info->width_pages,
3450                     st, NULL);
3451
3452        /* Append the UV plane if NV12. */
3453        if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3454                uv_start_page = size_pages;
3455
3456                /* Check for tile-row misalignment. */
3457                if (offset_in_page(rot_info->uv_offset))
3458                        uv_start_page--;
3459
3460                rot_info->uv_start_page = uv_start_page;
3461
3462                rotate_pages(page_addr_list, uv_start_page,
3463                             rot_info->width_pages_uv,
3464                             rot_info->height_pages_uv,
3465                             rot_info->width_pages_uv,
3466                             st, sg);
3467        }
3468
3469        DRM_DEBUG_KMS(
3470                      "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
3471                      obj->base.size, rot_info->pitch, rot_info->height,
3472                      rot_info->pixel_format, rot_info->width_pages,
3473                      rot_info->height_pages, size_pages + size_pages_uv,
3474                      size_pages);
3475
3476        drm_free_large(page_addr_list);
3477
3478        return st;
3479
3480err_sg_alloc:
3481        kfree(st);
3482err_st_alloc:
3483        drm_free_large(page_addr_list);
3484
3485        DRM_DEBUG_KMS(
3486                      "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
3487                      obj->base.size, ret, rot_info->pitch, rot_info->height,
3488                      rot_info->pixel_format, rot_info->width_pages,
3489                      rot_info->height_pages, size_pages + size_pages_uv,
3490                      size_pages);
3491        return ERR_PTR(ret);
3492}
3493
3494static struct sg_table *
3495intel_partial_pages(const struct i915_ggtt_view *view,
3496                    struct drm_i915_gem_object *obj)
3497{
3498        struct sg_table *st;
3499        struct scatterlist *sg;
3500        struct sg_page_iter obj_sg_iter;
3501        int ret = -ENOMEM;
3502
3503        st = kmalloc(sizeof(*st), GFP_KERNEL);
3504        if (!st)
3505                goto err_st_alloc;
3506
3507        ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3508        if (ret)
3509                goto err_sg_alloc;
3510
3511        sg = st->sgl;
3512        st->nents = 0;
3513        for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3514                view->params.partial.offset)
3515        {
3516                if (st->nents >= view->params.partial.size)
3517                        break;
3518
3519                sg_set_page(sg, NULL, PAGE_SIZE, 0);
3520                sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3521                sg_dma_len(sg) = PAGE_SIZE;
3522
3523                sg = sg_next(sg);
3524                st->nents++;
3525        }
3526
3527        return st;
3528
3529err_sg_alloc:
3530        kfree(st);
3531err_st_alloc:
3532        return ERR_PTR(ret);
3533}
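
    /*
     * The partial view is thus simply view->params.partial.size page-sized
     * entries copied from the object's sg table, starting at page offset
     * view->params.partial.offset: a window onto a sub-range of the object.
     */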
3534
3535static int
3536i915_get_ggtt_vma_pages(struct i915_vma *vma)
3537{
3538        int ret = 0;
3539
3540        if (vma->ggtt_view.pages)
3541                return 0;
3542
3543        if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3544                vma->ggtt_view.pages = vma->obj->pages;
3545        else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3546                vma->ggtt_view.pages =
3547                        intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
3548        else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3549                vma->ggtt_view.pages =
3550                        intel_partial_pages(&vma->ggtt_view, vma->obj);
3551        else
3552                WARN_ONCE(1, "GGTT view %u not implemented!\n",
3553                          vma->ggtt_view.type);
3554
3555        if (!vma->ggtt_view.pages) {
3556                DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3557                          vma->ggtt_view.type);
3558                ret = -EINVAL;
3559        } else if (IS_ERR(vma->ggtt_view.pages)) {
3560                ret = PTR_ERR(vma->ggtt_view.pages);
3561                vma->ggtt_view.pages = NULL;
3562                DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3563                          vma->ggtt_view.type, ret);
3564        }
3565
3566        return ret;
3567}
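
    /*
     * A normal view reuses the object's own page list (obj->pages), whereas
     * rotated and partial views build a fresh sg_table that carries only the
     * DMA addresses describing the alternative layout.
     */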
3568
3569/**
3570 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3571 * @vma: VMA to map
3572 * @cache_level: mapping cache level
3573 * @flags: flags like global or local mapping
3574 *
3575 * DMA addresses are taken from the scatter-gather table of this object (or of
3576 * this VMA in case of non-default GGTT views) and the PTE entries are set up.
3577 * Note that DMA addresses are also the only part of the SG table we care about.
3578 */
3579int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3580                  u32 flags)
3581{
3582        int ret;
3583        u32 bind_flags;
3584
3585        if (WARN_ON(flags == 0))
3586                return -EINVAL;
3587
3588        bind_flags = 0;
3589        if (flags & PIN_GLOBAL)
3590                bind_flags |= GLOBAL_BIND;
3591        if (flags & PIN_USER)
3592                bind_flags |= LOCAL_BIND;
3593
3594        if (flags & PIN_UPDATE)
3595                bind_flags |= vma->bound;
3596        else
3597                bind_flags &= ~vma->bound;
3598
3599        if (bind_flags == 0)
3600                return 0;
3601
3602        if (vma->bound == 0 && vma->vm->allocate_va_range) {
3603                /* XXX: i915_vma_pin() will fix this +- hack */
3604                vma->pin_count++;
3605                trace_i915_va_alloc(vma);
3606                ret = vma->vm->allocate_va_range(vma->vm,
3607                                                 vma->node.start,
3608                                                 vma->node.size);
3609                vma->pin_count--;
3610                if (ret)
3611                        return ret;
3612        }
3613
3614        ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3615        if (ret)
3616                return ret;
3617
3618        vma->bound |= bind_flags;
3619
3620        return 0;
3621}
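
    /*
     * Example of the bind_flags masking above: a VMA already bound globally
     * (vma->bound == GLOBAL_BIND) that is bound again with
     * PIN_GLOBAL | PIN_USER only gets its PPGTT (LOCAL_BIND) PTEs written;
     * passing PIN_UPDATE instead keeps the already-bound flags so the
     * existing global PTEs are rewritten as well.
     */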
3622
3623/**
3624 * i915_ggtt_view_size - Get the size of a GGTT view.
3625 * @obj: Object the view is of.
3626 * @view: The view in question.
3627 *
3628 * Return: The size of the GGTT view in bytes.
3629 */
3630size_t
3631i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3632                    const struct i915_ggtt_view *view)
3633{
3634        if (view->type == I915_GGTT_VIEW_NORMAL) {
3635                return obj->base.size;
3636        } else if (view->type == I915_GGTT_VIEW_ROTATED) {
3637                return view->params.rotated.size;
3638        } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3639                return view->params.partial.size << PAGE_SHIFT;
3640        } else {
3641                WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3642                return obj->base.size;
3643        }
3644}
3645