linux/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gen6_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_gt.h"

/* Write pde (index) from the page directory @pd to the page table @pt */
static void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
                           const unsigned int pde,
                           const struct i915_page_table *pt)
{
        dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);

        /* Caller needs to make sure the write completes if necessary */
        iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
                  ppgtt->pd_addr + pde);
}

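/*
 * gen7 (Ivybridge/Haswell): program the ECO/ECOCHK bits that control how
 * PPGTT PTE/PDE fetches are cached (64B fetches, LLC/WB cacheability).
 * This only sets up caching behaviour; it does not itself switch on PPGTT
 * translation.
 */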
void gen7_ppgtt_enable(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;
        u32 ecochk;

        intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

        ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
        if (IS_HASWELL(i915)) {
                ecochk |= ECOCHK_PPGTT_WB_HSW;
        } else {
                ecochk |= ECOCHK_PPGTT_LLC_IVB;
                ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
        }
        intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
}

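/*
 * gen6 (Sandybridge): set the equivalent cacheability/fault-handling bits
 * and, unless PPGTT has been disabled (e.g. for VT-d), globally enable
 * PPGTT translation via GFX_MODE.
 */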
void gen6_ppgtt_enable(struct intel_gt *gt)
{
        struct intel_uncore *uncore = gt->uncore;

        intel_uncore_rmw(uncore,
                         GAC_ECO_BITS,
                         0,
                         ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

        intel_uncore_rmw(uncore,
                         GAB_CTL,
                         0,
                         GAB_CTL_CONT_AFTER_PAGEFAULT);

        intel_uncore_rmw(uncore,
                         GAM_ECOCHK,
                         0,
                         ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

        if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
                intel_uncore_write(uncore,
                                   GFX_MODE,
                                   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
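/*
 * Reset every PTE in [start, start + length) back to the scratch page and
 * drop the use-count of each page table touched. A fully drained page
 * table is only flagged for later reaping (scan_for_unused_pt); see the
 * comment below for why its PDE cannot be torn down immediately.
 */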
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
                                   u64 start, u64 length)
{
        struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
        const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
        const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
        unsigned int pde = first_entry / GEN6_PTES;
        unsigned int pte = first_entry % GEN6_PTES;
        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

        while (num_entries) {
                struct i915_page_table * const pt =
                        i915_pt_entry(ppgtt->base.pd, pde++);
                const unsigned int count = min(num_entries, GEN6_PTES - pte);
                gen6_pte_t *vaddr;

                num_entries -= count;

                GEM_BUG_ON(count > atomic_read(&pt->used));
                if (!atomic_sub_return(count, &pt->used))
                        ppgtt->scan_for_unused_pt = true;

                /*
                 * Note that the hw doesn't support removing PDE on the fly
                 * (they are cached inside the context with no means to
                 * invalidate the cache), so we can only reset the PTE
                 * entries back to scratch.
                 */

                vaddr = px_vaddr(pt);
                memset32(vaddr + pte, scratch_pte, count);

                pte = 0;
        }
}

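/*
 * Write a PTE for every 4K page backing @vma, walking its sg list and
 * stepping into the next page table whenever act_pte wraps past GEN6_PTES.
 * The required page tables must already exist (see gen6_alloc_va_range).
 */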
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
                                      struct i915_vma *vma,
                                      enum i915_cache_level cache_level,
                                      u32 flags)
{
        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
        struct i915_page_directory * const pd = ppgtt->pd;
        unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
        unsigned int act_pt = first_entry / GEN6_PTES;
        unsigned int act_pte = first_entry % GEN6_PTES;
        const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
        struct sgt_dma iter = sgt_dma(vma);
        gen6_pte_t *vaddr;

        GEM_BUG_ON(!pd->entry[act_pt]);

        vaddr = px_vaddr(i915_pt_entry(pd, act_pt));
        do {
                GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
                vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

                iter.dma += I915_GTT_PAGE_SIZE;
                if (iter.dma == iter.max) {
                        iter.sg = __sg_next(iter.sg);
                        if (!iter.sg || sg_dma_len(iter.sg) == 0)
                                break;

                        iter.dma = sg_dma_address(iter.sg);
                        iter.max = iter.dma + sg_dma_len(iter.sg);
                }

                if (++act_pte == GEN6_PTES) {
                        vaddr = px_vaddr(i915_pt_entry(pd, ++act_pt));
                        act_pte = 0;
                }
        } while (1);

        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

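/*
 * (Re)write the PDEs covering [start, end) into the GGTT-resident page
 * directory, then read back and invalidate the GGTT so the updates are
 * visible before the hardware can chase them.
 */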
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        unsigned int pde;

        start = round_down(start, SZ_64K);
        end = round_up(end, SZ_64K) - start;

        mutex_lock(&ppgtt->flush);

        gen6_for_each_pde(pt, pd, start, end, pde)
                gen6_write_pde(ppgtt, pde, pt);

        mb();
        ioread32(ppgtt->pd_addr + pde - 1);
        gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
        mb();

        mutex_unlock(&ppgtt->flush);
}

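/*
 * Ensure a page table exists for every PDE covering [start, start + length),
 * taking preallocated tables from @stash, and bump the use-count for each
 * PTE about to be written. If new tables were installed while the page
 * directory is already bound into the GGTT, flush the affected PDEs.
 */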
static void gen6_alloc_va_range(struct i915_address_space *vm,
                                struct i915_vm_pt_stash *stash,
                                u64 start, u64 length)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        bool flush = false;
        u64 from = start;
        unsigned int pde;

        spin_lock(&pd->lock);
        gen6_for_each_pde(pt, pd, start, length, pde) {
                const unsigned int count = gen6_pte_count(start, length);

                if (!pt) {
                        spin_unlock(&pd->lock);

                        pt = stash->pt[0];
                        __i915_gem_object_pin_pages(pt->base);
                        i915_gem_object_make_unshrinkable(pt->base);

                        fill32_px(pt, vm->scratch[0]->encode);

                        spin_lock(&pd->lock);
                        if (!pd->entry[pde]) {
                                stash->pt[0] = pt->stash;
                                atomic_set(&pt->used, 0);
                                pd->entry[pde] = pt;
                        } else {
                                pt = pd->entry[pde];
                        }

                        flush = true;
                }

                atomic_add(count, &pt->used);
        }
        spin_unlock(&pd->lock);

        if (flush && i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
                intel_wakeref_t wakeref;

                with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
                        gen6_flush_pd(ppgtt, from, start);
        }
}

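/*
 * Set up the two-level scratch: a read-only scratch page whose PTE encoding
 * fills an all-scratch page table, which in turn backs any PDE that has no
 * real page table behind it.
 */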
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
        struct i915_address_space * const vm = &ppgtt->base.vm;
        int ret;

        ret = setup_scratch_page(vm);
        if (ret)
                return ret;

        vm->scratch[0]->encode =
                vm->pte_encode(px_dma(vm->scratch[0]),
                               I915_CACHE_NONE, PTE_READ_ONLY);

        vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
        if (IS_ERR(vm->scratch[1])) {
                ret = PTR_ERR(vm->scratch[1]);
                goto err_scratch0;
        }

        ret = map_pt_dma(vm, vm->scratch[1]);
        if (ret)
                goto err_scratch1;

        fill32_px(vm->scratch[1], vm->scratch[0]->encode);

        return 0;

err_scratch1:
        i915_gem_object_put(vm->scratch[1]);
err_scratch0:
        i915_gem_object_put(vm->scratch[0]);
        return ret;
}

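/* Free every page table still installed in the page directory. */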
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        u32 pde;

        gen6_for_all_pdes(pt, pd, pde)
                if (pt)
                        free_pt(&ppgtt->base.vm, pt);
}

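/*
 * Tear down the address space: drop the PD vma, free all remaining page
 * tables and the scratch pages, and finally release the page directory.
 */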
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

        __i915_vma_put(ppgtt->vma);

        gen6_ppgtt_free_pd(ppgtt);
        free_scratch(vm);

        mutex_destroy(&ppgtt->flush);
        mutex_destroy(&ppgtt->pin_mutex);

        free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}

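/*
 * The PD vma is not backed by a GEM object, so there are no real pages to
 * set up; a non-NULL error pointer is used as a placeholder instead.
 */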
static int pd_vma_set_pages(struct i915_vma *vma)
{
        vma->pages = ERR_PTR(-ENODEV);
        return 0;
}

static void pd_vma_clear_pages(struct i915_vma *vma)
{
        GEM_BUG_ON(!vma->pages);

        vma->pages = NULL;
}

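/*
 * Binding the PD vma into the GGTT fixes where the page directory lives:
 * pd_addr is the CPU-visible slot of its first PDE inside the mapped GTT
 * (gsm), while pp_dir encodes the same location for the hardware (it is
 * later programmed into PP_DIR_BASE when a context using this ppgtt runs).
 */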
static void pd_vma_bind(struct i915_address_space *vm,
                        struct i915_vm_pt_stash *stash,
                        struct i915_vma *vma,
                        enum i915_cache_level cache_level,
                        u32 unused)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        struct gen6_ppgtt *ppgtt = vma->private;
        u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;

        ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
        ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;

        gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
}

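/*
 * With the PD unbound from the GGTT, reap the page tables that
 * gen6_ppgtt_clear_range() flagged as drained; the PDEs are rewritten in
 * full on the next pd_vma_bind().
 */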
static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
{
        struct gen6_ppgtt *ppgtt = vma->private;
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        unsigned int pde;

        if (!ppgtt->scan_for_unused_pt)
                return;

        /* Free all no longer used page tables */
        gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
                if (!pt || atomic_read(&pt->used))
                        continue;

                free_pt(&ppgtt->base.vm, pt);
                pd->entry[pde] = NULL;
        }

        ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
        .set_pages = pd_vma_set_pages,
        .clear_pages = pd_vma_clear_pages,
        .bind_vma = pd_vma_bind,
        .unbind_vma = pd_vma_unbind,
};

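/*
 * Build a bare-bones GGTT vma (no backing object) that reserves room for
 * the page directory inside the global GTT; the ROTATED view type merely
 * keeps fencing away from it, as noted below.
 */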
static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
{
        struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
        struct i915_vma *vma;

        GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
        GEM_BUG_ON(size > ggtt->vm.total);

        vma = i915_vma_alloc();
        if (!vma)
                return ERR_PTR(-ENOMEM);

        i915_active_init(&vma->active, NULL, NULL, 0);

        kref_init(&vma->ref);
        mutex_init(&vma->pages_mutex);
        vma->vm = i915_vm_get(&ggtt->vm);
        vma->ops = &pd_vma_ops;
        vma->private = ppgtt;

        vma->size = size;
        vma->fence_size = size;
        atomic_set(&vma->flags, I915_VMA_GGTT);
        vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */

        INIT_LIST_HEAD(&vma->obj_link);
        INIT_LIST_HEAD(&vma->closed_link);

        return vma;
}

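/*
 * Pin the page directory into the GGTT. Only the first pin performs the
 * actual i915_ggtt_pin() (high in the GTT, GEN6_PD_ALIGN aligned); later
 * callers merely bump pin_count, see the workaround comment below.
 */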
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
        int err;

        GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));

        /*
         * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
         * which will be pinned into every active context.
         * (When vma->pin_count becomes atomic, I expect we will naturally
         * need a larger, unpacked, type and kill this redundancy.)
         */
        if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
                return 0;

        if (mutex_lock_interruptible(&ppgtt->pin_mutex))
                return -EINTR;

        /*
         * The PPGTT PDEs reside in the GGTT and consist of 512 entries. The
         * allocator works in address space sizes, so it's multiplied by page
         * size. We allocate at the top of the GTT to avoid fragmentation.
         */
        err = 0;
        if (!atomic_read(&ppgtt->pin_count))
                err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
        if (!err)
                atomic_inc(&ppgtt->pin_count);
        mutex_unlock(&ppgtt->pin_mutex);

        return err;
}

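/* Drop one pin; unpin the PD vma from the GGTT when the last pin goes. */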
void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

        GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
        if (atomic_dec_and_test(&ppgtt->pin_count))
                i915_vma_unpin(ppgtt->vma);
}

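/* Unconditionally release the PD pin, resetting pin_count to zero. */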
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

        if (!atomic_read(&ppgtt->pin_count))
                return;

        i915_vma_unpin(ppgtt->vma);
        atomic_set(&ppgtt->pin_count, 0);
}

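/*
 * Create the gen6 ppgtt: a single page directory of I915_PDES entries, each
 * pointing to a 4K page table of 1024 four-byte PTEs, so every PDE covers
 * 4MiB of address space (pd_shift = ilog2(SZ_4K * SZ_4K / 4) = 22).
 */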
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
{
        struct i915_ggtt * const ggtt = gt->ggtt;
        struct gen6_ppgtt *ppgtt;
        int err;

        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
        if (!ppgtt)
                return ERR_PTR(-ENOMEM);

        mutex_init(&ppgtt->flush);
        mutex_init(&ppgtt->pin_mutex);

        ppgtt_init(&ppgtt->base, gt);
        ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
        ppgtt->base.vm.top = 1;

        ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
        ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
        ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
        ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
        ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

        ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
        ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

        ppgtt->base.pd = __alloc_pd(I915_PDES);
        if (!ppgtt->base.pd) {
                err = -ENOMEM;
                goto err_free;
        }

        err = gen6_ppgtt_init_scratch(ppgtt);
        if (err)
                goto err_pd;

        ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
        if (IS_ERR(ppgtt->vma)) {
                err = PTR_ERR(ppgtt->vma);
                goto err_scratch;
        }

        return &ppgtt->base;

err_scratch:
        free_scratch(&ppgtt->base.vm);
err_pd:
        free_pd(&ppgtt->base.vm, ppgtt->base.pd);
err_free:
        mutex_destroy(&ppgtt->pin_mutex);
        kfree(ppgtt);
        return ERR_PTR(err);
}