linux/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_internal.h"

#include "gen6_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_gt_regs.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"

/* Write pde (index) from the page directory @pd to the page table @pt */
static void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
                           const unsigned int pde,
                           const struct i915_page_table *pt)
{
        dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);

        /* Caller needs to make sure the write completes if necessary */
        iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
                  ppgtt->pd_addr + pde);
}

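/*
 * Enable gen7 (IVB/HSW) PPGTT operation: request 64B PPGTT caching in
 * GAC_ECO_BITS and select write-back (Haswell) or LLC (Ivybridge)
 * cacheability for PPGTT accesses in GAM_ECOCHK.
 */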
void gen7_ppgtt_enable(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;
        u32 ecochk;

        intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

        ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
        if (IS_HASWELL(i915)) {
                ecochk |= ECOCHK_PPGTT_WB_HSW;
        } else {
                ecochk |= ECOCHK_PPGTT_LLC_IVB;
                ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
        }
        intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
}

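/*
 * Enable gen6 (Sandybridge) PPGTT operation: set the ECO/GAB/ECOCHK bits
 * and, if PPGTT has not been disabled (e.g. for VT-d), turn it on via
 * GFX_MODE.
 */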
void gen6_ppgtt_enable(struct intel_gt *gt)
{
        struct intel_uncore *uncore = gt->uncore;

        intel_uncore_rmw(uncore,
                         GAC_ECO_BITS,
                         0,
                         ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

        intel_uncore_rmw(uncore,
                         GAB_CTL,
                         0,
                         GAB_CTL_CONT_AFTER_PAGEFAULT);

        intel_uncore_rmw(uncore,
                         GAM_ECOCHK,
                         0,
                         ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

        if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
                intel_uncore_write(uncore,
                                   GFX_MODE,
                                   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
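/* Point the PTEs covering [start, start + length) back at the scratch page. */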
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
                                   u64 start, u64 length)
{
        struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
        const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
        const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
        unsigned int pde = first_entry / GEN6_PTES;
        unsigned int pte = first_entry % GEN6_PTES;
        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

        while (num_entries) {
                struct i915_page_table * const pt =
                        i915_pt_entry(ppgtt->base.pd, pde++);
                const unsigned int count = min(num_entries, GEN6_PTES - pte);
                gen6_pte_t *vaddr;

                num_entries -= count;

                GEM_BUG_ON(count > atomic_read(&pt->used));
                if (!atomic_sub_return(count, &pt->used))
                        ppgtt->scan_for_unused_pt = true;

                /*
                 * Note that the hw doesn't support removing PDE on the fly
                 * (they are cached inside the context with no means to
                 * invalidate the cache), so we can only reset the PTE
                 * entries back to scratch.
                 */

                vaddr = px_vaddr(pt);
                memset32(vaddr + pte, scratch_pte, count);

                pte = 0;
        }
}

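/*
 * Write PTEs for each page of @vma_res, walking the dma-mapped scatterlist
 * and moving on to the next page table whenever the current one fills up.
 */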
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
                                      struct i915_vma_resource *vma_res,
                                      enum i915_cache_level cache_level,
                                      u32 flags)
{
        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
        struct i915_page_directory * const pd = ppgtt->pd;
        unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE;
        unsigned int act_pt = first_entry / GEN6_PTES;
        unsigned int act_pte = first_entry % GEN6_PTES;
        const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
        struct sgt_dma iter = sgt_dma(vma_res);
        gen6_pte_t *vaddr;

        GEM_BUG_ON(!pd->entry[act_pt]);

        vaddr = px_vaddr(i915_pt_entry(pd, act_pt));
        do {
                GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
                vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

                iter.dma += I915_GTT_PAGE_SIZE;
                if (iter.dma == iter.max) {
                        iter.sg = __sg_next(iter.sg);
                        if (!iter.sg || sg_dma_len(iter.sg) == 0)
                                break;

                        iter.dma = sg_dma_address(iter.sg);
                        iter.max = iter.dma + sg_dma_len(iter.sg);
                }

                if (++act_pte == GEN6_PTES) {
                        vaddr = px_vaddr(i915_pt_entry(pd, ++act_pt));
                        act_pte = 0;
                }
        } while (1);

        vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}

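/*
 * Rewrite the PDEs covering [start, end) through the GGTT mapping of the
 * page directory, then invalidate the GGTT so the update becomes visible.
 */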
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        unsigned int pde;

        start = round_down(start, SZ_64K);
        end = round_up(end, SZ_64K) - start;

        mutex_lock(&ppgtt->flush);

        gen6_for_each_pde(pt, pd, start, end, pde)
                gen6_write_pde(ppgtt, pde, pt);

        mb();
        ioread32(ppgtt->pd_addr + pde - 1);
        gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
        mb();

        mutex_unlock(&ppgtt->flush);
}

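/*
 * Ensure page tables exist for [start, start + length), taking preallocated
 * tables from @stash for any missing PDE, and flush the new PDEs if the
 * page directory is already bound into the GGTT.
 */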
static void gen6_alloc_va_range(struct i915_address_space *vm,
                                struct i915_vm_pt_stash *stash,
                                u64 start, u64 length)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        bool flush = false;
        u64 from = start;
        unsigned int pde;

        spin_lock(&pd->lock);
        gen6_for_each_pde(pt, pd, start, length, pde) {
                const unsigned int count = gen6_pte_count(start, length);

                if (!pt) {
                        spin_unlock(&pd->lock);

                        pt = stash->pt[0];
                        __i915_gem_object_pin_pages(pt->base);

                        fill32_px(pt, vm->scratch[0]->encode);

                        spin_lock(&pd->lock);
                        if (!pd->entry[pde]) {
                                stash->pt[0] = pt->stash;
                                atomic_set(&pt->used, 0);
                                pd->entry[pde] = pt;
                        } else {
                                pt = pd->entry[pde];
                        }

                        flush = true;
                }

                atomic_add(count, &pt->used);
        }
        spin_unlock(&pd->lock);

        if (flush && i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
                intel_wakeref_t wakeref;

                with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
                        gen6_flush_pd(ppgtt, from, start);
        }
}

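/*
 * Set up the scratch page (scratch[0]) and a scratch page table (scratch[1])
 * filled with PTEs pointing at it; unused PDEs are directed at scratch[1]
 * by gen6_write_pde().
 */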
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
        struct i915_address_space * const vm = &ppgtt->base.vm;
        int ret;

        ret = setup_scratch_page(vm);
        if (ret)
                return ret;

        vm->scratch[0]->encode =
                vm->pte_encode(px_dma(vm->scratch[0]),
                               I915_CACHE_NONE, PTE_READ_ONLY);

        vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
        if (IS_ERR(vm->scratch[1])) {
                ret = PTR_ERR(vm->scratch[1]);
                goto err_scratch0;
        }

        ret = map_pt_dma(vm, vm->scratch[1]);
        if (ret)
                goto err_scratch1;

        fill32_px(vm->scratch[1], vm->scratch[0]->encode);

        return 0;

err_scratch1:
        i915_gem_object_put(vm->scratch[1]);
err_scratch0:
        i915_gem_object_put(vm->scratch[0]);
        return ret;
}

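/* Release every page table still referenced by the page directory. */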
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        u32 pde;

        gen6_for_all_pdes(pt, pd, pde)
                if (pt)
                        free_pt(&ppgtt->base.vm, pt);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

        gen6_ppgtt_free_pd(ppgtt);
        free_scratch(vm);

        mutex_destroy(&ppgtt->flush);

        free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}

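/*
 * Called when the page-directory vma is bound into the GGTT: cache the
 * GGTT offset (pp_dir) and the iomem location of the PDE slots in the GSM,
 * then write out all PDEs through that mapping.
 */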
static void pd_vma_bind(struct i915_address_space *vm,
                        struct i915_vm_pt_stash *stash,
                        struct i915_vma_resource *vma_res,
                        enum i915_cache_level cache_level,
                        u32 unused)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        struct gen6_ppgtt *ppgtt = vma_res->private;
        u32 ggtt_offset = vma_res->start / I915_GTT_PAGE_SIZE;

        ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
        ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;

        gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
}

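/*
 * On unbind, opportunistically free any page tables that
 * gen6_ppgtt_clear_range() marked as no longer used.
 */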
static void pd_vma_unbind(struct i915_address_space *vm,
                          struct i915_vma_resource *vma_res)
{
        struct gen6_ppgtt *ppgtt = vma_res->private;
        struct i915_page_directory * const pd = ppgtt->base.pd;
        struct i915_page_table *pt;
        unsigned int pde;

        if (!ppgtt->scan_for_unused_pt)
                return;

        /* Free all no longer used page tables */
        gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
                if (!pt || atomic_read(&pt->used))
                        continue;

                free_pt(&ppgtt->base.vm, pt);
                pd->entry[pde] = NULL;
        }

        ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
        .bind_vma = pd_vma_bind,
        .unbind_vma = pd_vma_unbind,
};

int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
        int err;

        GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));

        /*
         * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
         * which will be pinned into every active context.
         * (When vma->pin_count becomes atomic, I expect we will naturally
         * need a larger, unpacked, type and kill this redundancy.)
         */
        if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
                return 0;

        /* grab the ppgtt resv to pin the object */
        err = i915_vm_lock_objects(&ppgtt->base.vm, ww);
        if (err)
                return err;

        /*
         * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
         * allocator works in address space sizes, so it's multiplied by page
         * size. We allocate at the top of the GTT to avoid fragmentation.
         */
        if (!atomic_read(&ppgtt->pin_count)) {
                err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);

                GEM_BUG_ON(ppgtt->vma->fence);
                clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma));
        }
        if (!err)
                atomic_inc(&ppgtt->pin_count);

        return err;
}

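/*
 * The page directory is exposed to the GGTT through a dummy GEM object
 * with no backing pages of its own; it exists only so the PD can be
 * wrapped in a vma and pinned.
 */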
static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj)
{
        obj->mm.pages = ZERO_SIZE_PTR;
        return 0;
}

static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj,
                                   struct sg_table *pages)
{
}

static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {
        .name = "pd_dummy_obj",
        .get_pages = pd_dummy_obj_get_pages,
        .put_pages = pd_dummy_obj_put_pages,
};

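/*
 * Allocate the top-level page directory and wrap it in a GGTT vma (backed
 * by the dummy object above) so it can later be pinned by gen6_ppgtt_pin().
 */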
static struct i915_page_directory *
gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
{
        struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt;
        struct i915_page_directory *pd;
        int err;

        pd = __alloc_pd(I915_PDES);
        if (unlikely(!pd))
                return ERR_PTR(-ENOMEM);

        pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
                                                        &pd_dummy_obj_ops,
                                                        I915_PDES * SZ_4K);
        if (IS_ERR(pd->pt.base)) {
                err = PTR_ERR(pd->pt.base);
                pd->pt.base = NULL;
                goto err_pd;
        }

        pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm);
        pd->pt.base->shares_resv_from = &ppgtt->base.vm;

        ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL);
        if (IS_ERR(ppgtt->vma)) {
                err = PTR_ERR(ppgtt->vma);
                ppgtt->vma = NULL;
                goto err_pd;
        }

        /* The dummy object we create is special, override ops.. */
        ppgtt->vma->ops = &pd_vma_ops;
        ppgtt->vma->private = ppgtt;
        return pd;

err_pd:
        free_pd(&ppgtt->base.vm, pd);
        return ERR_PTR(err);
}

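/* Drop a pin reference; unpin the PD vma from the GGTT on the last one. */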
void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
        struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

        GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
        if (atomic_dec_and_test(&ppgtt->pin_count))
                i915_vma_unpin(ppgtt->vma);
}

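/*
 * Create a gen6/gen7 ppgtt: a single level of page tables beneath one
 * page directory whose PDEs are written through the GGTT.
 */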
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
{
        struct i915_ggtt * const ggtt = gt->ggtt;
        struct gen6_ppgtt *ppgtt;
        int err;

        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
        if (!ppgtt)
                return ERR_PTR(-ENOMEM);

        mutex_init(&ppgtt->flush);

        ppgtt_init(&ppgtt->base, gt, 0);
        ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
        ppgtt->base.vm.top = 1;

        ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
        ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
        ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
        ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
        ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

        ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
        ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma;
        ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

        err = gen6_ppgtt_init_scratch(ppgtt);
        if (err)
                goto err_free;

        ppgtt->base.pd = gen6_alloc_top_pd(ppgtt);
        if (IS_ERR(ppgtt->base.pd)) {
                err = PTR_ERR(ppgtt->base.pd);
                goto err_scratch;
        }

        return &ppgtt->base;

err_scratch:
        free_scratch(&ppgtt->base.vm);
err_free:
        kfree(ppgtt);
        return ERR_PTR(err);
}