linux/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"

#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gpu_commands.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"
#include "selftests/i915_random.h"
#include "huge_gem_object.h"
#include "mock_context.h"

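/*
 * Fill test: pick a random object size, poison the CPU cache with a known
 * junk pattern, then have the kernel context fill the pages with @val via
 * the blitter (i915_gem_schedule_fill_pages_blt). huge_gem_object() backs
 * only phys_sz bytes of the sz-byte object, so large virtual sizes stay
 * cheap. After moving the object back to the CPU domain, every dword of
 * the physical backing store must read back as @val; a stale cacheline
 * indicates a missing clflush.
 */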
static int __igt_client_fill(struct intel_engine_cs *engine)
{
        struct intel_context *ce = engine->kernel_context;
        struct drm_i915_gem_object *obj;
        struct rnd_state prng;
        IGT_TIMEOUT(end);
        u32 *vaddr;
        int err = 0;

        prandom_seed_state(&prng, i915_selftest.random_seed);

        intel_engine_pm_get(engine);
        do {
                const u32 max_block_size = S16_MAX * PAGE_SIZE;
                u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
                u32 phys_sz = sz % (max_block_size + 1);
                u32 val = prandom_u32_state(&prng);
                u32 i;

                sz = round_up(sz, PAGE_SIZE);
                phys_sz = round_up(phys_sz, PAGE_SIZE);

                pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
                         phys_sz, sz, val);

                obj = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
                        goto err_flush;
                }

                vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put;
                }
                /*
                 * XXX: The goal is to move this to get_pages, so try to dirty the
                 * CPU cache first to check that we do the required clflush
                 * before scheduling the blt for !llc platforms. This matches
                 * some version of reality where at get_pages the pages
                 * themselves may not yet be coherent with the GPU (swap-in). If
                 * we are missing the flush then we should see the stale cache
                 * values after we do the set_to_cpu_domain and pick it up as a
                 * test failure.
                 */
                memset32(vaddr, val ^ 0xdeadbeaf,
                         huge_gem_object_phys_size(obj) / sizeof(u32));

                if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                        obj->cache_dirty = true;

                err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages,
                                                       &obj->mm.page_sizes,
                                                       val);
                if (err)
                        goto err_unpin;

                i915_gem_object_lock(obj);
                err = i915_gem_object_set_to_cpu_domain(obj, false);
                i915_gem_object_unlock(obj);
                if (err)
                        goto err_unpin;

                for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
                        if (vaddr[i] != val) {
                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
                                       vaddr[i], val);
                                err = -EINVAL;
                                goto err_unpin;
                        }
                }

                i915_gem_object_unpin_map(obj);
                i915_gem_object_put(obj);
        } while (!time_after(jiffies, end));

        goto err_flush;

err_unpin:
        i915_gem_object_unpin_map(obj);
err_put:
        i915_gem_object_put(obj);
err_flush:
        if (err == -ENOMEM)
                err = 0;
        intel_engine_pm_put(engine);

        return err;
}

static int igt_client_fill(void *arg)
{
        int inst = 0;

        do {
                struct intel_engine_cs *engine;
                int err;

                engine = intel_engine_lookup_user(arg,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                err = __igt_client_fill(engine);
                if (err == -ENOMEM)
                        err = 0;
                if (err)
                        return err;
        } while (1);
}

#define WIDTH 512
#define HEIGHT 32

struct blit_buffer {
        struct i915_vma *vma;
        u32 start_val;
        u32 tiling;
};

struct tiled_blits {
        struct intel_context *ce;
        struct blit_buffer buffers[3];
        struct blit_buffer scratch;
        struct i915_vma *batch;
        u64 hole;
        u32 width;
        u32 height;
};

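/*
 * Emit a batch that copies src to dst with the blitter: an LRI to
 * BCS_SWCTRL selects Y-major mode per surface (the high 16 bits act as
 * the write-enable mask for this masked register), an MI_FLUSH_DW
 * serialises the register update against the copy, and then
 * XY_SRC_COPY_BLT performs the width x height transfer. The SRC/DST_TILED
 * bits mark a surface as tiled, with BCS_SWCTRL choosing between X and Y
 * layout; tiled pitches are then specified in dwords (hence the /4).
 * On gen8+ the upper 32 bits of each surface address are emitted as well.
 */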
static int prepare_blit(const struct tiled_blits *t,
                        struct blit_buffer *dst,
                        struct blit_buffer *src,
                        struct drm_i915_gem_object *batch)
{
        const int gen = INTEL_GEN(to_i915(batch->base.dev));
        bool use_64b_reloc = gen >= 8;
        u32 src_pitch, dst_pitch;
        u32 cmd, *cs;

        cs = i915_gem_object_pin_map(batch, I915_MAP_WC);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
        cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
        if (src->tiling == I915_TILING_Y)
                cmd |= BCS_SRC_Y;
        if (dst->tiling == I915_TILING_Y)
                cmd |= BCS_DST_Y;
        *cs++ = cmd;

        cmd = MI_FLUSH_DW;
        if (gen >= 8)
                cmd++;
        *cs++ = cmd;
        *cs++ = 0;
        *cs++ = 0;
        *cs++ = 0;

        cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
        if (gen >= 8)
                cmd += 2;

        src_pitch = t->width * 4;
        if (src->tiling) {
                cmd |= XY_SRC_COPY_BLT_SRC_TILED;
                src_pitch /= 4;
        }

        dst_pitch = t->width * 4;
        if (dst->tiling) {
                cmd |= XY_SRC_COPY_BLT_DST_TILED;
                dst_pitch /= 4;
        }

        *cs++ = cmd;
        *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
        *cs++ = 0;
        *cs++ = t->height << 16 | t->width;
        *cs++ = lower_32_bits(dst->vma->node.start);
        if (use_64b_reloc)
                *cs++ = upper_32_bits(dst->vma->node.start);
        *cs++ = 0;
        *cs++ = src_pitch;
        *cs++ = lower_32_bits(src->vma->node.start);
        if (use_64b_reloc)
                *cs++ = upper_32_bits(src->vma->node.start);

        *cs++ = MI_BATCH_BUFFER_END;

        i915_gem_object_flush_map(batch);
        i915_gem_object_unpin_map(batch);

        return 0;
}

static void tiled_blits_destroy_buffers(struct tiled_blits *t)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(t->buffers); i++)
                i915_vma_put(t->buffers[i].vma);

        i915_vma_put(t->scratch.vma);
        i915_vma_put(t->batch);
}

static struct i915_vma *
__create_vma(struct tiled_blits *t, size_t size, bool lmem)
{
        struct drm_i915_private *i915 = t->ce->vm->i915;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;

        if (lmem)
                obj = i915_gem_object_create_lmem(i915, size, 0);
        else
                obj = i915_gem_object_create_shmem(i915, size);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        vma = i915_vma_instance(obj, t->ce->vm, NULL);
        if (IS_ERR(vma))
                i915_gem_object_put(obj);

        return vma;
}

static struct i915_vma *create_vma(struct tiled_blits *t, bool lmem)
{
        return __create_vma(t, PAGE_ALIGN(t->width * t->height * 4), lmem);
}

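/*
 * Allocate the batch, the scratch surface and the three working surfaces.
 * On platforms with local memory, every other working buffer is placed in
 * lmem so the blits also exercise cross-memory-region copies. Each working
 * buffer is assigned a random tiling mode (none, X or Y).
 */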
static int tiled_blits_create_buffers(struct tiled_blits *t,
                                      int width, int height,
                                      struct rnd_state *prng)
{
        struct drm_i915_private *i915 = t->ce->engine->i915;
        int i;

        t->width = width;
        t->height = height;

        t->batch = __create_vma(t, PAGE_SIZE, false);
        if (IS_ERR(t->batch))
                return PTR_ERR(t->batch);

        t->scratch.vma = create_vma(t, false);
        if (IS_ERR(t->scratch.vma)) {
                i915_vma_put(t->batch);
                return PTR_ERR(t->scratch.vma);
        }

        for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
                struct i915_vma *vma;

                vma = create_vma(t, HAS_LMEM(i915) && i % 2);
                if (IS_ERR(vma)) {
                        tiled_blits_destroy_buffers(t);
                        return PTR_ERR(vma);
                }

                t->buffers[i].vma = vma;
                t->buffers[i].tiling =
                        i915_prandom_u32_max_state(I915_TILING_Y + 1, prng);
        }

        return 0;
}

static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
{
        int i;

        t->scratch.start_val = val;
        for (i = 0; i < t->width * t->height; i++)
                vaddr[i] = val++;

        i915_gem_object_flush_map(t->scratch.vma->obj);
}

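/*
 * Compute, on the CPU, where a linear (x, y) byte offset lands inside a
 * tiled surface. X-major tiling is walked as 512-byte-wide, 8-row tiles
 * and Y-major tiling as 16-byte-wide, 32-row columns; the chosen swizzle
 * bits (9, 10, 11) are then folded into bit 6 of the offset to match the
 * platform's bit-6 address swizzling. This is what lets verify_buffer()
 * check a tiled surface through a plain WC mapping.
 */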
static u64 swizzle_bit(unsigned int bit, u64 offset)
{
        return (offset & BIT_ULL(bit)) >> (bit - 6);
}

static u64 tiled_offset(const struct intel_gt *gt,
                        u64 v,
                        unsigned int stride,
                        unsigned int tiling)
{
        unsigned int swizzle;
        u64 x, y;

        if (tiling == I915_TILING_NONE)
                return v;

        y = div64_u64_rem(v, stride, &x);

        if (tiling == I915_TILING_X) {
                v = div64_u64_rem(y, 8, &y) * stride * 8;
                v += y * 512;
                v += div64_u64_rem(x, 512, &x) << 12;
                v += x;

                swizzle = gt->ggtt->bit_6_swizzle_x;
        } else {
                const unsigned int ytile_span = 16;
                const unsigned int ytile_height = 512;

                v = div64_u64_rem(y, 32, &y) * stride * 32;
                v += y * ytile_span;
                v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
                v += x;

                swizzle = gt->ggtt->bit_6_swizzle_y;
        }

        switch (swizzle) {
        case I915_BIT_6_SWIZZLE_9:
                v ^= swizzle_bit(9, v);
                break;
        case I915_BIT_6_SWIZZLE_9_10:
                v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
                break;
        case I915_BIT_6_SWIZZLE_9_11:
                v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
                break;
        case I915_BIT_6_SWIZZLE_9_10_11:
                v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
                break;
        }

        return v;
}

static const char *repr_tiling(int tiling)
{
        switch (tiling) {
        case I915_TILING_NONE: return "linear";
        case I915_TILING_X: return "X";
        case I915_TILING_Y: return "Y";
        default: return "unknown";
        }
}

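/*
 * Check the result of a blit without detiling the whole surface: confirm
 * that the first dword carries the expected start_val, then pick a random
 * (x, y) texel, translate it through tiled_offset() and check that it
 * holds start_val + y * width + x. On failure, dump the first page for
 * inspection.
 */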
static int verify_buffer(const struct tiled_blits *t,
                         struct blit_buffer *buf,
                         struct rnd_state *prng)
{
        const u32 *vaddr;
        int ret = 0;
        int x, y, p;

        x = i915_prandom_u32_max_state(t->width, prng);
        y = i915_prandom_u32_max_state(t->height, prng);
        p = y * t->width + x;

        vaddr = i915_gem_object_pin_map(buf->vma->obj, I915_MAP_WC);
        if (IS_ERR(vaddr))
                return PTR_ERR(vaddr);

        if (vaddr[0] != buf->start_val) {
                ret = -EINVAL;
        } else {
                u64 v = tiled_offset(buf->vma->vm->gt,
                                     p * 4, t->width * 4,
                                     buf->tiling);

                if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
                        ret = -EINVAL;
        }
        if (ret) {
                pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
                       repr_tiling(buf->tiling),
                       x, y, buf->start_val);
                igt_hexdump(vaddr, 4096);
        }

        i915_gem_object_unpin_map(buf->vma->obj);
        return ret;
}

static int move_to_active(struct i915_vma *vma,
                          struct i915_request *rq,
                          unsigned int flags)
{
        int err;

        i915_vma_lock(vma);
        err = i915_request_await_object(rq, vma->obj, false);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, flags);
        i915_vma_unlock(vma);

        return err;
}

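/*
 * Pin a surface at an exact GTT address, unbinding it first if it is
 * currently bound somewhere else. tiled_blit() relies on this to place
 * the source and destination at chosen offsets inside the reserved hole,
 * so the same buffer can be deliberately rebound at different addresses
 * between copies.
 */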
static int pin_buffer(struct i915_vma *vma, u64 addr)
{
        int err;

        if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
                err = i915_vma_unbind(vma);
                if (err)
                        return err;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED | addr);
        if (err)
                return err;

        return 0;
}

static int
tiled_blit(struct tiled_blits *t,
           struct blit_buffer *dst, u64 dst_addr,
           struct blit_buffer *src, u64 src_addr)
{
        struct i915_request *rq;
        int err;

        err = pin_buffer(src->vma, src_addr);
        if (err) {
                pr_err("Cannot pin src @ %llx\n", src_addr);
                return err;
        }

        err = pin_buffer(dst->vma, dst_addr);
        if (err) {
                pr_err("Cannot pin dst @ %llx\n", dst_addr);
                goto err_src;
        }

        err = i915_vma_pin(t->batch, 0, 0, PIN_USER | PIN_HIGH);
        if (err) {
                pr_err("cannot pin batch\n");
                goto err_dst;
        }

        err = prepare_blit(t, dst, src, t->batch->obj);
        if (err)
                goto err_bb;

        rq = intel_context_create_request(t->ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_bb;
        }

        err = move_to_active(t->batch, rq, 0);
        if (!err)
                err = move_to_active(src->vma, rq, 0);
        if (!err)
                err = move_to_active(dst->vma, rq, 0);
        if (!err)
                err = rq->engine->emit_bb_start(rq,
                                                t->batch->node.start,
                                                t->batch->node.size,
                                                0);
        i915_request_get(rq);
        i915_request_add(rq);
        if (i915_request_wait(rq, 0, HZ / 2) < 0)
                err = -ETIME;
        i915_request_put(rq);

        dst->start_val = src->start_val;
err_bb:
        i915_vma_unpin(t->batch);
err_dst:
        i915_vma_unpin(dst->vma);
err_src:
        i915_vma_unpin(src->vma);
        return err;
}

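/*
 * Reserve a private range of GTT addresses for the test. A throwaway
 * drm_mm node is inserted into the context's VM purely to discover a free
 * region large enough for the working set plus alignment slack, then
 * removed again; t->hole records its start and all subsequent pins use
 * fixed offsets within it. If no such region exists, the test is skipped
 * with -ENODEV.
 */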
static struct tiled_blits *
tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
{
        struct drm_mm_node hole;
        struct tiled_blits *t;
        u64 hole_size;
        int err;

        t = kzalloc(sizeof(*t), GFP_KERNEL);
        if (!t)
                return ERR_PTR(-ENOMEM);

        t->ce = intel_context_create(engine);
        if (IS_ERR(t->ce)) {
                err = PTR_ERR(t->ce);
                goto err_free;
        }

        hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
        hole_size *= 2; /* room to maneuver */
        hole_size += 2 * I915_GTT_MIN_ALIGNMENT;

        mutex_lock(&t->ce->vm->mutex);
        memset(&hole, 0, sizeof(hole));
        err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
                                          hole_size, 0, I915_COLOR_UNEVICTABLE,
                                          0, U64_MAX,
                                          DRM_MM_INSERT_BEST);
        if (!err)
                drm_mm_remove_node(&hole);
        mutex_unlock(&t->ce->vm->mutex);
        if (err) {
                err = -ENODEV;
                goto err_put;
        }

        t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
        pr_info("Using hole at %llx\n", t->hole);

        err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
        if (err)
                goto err_put;

        return t;

err_put:
        intel_context_put(t->ce);
err_free:
        kfree(t);
        return ERR_PTR(err);
}

static void tiled_blits_destroy(struct tiled_blits *t)
{
        tiled_blits_destroy_buffers(t);

        intel_context_put(t->ce);
        kfree(t);
}

static int tiled_blits_prepare(struct tiled_blits *t,
                               struct rnd_state *prng)
{
        u64 offset = PAGE_ALIGN(t->width * t->height * 4);
        u32 *map;
        int err;
        int i;

        map = i915_gem_object_pin_map(t->scratch.vma->obj, I915_MAP_WC);
        if (IS_ERR(map))
                return PTR_ERR(map);

        /* Use scratch to fill objects */
        for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
                fill_scratch(t, map, prandom_u32_state(prng));
                GEM_BUG_ON(verify_buffer(t, &t->scratch, prng));

                err = tiled_blit(t,
                                 &t->buffers[i], t->hole + offset,
                                 &t->scratch, t->hole);
                if (err == 0)
                        err = verify_buffer(t, &t->buffers[i], prng);
                if (err) {
                        pr_err("Failed to create buffer %d\n", i);
                        break;
                }
        }

        i915_gem_object_unpin_map(t->scratch.vma->obj);
        return err;
}

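/*
 * Rebind the buffers at new, overlapping GTT offsets between two
 * dependent blits. The tiled layout must be a function of the surface
 * contents only, not of where the surface happens to be bound, so buffer
 * 2 must still verify correctly after the bounce.
 */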
static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
{
        u64 offset =
                round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
        int err;

        /* We want to check position invariant tiling across GTT eviction */

        err = tiled_blit(t,
                         &t->buffers[1], t->hole + offset / 2,
                         &t->buffers[0], t->hole + 2 * offset);
        if (err)
                return err;

        /* Reposition so that we overlap the old addresses, and slightly off */
        err = tiled_blit(t,
                         &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
                         &t->buffers[1], t->hole + 3 * offset / 2);
        if (err)
                return err;

        err = verify_buffer(t, &t->buffers[2], prng);
        if (err)
                return err;

        return 0;
}

static int __igt_client_tiled_blits(struct intel_engine_cs *engine,
                                    struct rnd_state *prng)
{
        struct tiled_blits *t;
        int err;

        t = tiled_blits_create(engine, prng);
        if (IS_ERR(t))
                return PTR_ERR(t);

        err = tiled_blits_prepare(t, prng);
        if (err)
                goto out;

        err = tiled_blits_bounce(t, prng);
        if (err)
                goto out;

out:
        tiled_blits_destroy(t);
        return err;
}

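/*
 * tiled_offset() only models the bit-6 address swizzles derived from bits
 * 9-11. Platforms that also fold in bit 17 of the physical page address
 * (or that need QUIRK_PIN_SWIZZLED_PAGES) cannot be checked this way, so
 * the tiled-blits test is skipped there.
 */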
static bool has_bit17_swizzle(int sw)
{
        return (sw == I915_BIT_6_SWIZZLE_9_10_17 ||
                sw == I915_BIT_6_SWIZZLE_9_17);
}

static bool bad_swizzling(struct drm_i915_private *i915)
{
        struct i915_ggtt *ggtt = &i915->ggtt;

        if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
                return true;

        if (has_bit17_swizzle(ggtt->bit_6_swizzle_x) ||
            has_bit17_swizzle(ggtt->bit_6_swizzle_y))
                return true;

        return false;
}

static int igt_client_tiled_blits(void *arg)
{
        struct drm_i915_private *i915 = arg;
        I915_RND_STATE(prng);
        int inst = 0;

        /* Test requires explicit BLT tiling controls */
        if (INTEL_GEN(i915) < 4)
                return 0;

        if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */
                return 0;

        do {
                struct intel_engine_cs *engine;
                int err;

                engine = intel_engine_lookup_user(i915,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                err = __igt_client_tiled_blits(engine, &prng);
                if (err == -ENODEV)
                        err = 0;
                if (err)
                        return err;
        } while (1);
}

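/*
 * Entry point for the live client-blt selftests. The tests need a working
 * copy (BLT) engine and are skipped entirely when the GPU is already
 * wedged.
 */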
int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_client_fill),
                SUBTEST(igt_client_tiled_blits),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        if (!HAS_ENGINE(i915, BCS0))
                return 0;

        return i915_live_subtests(tests, i915);
}