linux/drivers/gpu/drm/i915/gt/intel_timeline.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS

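/*
 * A page of HWSP storage shared between timelines. The page is carved up
 * into CACHELINE_BYTES slots, one per bit of free_bitmap; a set bit means
 * that cacheline is still available for a timeline to claim as its seqno
 * location.
 */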
struct intel_timeline_hwsp {
        struct intel_gt *gt;
        struct intel_gt_timelines *gt_timelines;
        struct list_head free_link;
        struct i915_vma *vma;
        u64 free_bitmap;
};

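/* Allocate a fresh page of HWSP storage and wrap it in a GGTT vma. */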
static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
        if (IS_ERR(vma))
                i915_gem_object_put(obj);

        return vma;
}

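/*
 * Claim a free cacheline from the shared HWSP pages. If the freelist is
 * empty, allocate a new page and publish it. On success, the index of the
 * claimed cacheline is returned in *cacheline and the backing vma is
 * returned to the caller.
 */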
static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
        struct intel_gt_timelines *gt = &timeline->gt->timelines;
        struct intel_timeline_hwsp *hwsp;

        BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

        spin_lock_irq(&gt->hwsp_lock);

        /* hwsp_free_list only contains HWSP that have available cachelines */
        hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
                                        typeof(*hwsp), free_link);
        if (!hwsp) {
                struct i915_vma *vma;

                spin_unlock_irq(&gt->hwsp_lock);

                hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
                if (!hwsp)
                        return ERR_PTR(-ENOMEM);

                vma = __hwsp_alloc(timeline->gt);
                if (IS_ERR(vma)) {
                        kfree(hwsp);
                        return vma;
                }

                vma->private = hwsp;
                hwsp->gt = timeline->gt;
                hwsp->vma = vma;
                hwsp->free_bitmap = ~0ull;
                hwsp->gt_timelines = gt;

                spin_lock_irq(&gt->hwsp_lock);
                list_add(&hwsp->free_link, &gt->hwsp_free_list);
        }

        GEM_BUG_ON(!hwsp->free_bitmap);
        *cacheline = __ffs64(hwsp->free_bitmap);
        hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
        if (!hwsp->free_bitmap)
                list_del(&hwsp->free_link);

        spin_unlock_irq(&gt->hwsp_lock);

        GEM_BUG_ON(hwsp->vma->private != hwsp);
        return hwsp->vma;
}

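/*
 * Return a cacheline to its HWSP page. A fully used page is republished on
 * the freelist as soon as one cacheline becomes available again; once every
 * cacheline in the page is free, the page itself is released back to the
 * system.
 */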
static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
        struct intel_gt_timelines *gt = hwsp->gt_timelines;
        unsigned long flags;

        spin_lock_irqsave(&gt->hwsp_lock, flags);

        /* As a cacheline becomes available, publish the HWSP on the freelist */
        if (!hwsp->free_bitmap)
                list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

        GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
        hwsp->free_bitmap |= BIT_ULL(cacheline);

        /* And if no one is left using it, give the page back to the system */
        if (hwsp->free_bitmap == ~0ull) {
                i915_vma_put(hwsp->vma);
                list_del(&hwsp->free_link);
                kfree(hwsp);
        }

        spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}

static void __rcu_cacheline_free(struct rcu_head *rcu)
{
        struct intel_timeline_cacheline *cl =
                container_of(rcu, typeof(*cl), rcu);

        i915_active_fini(&cl->active);
        kfree(cl);
}

static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
        GEM_BUG_ON(!i915_active_is_idle(&cl->active));

        i915_gem_object_unpin_map(cl->hwsp->vma->obj);
        i915_vma_put(cl->hwsp->vma);
        __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

        call_rcu(&cl->rcu, __rcu_cacheline_free);
}

__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
        struct intel_timeline_cacheline *cl =
                container_of(active, typeof(*cl), active);

        i915_vma_unpin(cl->hwsp->vma);
        if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
                __idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
        struct intel_timeline_cacheline *cl =
                container_of(active, typeof(*cl), active);

        __i915_vma_pin(cl->hwsp->vma);
        return 0;
}

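/*
 * Wrap a claimed cacheline in an intel_timeline_cacheline: take a CPU
 * mapping of the backing page, hold a reference on its vma, and set up the
 * i915_active used to track outstanding GPU references (keeping the vma
 * pinned while active, see __cacheline_active/__cacheline_retire above).
 */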
static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
        struct intel_timeline_cacheline *cl;
        void *vaddr;

        GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

        cl = kmalloc(sizeof(*cl), GFP_KERNEL);
        if (!cl)
                return ERR_PTR(-ENOMEM);

        vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
        if (IS_ERR(vaddr)) {
                kfree(cl);
                return ERR_CAST(vaddr);
        }

        i915_vma_get(hwsp->vma);
        cl->hwsp = hwsp;
        cl->vaddr = page_pack_bits(vaddr, cacheline);

        i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

        return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
{
        if (cl)
                i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
        if (cl)
                i915_active_release(&cl->active);
}

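/*
 * Drop the timeline's ownership of the cacheline. If the GPU still holds
 * references, defer the release to __cacheline_retire() by tagging the
 * cacheline with CACHELINE_FREE; otherwise free it immediately.
 */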
static void cacheline_free(struct intel_timeline_cacheline *cl)
{
        if (!i915_active_acquire_if_busy(&cl->active)) {
                __idle_cacheline_free(cl);
                return;
        }

        GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
        cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

        i915_active_release(&cl->active);
}

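/*
 * If no global HWSP vma is supplied, the timeline claims a private
 * cacheline from the shared HWSP pages for its seqno (and will emit an
 * initial breadcrumb); otherwise it writes its seqno at the fixed
 * I915_GEM_HWS_SEQNO_ADDR offset within the provided page.
 */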
static int intel_timeline_init(struct intel_timeline *timeline,
                               struct intel_gt *gt,
                               struct i915_vma *hwsp)
{
        void *vaddr;

        kref_init(&timeline->kref);
        atomic_set(&timeline->pin_count, 0);

        timeline->gt = gt;

        timeline->has_initial_breadcrumb = !hwsp;
        timeline->hwsp_cacheline = NULL;

        if (!hwsp) {
                struct intel_timeline_cacheline *cl;
                unsigned int cacheline;

                hwsp = hwsp_alloc(timeline, &cacheline);
                if (IS_ERR(hwsp))
                        return PTR_ERR(hwsp);

                cl = cacheline_alloc(hwsp->private, cacheline);
                if (IS_ERR(cl)) {
                        __idle_hwsp_free(hwsp->private, cacheline);
                        return PTR_ERR(cl);
                }

                timeline->hwsp_cacheline = cl;
                timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

                vaddr = page_mask_bits(cl->vaddr);
        } else {
                timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;

                vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
                if (IS_ERR(vaddr))
                        return PTR_ERR(vaddr);
        }

        timeline->hwsp_seqno =
                memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

        timeline->hwsp_ggtt = i915_vma_get(hwsp);
        GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

        timeline->fence_context = dma_fence_context_alloc(1);

        mutex_init(&timeline->mutex);

        INIT_ACTIVE_FENCE(&timeline->last_request);
        INIT_LIST_HEAD(&timeline->requests);

        i915_syncmap_init(&timeline->sync);

        return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        spin_lock_init(&timelines->lock);
        INIT_LIST_HEAD(&timelines->active_list);

        spin_lock_init(&timelines->hwsp_lock);
        INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

static void intel_timeline_fini(struct intel_timeline *timeline)
{
        GEM_BUG_ON(atomic_read(&timeline->pin_count));
        GEM_BUG_ON(!list_empty(&timeline->requests));
        GEM_BUG_ON(timeline->retire);

        if (timeline->hwsp_cacheline)
                cacheline_free(timeline->hwsp_cacheline);
        else
                i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

        i915_vma_put(timeline->hwsp_ggtt);
}

struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
{
        struct intel_timeline *timeline;
        int err;

        timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
        if (!timeline)
                return ERR_PTR(-ENOMEM);

        err = intel_timeline_init(timeline, gt, global_hwsp);
        if (err) {
                kfree(timeline);
                return ERR_PTR(err);
        }

        return timeline;
}

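/*
 * Pin the timeline's HWSP into the GGTT and record the GGTT offset of its
 * seqno slot. Only the first pin does the work; a caller that races past
 * the fast path and finds the work already done drops its duplicate pin
 * again.
 */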
int intel_timeline_pin(struct intel_timeline *tl)
{
        int err;

        if (atomic_add_unless(&tl->pin_count, 1, 0))
                return 0;

        err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
        if (err)
                return err;

        tl->hwsp_offset =
                i915_ggtt_offset(tl->hwsp_ggtt) +
                offset_in_page(tl->hwsp_offset);

        cacheline_acquire(tl->hwsp_cacheline);
        if (atomic_fetch_inc(&tl->pin_count)) {
                cacheline_release(tl->hwsp_cacheline);
                __i915_vma_unpin(tl->hwsp_ggtt);
        }

        return 0;
}

void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
        /* Must be pinned to be writable, and no requests in flight. */
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
}

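/*
 * intel_timeline_enter() publishes the timeline on the gt's active_list so
 * that it is included when the GT retires requests. Each enter must be
 * balanced by a later intel_timeline_exit(), and only the final exit
 * removes the timeline from the list; see the locking notes below.
 */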
void intel_timeline_enter(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;

        /*
         * Pretend we are serialised by the timeline->mutex.
         *
         * While generally true, there are a few exceptions to the rule
         * for the engine->kernel_context being used to manage power
         * transitions. As the engine_park may be called from under any
         * timeline, it uses the power mutex as a global serialisation
         * lock to prevent any other request entering its timeline.
         *
         * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
         *
         * However, intel_gt_retire_requests() does not know which engine
         * it is retiring along and so cannot partake in the engine-pm
         * barrier, and there we use the tl->active_count as a means to
         * pin the timeline in the active_list while the locks are dropped.
         * Ergo, as that is outside of the engine-pm barrier, we need to
         * use atomic to manipulate tl->active_count.
         */
        lockdep_assert_held(&tl->mutex);

        if (atomic_add_unless(&tl->active_count, 1, 0))
                return;

        spin_lock(&timelines->lock);
        if (!atomic_fetch_inc(&tl->active_count)) {
                /*
                 * The HWSP is volatile, and may have been lost while inactive,
                 * e.g. across suspend/resume. Be paranoid, and ensure that
                 * the HWSP value matches our seqno so we don't proclaim
                 * the next request as already complete.
                 */
                intel_timeline_reset_seqno(tl);
                list_add_tail(&tl->link, &timelines->active_list);
        }
        spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;

        /* See intel_timeline_enter() */
        lockdep_assert_held(&tl->mutex);

        GEM_BUG_ON(!atomic_read(&tl->active_count));
        if (atomic_add_unless(&tl->active_count, -1, 1))
                return;

        spin_lock(&timelines->lock);
        if (atomic_dec_and_test(&tl->active_count))
                list_del(&tl->link);
        spin_unlock(&timelines->lock);

        /*
         * Since this timeline is idle, all barriers upon which we were waiting
         * must also be complete and so we can discard the last used barriers
         * without loss of information.
         */
        i915_syncmap_free(&tl->sync);
}

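/*
 * Reserve the seqno for the next request: one, plus a second when the
 * timeline emits an initial breadcrumb, in which case the seqno is kept
 * even.
 */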
static u32 timeline_advance(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

        return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
        tl->seqno -= 1 + tl->has_initial_breadcrumb;
}

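/*
 * Slow path for seqno wraparound: detach from the current HWSP cacheline,
 * keeping it alive for any HW semaphores still sampling it, and install a
 * freshly allocated cacheline before handing out the wrapped seqno. See
 * the comment in the function body for the reasoning.
 */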
static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
                           struct i915_request *rq,
                           u32 *seqno)
{
        struct intel_timeline_cacheline *cl;
        unsigned int cacheline;
        struct i915_vma *vma;
        void *vaddr;
        int err;

        might_lock(&tl->gt->ggtt->vm.mutex);

        /*
         * If there is an outstanding GPU reference to this cacheline,
         * such as it being sampled by a HW semaphore on another timeline,
         * we cannot wraparound our seqno value (the HW semaphore does
         * a strict greater-than-or-equals compare, not i915_seqno_passed).
         * So if the cacheline is still busy, we must detach ourselves
         * from it and leave it inflight alongside its users.
         *
         * However, if nobody is watching and we can guarantee that nobody
         * will, we could simply reuse the same cacheline.
         *
         * if (i915_active_request_is_signaled(&tl->last_request) &&
         *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
         *      return 0;
         *
         * That seems unlikely for a busy timeline that needed to wrap in
         * the first place, so just replace the cacheline.
         */

        vma = hwsp_alloc(tl, &cacheline);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_rollback;
        }

        err = i915_ggtt_pin(vma, 0, PIN_HIGH);
        if (err) {
                __idle_hwsp_free(vma->private, cacheline);
                goto err_rollback;
        }

        cl = cacheline_alloc(vma->private, cacheline);
        if (IS_ERR(cl)) {
                err = PTR_ERR(cl);
                __idle_hwsp_free(vma->private, cacheline);
                goto err_unpin;
        }
        GEM_BUG_ON(cl->hwsp->vma != vma);

        /*
         * Attach the old cacheline to the current request, so that we only
         * free it after the current request is retired, which ensures that
         * all writes into the cacheline from previous requests are complete.
         */
        err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
        if (err)
                goto err_cacheline;

        cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
        cacheline_free(tl->hwsp_cacheline);

        i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
        i915_vma_put(tl->hwsp_ggtt);

        tl->hwsp_ggtt = i915_vma_get(vma);

        vaddr = page_mask_bits(cl->vaddr);
        tl->hwsp_offset = cacheline * CACHELINE_BYTES;
        tl->hwsp_seqno =
                memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

        tl->hwsp_offset += i915_ggtt_offset(vma);

        cacheline_acquire(cl);
        tl->hwsp_cacheline = cl;

        *seqno = timeline_advance(tl);
        GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
        return 0;

err_cacheline:
        cacheline_free(cl);
err_unpin:
        i915_vma_unpin(vma);
err_rollback:
        timeline_rollback(tl);
        return err;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
                             struct i915_request *rq,
                             u32 *seqno)
{
        *seqno = timeline_advance(tl);

        /* Replace the HWSP on wraparound for HW semaphores */
        if (unlikely(!*seqno && tl->hwsp_cacheline))
                return __intel_timeline_get_seqno(tl, rq, seqno);

        return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
                         struct i915_request *rq)
{
        return i915_active_add_request(&cl->active, rq);
}

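/*
 * Look up the GGTT address of the seqno that 'from' will write, keeping the
 * cacheline alive for the lifetime of 'to' so that 'to' may wait upon it
 * with a HW semaphore. Returns 0 on success with the address in *hwsp, a
 * negative error code on failure, or 1 if 'from' has already completed (or
 * its cacheline has been retired) and no wait is required.
 */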
int intel_timeline_read_hwsp(struct i915_request *from,
                             struct i915_request *to,
                             u32 *hwsp)
{
        struct intel_timeline_cacheline *cl;
        int err;

        GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

        rcu_read_lock();
        cl = rcu_dereference(from->hwsp_cacheline);
        if (i915_request_completed(from)) /* confirm cacheline is valid */
                goto unlock;
        if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
                goto unlock; /* seqno wrapped and completed! */
        if (unlikely(i915_request_completed(from)))
                goto release;
        rcu_read_unlock();

        err = cacheline_ref(cl, to);
        if (err)
                goto out;

        *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
                ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

out:
        i915_active_release(&cl->active);
        return err;

release:
        i915_active_release(&cl->active);
unlock:
        rcu_read_unlock();
        return 1;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        if (!atomic_dec_and_test(&tl->pin_count))
                return;

        cacheline_release(tl->hwsp_cacheline);

        __i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
        struct intel_timeline *timeline =
                container_of(kref, typeof(*timeline), kref);

        intel_timeline_fini(timeline);
        kfree_rcu(timeline, rcu);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        GEM_BUG_ON(!list_empty(&timelines->active_list));
        GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif