linux/drivers/gpu/drm/i915/gt/intel_timeline.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS

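/*
 * A single page of HWSP (hardware status page) backing store is shared
 * between timelines: each 64-byte cacheline within the page is handed out
 * individually, with free_bitmap tracking which cachelines are still
 * available for suballocation.
 */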
struct intel_timeline_hwsp {
        struct intel_gt *gt;
        struct intel_gt_timelines *gt_timelines;
        struct list_head free_link;
        struct i915_vma *vma;
        u64 free_bitmap;
};

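/*
 * Allocate a fresh page of internal memory to back a shared HWSP and
 * return its (as yet unpinned) GGTT vma.
 */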
static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
        if (IS_ERR(vma))
                i915_gem_object_put(obj);

        return vma;
}

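/*
 * Suballocate a cacheline from the shared HWSP pool for a timeline's
 * seqno. Take the first page on the free list (allocating and publishing
 * a new page if none has a spare cacheline), claim its lowest free
 * cacheline, and drop the page from the free list once its bitmap is
 * exhausted.
 */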
static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
        struct intel_gt_timelines *gt = &timeline->gt->timelines;
        struct intel_timeline_hwsp *hwsp;

        BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

        spin_lock_irq(&gt->hwsp_lock);

        /* hwsp_free_list only contains HWSP that have available cachelines */
        hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
                                        typeof(*hwsp), free_link);
        if (!hwsp) {
                struct i915_vma *vma;

                spin_unlock_irq(&gt->hwsp_lock);

                hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
                if (!hwsp)
                        return ERR_PTR(-ENOMEM);

                vma = __hwsp_alloc(timeline->gt);
                if (IS_ERR(vma)) {
                        kfree(hwsp);
                        return vma;
                }

                vma->private = hwsp;
                hwsp->gt = timeline->gt;
                hwsp->vma = vma;
                hwsp->free_bitmap = ~0ull;
                hwsp->gt_timelines = gt;

                spin_lock_irq(&gt->hwsp_lock);
                list_add(&hwsp->free_link, &gt->hwsp_free_list);
        }

        GEM_BUG_ON(!hwsp->free_bitmap);
        *cacheline = __ffs64(hwsp->free_bitmap);
        hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
        if (!hwsp->free_bitmap)
                list_del(&hwsp->free_link);

        spin_unlock_irq(&gt->hwsp_lock);

        GEM_BUG_ON(hwsp->vma->private != hwsp);
        return hwsp->vma;
}

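/*
 * Return a cacheline to its HWSP page. The page is republished on the
 * free list when it regains a spare cacheline, and released back to the
 * system once every cacheline is idle again.
 */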
static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
        struct intel_gt_timelines *gt = hwsp->gt_timelines;
        unsigned long flags;

        spin_lock_irqsave(&gt->hwsp_lock, flags);

        /* As a cacheline becomes available, publish the HWSP on the freelist */
        if (!hwsp->free_bitmap)
                list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

        GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
        hwsp->free_bitmap |= BIT_ULL(cacheline);

        /* And if no one is left using it, give the page back to the system */
        if (hwsp->free_bitmap == ~0ull) {
                i915_vma_put(hwsp->vma);
                list_del(&hwsp->free_link);
                kfree(hwsp);
        }

        spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}

static void __rcu_cacheline_free(struct rcu_head *rcu)
{
        struct intel_timeline_cacheline *cl =
                container_of(rcu, typeof(*cl), rcu);

        i915_active_fini(&cl->active);
        kfree(cl);
}

static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
        GEM_BUG_ON(!i915_active_is_idle(&cl->active));

        i915_gem_object_unpin_map(cl->hwsp->vma->obj);
        i915_vma_put(cl->hwsp->vma);
        __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

        call_rcu(&cl->rcu, __rcu_cacheline_free);
}

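/*
 * While a cacheline has active users (requests or HW semaphores reading
 * its seqno), keep the backing HWSP vma pinned; once the last user
 * retires, drop the pin and, if the cacheline was marked CACHELINE_FREE
 * in the meantime, complete the deferred free.
 */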
__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
        struct intel_timeline_cacheline *cl =
                container_of(active, typeof(*cl), active);

        i915_vma_unpin(cl->hwsp->vma);
        if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
                __idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
        struct intel_timeline_cacheline *cl =
                container_of(active, typeof(*cl), active);

        __i915_vma_pin(cl->hwsp->vma);
        return 0;
}

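/*
 * Wrap a suballocated cacheline in a tracker: keep the HWSP page mapped
 * (and its vma referenced) for CPU access, and record which cacheline we
 * own in the low bits of the kernel address.
 */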
static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
        struct intel_timeline_cacheline *cl;
        void *vaddr;

        GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

        cl = kmalloc(sizeof(*cl), GFP_KERNEL);
        if (!cl)
                return ERR_PTR(-ENOMEM);

        vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
        if (IS_ERR(vaddr)) {
                kfree(cl);
                return ERR_CAST(vaddr);
        }

        i915_vma_get(hwsp->vma);
        cl->hwsp = hwsp;
        cl->vaddr = page_pack_bits(vaddr, cacheline);

        i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

        return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
{
        if (cl)
                i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
        if (cl)
                i915_active_release(&cl->active);
}

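/*
 * Drop the timeline's reference to its cacheline. If the cacheline is
 * still busy (e.g. being sampled by a HW semaphore), only mark it
 * CACHELINE_FREE and let __cacheline_retire() release it when idle;
 * otherwise free it immediately.
 */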
static void cacheline_free(struct intel_timeline_cacheline *cl)
{
        if (!i915_active_acquire_if_busy(&cl->active)) {
                __idle_cacheline_free(cl);
                return;
        }

        GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
        cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

        i915_active_release(&cl->active);
}

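/*
 * Initialise a timeline. With no global HWSP supplied, a cacheline is
 * suballocated from the shared HWSP pool and the timeline emits an
 * initial breadcrumb (and may later swap cachelines on seqno wraparound);
 * otherwise the seqno lives at the fixed I915_GEM_HWS_SEQNO_ADDR offset
 * of the given status page.
 */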
int intel_timeline_init(struct intel_timeline *timeline,
                        struct intel_gt *gt,
                        struct i915_vma *hwsp)
{
        void *vaddr;

        kref_init(&timeline->kref);
        atomic_set(&timeline->pin_count, 0);

        timeline->gt = gt;

        timeline->has_initial_breadcrumb = !hwsp;
        timeline->hwsp_cacheline = NULL;

        if (!hwsp) {
                struct intel_timeline_cacheline *cl;
                unsigned int cacheline;

                hwsp = hwsp_alloc(timeline, &cacheline);
                if (IS_ERR(hwsp))
                        return PTR_ERR(hwsp);

                cl = cacheline_alloc(hwsp->private, cacheline);
                if (IS_ERR(cl)) {
                        __idle_hwsp_free(hwsp->private, cacheline);
                        return PTR_ERR(cl);
                }

                timeline->hwsp_cacheline = cl;
                timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

                vaddr = page_mask_bits(cl->vaddr);
        } else {
                timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;

                vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
                if (IS_ERR(vaddr))
                        return PTR_ERR(vaddr);
        }

        timeline->hwsp_seqno =
                memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

        timeline->hwsp_ggtt = i915_vma_get(hwsp);
        GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

        timeline->fence_context = dma_fence_context_alloc(1);

        mutex_init(&timeline->mutex);

        INIT_ACTIVE_FENCE(&timeline->last_request);
        INIT_LIST_HEAD(&timeline->requests);

        i915_syncmap_init(&timeline->sync);

        return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        spin_lock_init(&timelines->lock);
        INIT_LIST_HEAD(&timelines->active_list);

        spin_lock_init(&timelines->hwsp_lock);
        INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

void intel_timeline_fini(struct intel_timeline *timeline)
{
        GEM_BUG_ON(atomic_read(&timeline->pin_count));
        GEM_BUG_ON(!list_empty(&timeline->requests));
        GEM_BUG_ON(timeline->retire);

        if (timeline->hwsp_cacheline)
                cacheline_free(timeline->hwsp_cacheline);
        else
                i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

        i915_vma_put(timeline->hwsp_ggtt);
}

struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
{
        struct intel_timeline *timeline;
        int err;

        timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
        if (!timeline)
                return ERR_PTR(-ENOMEM);

        err = intel_timeline_init(timeline, gt, global_hwsp);
        if (err) {
                kfree(timeline);
                return ERR_PTR(err);
        }

        return timeline;
}

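/*
 * Pin the timeline's HWSP into the GGTT and resolve hwsp_offset to its
 * final GGTT address for use by the HW. Pins are counted; only the first
 * pin does the work, and every pin must be balanced by a call to
 * intel_timeline_unpin().
 */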
int intel_timeline_pin(struct intel_timeline *tl)
{
        int err;

        if (atomic_add_unless(&tl->pin_count, 1, 0))
                return 0;

        err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
        if (err)
                return err;

        tl->hwsp_offset =
                i915_ggtt_offset(tl->hwsp_ggtt) +
                offset_in_page(tl->hwsp_offset);

        cacheline_acquire(tl->hwsp_cacheline);
        if (atomic_fetch_inc(&tl->pin_count)) {
                cacheline_release(tl->hwsp_cacheline);
                __i915_vma_unpin(tl->hwsp_ggtt);
        }

        return 0;
}

void intel_timeline_enter(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;

        /*
         * Pretend we are serialised by the timeline->mutex.
         *
         * While generally true, there are a few exceptions to the rule
         * for the engine->kernel_context being used to manage power
         * transitions. As the engine_park may be called from under any
         * timeline, it uses the power mutex as a global serialisation
         * lock to prevent any other request entering its timeline.
         *
         * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
         *
         * However, intel_gt_retire_requests() does not know which engine
         * it is retiring along and so cannot partake in the engine-pm
         * barrier, and there we use the tl->active_count as a means to
         * pin the timeline in the active_list while the locks are dropped.
         * Ergo, as that is outside of the engine-pm barrier, we need to
         * use atomic to manipulate tl->active_count.
         */
        lockdep_assert_held(&tl->mutex);

        if (atomic_add_unless(&tl->active_count, 1, 0))
                return;

        spin_lock(&timelines->lock);
        if (!atomic_fetch_inc(&tl->active_count))
                list_add_tail(&tl->link, &timelines->active_list);
        spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;

        /* See intel_timeline_enter() */
        lockdep_assert_held(&tl->mutex);

        GEM_BUG_ON(!atomic_read(&tl->active_count));
        if (atomic_add_unless(&tl->active_count, -1, 1))
                return;

        spin_lock(&timelines->lock);
        if (atomic_dec_and_test(&tl->active_count))
                list_del(&tl->link);
        spin_unlock(&timelines->lock);

        /*
         * Since this timeline is idle, all barriers upon which we were waiting
         * must also be complete and so we can discard the last used barriers
         * without loss of information.
         */
        i915_syncmap_free(&tl->sync);
}

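/*
 * Reserve the next seqno on the timeline: +1 for the request itself, plus
 * one more when the timeline emits an initial breadcrumb, which keeps the
 * completed seqno even on such timelines.
 */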
static u32 timeline_advance(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

        return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
        tl->seqno -= 1 + tl->has_initial_breadcrumb;
}

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
                           struct i915_request *rq,
                           u32 *seqno)
{
        struct intel_timeline_cacheline *cl;
        unsigned int cacheline;
        struct i915_vma *vma;
        void *vaddr;
        int err;

        /*
         * If there is an outstanding GPU reference to this cacheline,
         * such as it being sampled by a HW semaphore on another timeline,
         * we cannot wraparound our seqno value (the HW semaphore does
         * a strict greater-than-or-equals compare, not i915_seqno_passed).
         * So if the cacheline is still busy, we must detach ourselves
         * from it and leave it inflight alongside its users.
         *
         * However, if nobody is watching and we can guarantee that nobody
         * will, we could simply reuse the same cacheline.
         *
         * if (i915_active_request_is_signaled(&tl->last_request) &&
         *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
         *      return 0;
         *
         * That seems unlikely for a busy timeline that needed to wrap in
         * the first place, so just replace the cacheline.
         */

        vma = hwsp_alloc(tl, &cacheline);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_rollback;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
        if (err) {
                __idle_hwsp_free(vma->private, cacheline);
                goto err_rollback;
        }

        cl = cacheline_alloc(vma->private, cacheline);
        if (IS_ERR(cl)) {
                err = PTR_ERR(cl);
                __idle_hwsp_free(vma->private, cacheline);
                goto err_unpin;
        }
        GEM_BUG_ON(cl->hwsp->vma != vma);

        /*
         * Attach the old cacheline to the current request, so that we only
         * free it after the current request is retired, which ensures that
         * all writes into the cacheline from previous requests are complete.
         */
        err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
        if (err)
                goto err_cacheline;

        cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
        cacheline_free(tl->hwsp_cacheline);

        i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
        i915_vma_put(tl->hwsp_ggtt);

        tl->hwsp_ggtt = i915_vma_get(vma);

        vaddr = page_mask_bits(cl->vaddr);
        tl->hwsp_offset = cacheline * CACHELINE_BYTES;
        tl->hwsp_seqno =
                memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

        tl->hwsp_offset += i915_ggtt_offset(vma);

        cacheline_acquire(cl);
        tl->hwsp_cacheline = cl;

        *seqno = timeline_advance(tl);
        GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
        return 0;

err_cacheline:
        cacheline_free(cl);
err_unpin:
        i915_vma_unpin(vma);
err_rollback:
        timeline_rollback(tl);
        return err;
}

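/*
 * Reserve the next seqno for the new request. If the seqno wraps to zero
 * on a timeline backed by a suballocated cacheline, switch to a fresh
 * cacheline first so that HW semaphores still watching the old seqno
 * (which use a plain greater-or-equal compare) are not fooled by the
 * wraparound.
 */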
int intel_timeline_get_seqno(struct intel_timeline *tl,
                             struct i915_request *rq,
                             u32 *seqno)
{
        *seqno = timeline_advance(tl);

        /* Replace the HWSP on wraparound for HW semaphores */
        if (unlikely(!*seqno && tl->hwsp_cacheline))
                return __intel_timeline_get_seqno(tl, rq, seqno);

        return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
                         struct i915_request *rq)
{
        return i915_active_add_request(&cl->active, rq);
}

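/*
 * Look up the GGTT address at which 'from' reports its seqno, for 'to' to
 * poll as a HW semaphore. The cacheline is kept alive until 'to' is
 * retired. Returns 0 with the address in *hwsp, a negative error code, or
 * 1 if 'from' has already completed (so no semaphore address is reported).
 */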
int intel_timeline_read_hwsp(struct i915_request *from,
                             struct i915_request *to,
                             u32 *hwsp)
{
        struct intel_timeline_cacheline *cl;
        int err;

        GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

        rcu_read_lock();
        cl = rcu_dereference(from->hwsp_cacheline);
        if (i915_request_completed(from)) /* confirm cacheline is valid */
                goto unlock;
        if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
                goto unlock; /* seqno wrapped and completed! */
        if (unlikely(i915_request_completed(from)))
                goto release;
        rcu_read_unlock();

        err = cacheline_ref(cl, to);
        if (err)
                goto out;

        *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
                ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

out:
        i915_active_release(&cl->active);
        return err;

release:
        i915_active_release(&cl->active);
unlock:
        rcu_read_unlock();
        return 1;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        if (!atomic_dec_and_test(&tl->pin_count))
                return;

        cacheline_release(tl->hwsp_cacheline);

        __i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
        struct intel_timeline *timeline =
                container_of(kref, typeof(*timeline), kref);

        intel_timeline_fini(timeline);
        kfree_rcu(timeline, rcu);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        GEM_BUG_ON(!list_empty(&timelines->active_list));
        GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif