linux/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

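/*
 * The engine user interrupt is toggled under the shared GT irq_lock; the
 * caller has already disabled local interrupts. irq_enable() reports
 * whether the engine has a breadcrumb interrupt to arm at all.
 */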
static bool irq_enable(struct intel_engine_cs *engine)
{
	if (!engine->irq_enable)
		return false;

	/* Caller disables interrupts */
	spin_lock(&engine->gt->irq_lock);
	engine->irq_enable(engine);
	spin_unlock(&engine->gt->irq_lock);

	return true;
}

static void irq_disable(struct intel_engine_cs *engine)
{
	if (!engine->irq_disable)
		return;

	/* Caller disables interrupts */
	spin_lock(&engine->gt->irq_lock);
	engine->irq_disable(engine);
	spin_unlock(&engine->gt->irq_lock);
}

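/*
 * Arm the user interrupt for the first waiter. Called with b->irq_lock held;
 * takes a GT wakeref that is released again when the irq is disarmed.
 */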
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, true);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && irq_enable(b->irq_engine))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		irq_disable(b->irq_engine);

	WRITE_ONCE(b->irq_armed, false);
	intel_gt_pm_put_async(b->irq_engine->gt);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

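/*
 * Contexts with pending breadcrumbs are tracked on b->signalers. The list is
 * updated under b->signalers_lock, but walked by signal_irq_work() under RCU,
 * hence the _rcu list primitives.
 */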
static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

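/*
 * Sanity check, only used from GEM_BUG_ON: verify that rq sits in seqno
 * order between its neighbours on ce->signals.
 */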
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

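/*
 * dma_fence_signal() is open coded in three steps so that the signaled bit
 * can be claimed while walking the signalers under RCU, with the timestamp
 * and callback invocation applied later from a local list outside that walk.
 */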
static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

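/*
 * Push a request onto a list local to signal_irq_work(), reusing its
 * llist_node; as the list is private to the caller, no llist atomics are
 * needed.
 */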
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

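/*
 * Bottom half of the user interrupt (also kicked directly via irq_work_queue):
 * walk the signalers under RCU, collect completed requests on a local list,
 * then signal their fences and notify any listeners outside the walk.
 */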
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__i915_request_is_complete(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);
}

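/*
 * Allocate the breadcrumb tracking for an engine. irq_engine may be NULL when
 * signaling is not driven by an engine interrupt, in which case the irq is
 * never armed and retirement is not kicked from the signaler.
 */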
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	b->irq_engine = irq_engine;

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	return b;
}

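/*
 * Restore the interrupt enabling to match the current irq_enabled count,
 * for use when the engine's interrupt state may have been lost (e.g. across
 * a reset).
 */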
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		irq_enable(b->irq_engine);
	else
		irq_disable(b->irq_engine);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

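/*
 * Drain the signalers before the breadcrumbs idle: flush the irq_work and,
 * so long as the breadcrumbs are not reactivated (b->active), keep running
 * it by hand with interrupts off until the irq is disarmed and its GT
 * wakeref released.
 */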
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_sync(&b->irq_work);
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
		local_irq_disable();
		signal_irq_work(&b->irq_work);
		local_irq_enable();
		cond_resched();
	}
}

void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
{
	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);
	kfree(b);
}

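/*
 * The request is already complete, so claim the fence-signaled bit here and
 * hand the request (with a reference) over to signal_irq_work() via the
 * signaled_requests llist for the final timestamp and callback notification.
 */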
static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

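/*
 * Called under ce->signal_lock: insert rq into ce->signals in seqno order so
 * that the signaler can stop at the first incomplete request. A reference is
 * taken on the request and dropped once its fence is signaled (or the
 * breadcrumb is cancelled).
 */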
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	irq_work_queue(&b->irq_work);
}

bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

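/*
 * Flush all breadcrumbs still attached to a context (every request on
 * ce->signals is expected to already be complete), then spin until any
 * concurrent signal_irq_work() walker has stopped using the removed entries.
 */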
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

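/*
 * Dump the outstanding breadcrumbs for an engine, marking completed requests
 * with '!' and started-but-incomplete ones with '*'.
 */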
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}