linux/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

static bool irq_enable(struct intel_engine_cs *engine)
{
        if (!engine->irq_enable)
                return false;

        /* Caller disables interrupts */
        spin_lock(&engine->gt->irq_lock);
        engine->irq_enable(engine);
        spin_unlock(&engine->gt->irq_lock);

        return true;
}

static void irq_disable(struct intel_engine_cs *engine)
{
        if (!engine->irq_disable)
                return;

        /* Caller disables interrupts */
        spin_lock(&engine->gt->irq_lock);
        engine->irq_disable(engine);
        spin_unlock(&engine->gt->irq_lock);
}

static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
        /*
         * Since we are waiting on a request, the GPU should be busy
         * and should have its own rpm reference.
         */
        if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
                return;

        /*
         * The breadcrumb irq will be disarmed on the interrupt after the
         * waiters are signaled. This gives us a single interrupt window in
         * which we can add a new waiter and avoid the cost of re-enabling
         * the irq.
         */
        WRITE_ONCE(b->irq_armed, true);

        /* Requests may have completed before we could enable the interrupt. */
        if (!b->irq_enabled++ && irq_enable(b->irq_engine))
                irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
        if (!b->irq_engine)
                return;

        spin_lock(&b->irq_lock);
        if (!b->irq_armed)
                __intel_breadcrumbs_arm_irq(b);
        spin_unlock(&b->irq_lock);
}

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
        GEM_BUG_ON(!b->irq_enabled);
        if (!--b->irq_enabled)
                irq_disable(b->irq_engine);

        WRITE_ONCE(b->irq_armed, false);
        intel_gt_pm_put_async(b->irq_engine->gt);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
        spin_lock(&b->irq_lock);
        if (b->irq_armed)
                __intel_breadcrumbs_disarm_irq(b);
        spin_unlock(&b->irq_lock);
}

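/*
 * b->signalers is the RCU-protected list of contexts that currently have
 * requests waiting to be signaled. A context is added when its first
 * breadcrumb is inserted and removed again once its signal list drains;
 * remove_signaling_context() reports whether the caller should drop the
 * context reference taken on insertion.
 */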
static void add_signaling_context(struct intel_breadcrumbs *b,
                                  struct intel_context *ce)
{
        lockdep_assert_held(&ce->signal_lock);

        spin_lock(&b->signalers_lock);
        list_add_rcu(&ce->signal_link, &b->signalers);
        spin_unlock(&b->signalers_lock);
}

static bool remove_signaling_context(struct intel_breadcrumbs *b,
                                     struct intel_context *ce)
{
        lockdep_assert_held(&ce->signal_lock);

        if (!list_empty(&ce->signals))
                return false;

        spin_lock(&b->signalers_lock);
        list_del_rcu(&ce->signal_link);
        spin_unlock(&b->signalers_lock);

        return true;
}

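/* Debug check: ce->signals must remain sorted by breadcrumb seqno. */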
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
        if (rq->context != ce)
                return false;

        if (!list_is_last(&rq->signal_link, &ce->signals) &&
            i915_seqno_passed(rq->fence.seqno,
                              list_next_entry(rq, signal_link)->fence.seqno))
                return false;

        if (!list_is_first(&rq->signal_link, &ce->signals) &&
            i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
                              rq->fence.seqno))
                return false;

        return true;
}

static bool
__dma_fence_signal(struct dma_fence *fence)
{
        return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
        fence->timestamp = timestamp;
        set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
        trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
                           const struct list_head *list)
{
        struct dma_fence_cb *cur, *tmp;

        lockdep_assert_held(fence->lock);

        list_for_each_entry_safe(cur, tmp, list, node) {
                INIT_LIST_HEAD(&cur->node);
                cur->func(fence, cur);
        }
}

static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
        if (b->irq_engine)
                intel_engine_add_retire(b->irq_engine, tl);
}

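/*
 * Open-coded, non-atomic llist insertion: the list being built here is
 * local to signal_irq_work() and never shared, so the lockless cmpxchg
 * of llist_add() is unnecessary.
 */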
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
        node->next = head;
        return node;
}

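/*
 * The heart of breadcrumb signaling: from irq_work context, pick up any
 * requests already flagged as signaled, walk each signaling context for
 * newly completed requests (detaching them from ce->signals and releasing
 * the context when its list empties), and only then, outside the context
 * locks, stamp each fence and run its callbacks. The user interrupt is
 * disarmed when no listeners remain and re-armed if more are waiting.
 */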
static void signal_irq_work(struct irq_work *work)
{
        struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
        const ktime_t timestamp = ktime_get();
        struct llist_node *signal, *sn;
        struct intel_context *ce;

        signal = NULL;
        if (unlikely(!llist_empty(&b->signaled_requests)))
                signal = llist_del_all(&b->signaled_requests);

        /*
         * Keep the irq armed until the interrupt after all listeners are gone.
         *
         * Enabling/disabling the interrupt is rather costly, roughly a couple
         * of hundred microseconds. If we are proactive and enable/disable
         * the interrupt around every request that wants a breadcrumb, we
         * quickly drown in the extra orders of magnitude of latency imposed
         * on request submission.
         *
         * So we try to be lazy, and keep the interrupts enabled until no
         * more listeners appear within a breadcrumb interrupt interval (that
         * is until a request completes that no one cares about). The
         * observation is that listeners come in batches, and will often
         * listen to a bunch of requests in succession. Though note on icl+,
         * interrupts are always enabled due to concerns with rc6 being
         * dysfunctional with per-engine interrupt masking.
         *
         * We also try to avoid raising too many interrupts, as they may
         * be generated by userspace batches and it is unfortunately rather
         * too easy to drown the CPU under a flood of GPU interrupts. Thus
         * whenever no one appears to be listening, we turn off the interrupts.
         * Fewer interrupts should conserve power -- at the very least, fewer
         * interrupts draw less ire from other users of the system and tools
         * like powertop.
         */
        if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
                intel_breadcrumbs_disarm_irq(b);

        rcu_read_lock();
        atomic_inc(&b->signaler_active);
        list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
                struct i915_request *rq;

                list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
                        bool release;

                        if (!__i915_request_is_complete(rq))
                                break;

                        if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
                                                &rq->fence.flags))
                                break;

                        /*
                         * Queue for execution after dropping the signaling
                         * spinlock as the callback chain may end up adding
                         * more signalers to the same context or engine.
                         */
                        spin_lock(&ce->signal_lock);
                        list_del_rcu(&rq->signal_link);
                        release = remove_signaling_context(b, ce);
                        spin_unlock(&ce->signal_lock);
                        if (release) {
                                if (intel_timeline_is_last(ce->timeline, rq))
                                        add_retire(b, ce->timeline);
                                intel_context_put(ce);
                        }

                        if (__dma_fence_signal(&rq->fence))
                                /* We own signal_node now, xfer to local list */
                                signal = slist_add(&rq->signal_node, signal);
                        else
                                i915_request_put(rq);
                }
        }
        atomic_dec(&b->signaler_active);
        rcu_read_unlock();

        llist_for_each_safe(signal, sn, signal) {
                struct i915_request *rq =
                        llist_entry(signal, typeof(*rq), signal_node);
                struct list_head cb_list;

                spin_lock(&rq->lock);
                list_replace(&rq->fence.cb_list, &cb_list);
                __dma_fence_signal__timestamp(&rq->fence, timestamp);
                __dma_fence_signal__notify(&rq->fence, &cb_list);
                spin_unlock(&rq->lock);

                i915_request_put(rq);
        }

        if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
                intel_breadcrumbs_arm_irq(b);
}

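/*
 * Allocate and initialise the breadcrumbs for one engine. @irq_engine is
 * the engine whose user interrupt kicks signal_irq_work(); it may be NULL,
 * in which case the interrupt is never armed and the worker only runs when
 * explicitly queued.
 */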
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
        struct intel_breadcrumbs *b;

        b = kzalloc(sizeof(*b), GFP_KERNEL);
        if (!b)
                return NULL;

        b->irq_engine = irq_engine;

        spin_lock_init(&b->signalers_lock);
        INIT_LIST_HEAD(&b->signalers);
        init_llist_head(&b->signaled_requests);

        spin_lock_init(&b->irq_lock);
        init_irq_work(&b->irq_work, signal_irq_work);

        return b;
}

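/*
 * Re-apply the current irq_enabled state to the hardware (used across an
 * engine reset) so that the software book-keeping and the HW interrupt
 * mask stay in sync.
 */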
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
        unsigned long flags;

        if (!b->irq_engine)
                return;

        spin_lock_irqsave(&b->irq_lock, flags);

        if (b->irq_enabled)
                irq_enable(b->irq_engine);
        else
                irq_disable(b->irq_engine);

        spin_unlock_irqrestore(&b->irq_lock, flags);
}

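/*
 * Parking: flush the irq worker and keep invoking it directly until the
 * interrupt has been disarmed, bailing out if the breadcrumbs become
 * active again before that happens.
 */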
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
        if (!READ_ONCE(b->irq_armed))
                return;

        /* Kick the work once more to drain the signalers, and disarm the irq */
        irq_work_sync(&b->irq_work);
        while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
                local_irq_disable();
                signal_irq_work(&b->irq_work);
                local_irq_enable();
                cond_resched();
        }
}

void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
{
        irq_work_sync(&b->irq_work);
        GEM_BUG_ON(!list_empty(&b->signalers));
        GEM_BUG_ON(b->irq_armed);
        kfree(b);
}

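/*
 * For a request that is already complete: mark the fence as signaled and,
 * if it was not already, hand it to the irq worker via b->signaled_requests
 * so that the dma-fence callbacks run with the usual timestamp handling.
 */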
static void irq_signal_request(struct i915_request *rq,
                               struct intel_breadcrumbs *b)
{
        if (!__dma_fence_signal(&rq->fence))
                return;

        i915_request_get(rq);
        if (llist_add(&rq->signal_node, &b->signaled_requests))
                irq_work_queue(&b->irq_work);
}

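/*
 * Attach a breadcrumb to @rq: take a reference and link it into its
 * context's signal list in seqno order, then kick the irq worker to arm
 * the interrupt and to catch a completion that raced with the insertion.
 */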
static void insert_breadcrumb(struct i915_request *rq)
{
        struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
        struct intel_context *ce = rq->context;
        struct list_head *pos;

        if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
                return;

        /*
         * If the request is already completed, we can transfer it
         * straight onto a signaled list, and queue the irq worker for
         * its signal completion.
         */
        if (__i915_request_is_complete(rq)) {
                irq_signal_request(rq, b);
                return;
        }

        if (list_empty(&ce->signals)) {
                intel_context_get(ce);
                add_signaling_context(b, ce);
                pos = &ce->signals;
        } else {
                /*
                 * We keep the seqno in retirement order, so we can break
                 * inside intel_engine_signal_breadcrumbs as soon as we've
                 * passed the last completed request (or seen a request that
                 * hasn't even started). We could walk the timeline->requests,
                 * but keeping a separate signalers_list has the advantage of
                 * hopefully being much smaller than the full list and so
                 * provides faster iteration and detection when there are no
                 * more interrupts required for this context.
                 *
                 * We typically expect to add new signalers in order, so we
                 * start looking for our insertion point from the tail of
                 * the list.
                 */
                list_for_each_prev(pos, &ce->signals) {
                        struct i915_request *it =
                                list_entry(pos, typeof(*it), signal_link);

                        if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
                                break;
                }
        }

        i915_request_get(rq);
        list_add_rcu(&rq->signal_link, pos);
        GEM_BUG_ON(!check_signal_order(ce, rq));
        GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
        set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

        /*
         * Defer enabling the interrupt to after HW submission and recheck
         * the request as it may have completed and raised the interrupt as
         * we were attaching it into the lists.
         */
        irq_work_queue(&b->irq_work);
}

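/*
 * Enable fence signaling for @rq. The breadcrumb is only inserted while
 * the request is active on HW; if it has not been submitted yet, it is
 * attached later at submission time instead.
 */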
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
        struct intel_context *ce = rq->context;

        /* Serialises with i915_request_retire() using rq->lock */
        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
                return true;

        /*
         * Peek at i915_request_submit()/i915_request_unsubmit() status.
         *
         * If the request is not yet active (and not signaled), we will
         * attach the breadcrumb later.
         */
        if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
                return true;

        spin_lock(&ce->signal_lock);
        if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
                insert_breadcrumb(rq);
        spin_unlock(&ce->signal_lock);

        return true;
}

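/*
 * Undo insert_breadcrumb(): unlink @rq from its context's signal list and
 * drop the reference taken there. If the request managed to complete in
 * the meantime, it is still signaled via the irq worker.
 */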
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
        struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
        struct intel_context *ce = rq->context;
        bool release;

        spin_lock(&ce->signal_lock);
        if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
                spin_unlock(&ce->signal_lock);
                return;
        }

        list_del_rcu(&rq->signal_link);
        release = remove_signaling_context(b, ce);
        spin_unlock(&ce->signal_lock);
        if (release)
                intel_context_put(ce);

        if (__i915_request_is_complete(rq))
                irq_signal_request(rq, b);

        i915_request_put(rq);
}

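/*
 * Flush a context's remaining (already completed) signaling requests out
 * to the irq worker, then wait for any concurrent signal_irq_work() pass
 * over the signalers to finish before returning.
 */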
void intel_context_remove_breadcrumbs(struct intel_context *ce,
                                      struct intel_breadcrumbs *b)
{
        struct i915_request *rq, *rn;
        bool release = false;
        unsigned long flags;

        spin_lock_irqsave(&ce->signal_lock, flags);

        if (list_empty(&ce->signals))
                goto unlock;

        list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
                GEM_BUG_ON(!__i915_request_is_complete(rq));
                if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
                                        &rq->fence.flags))
                        continue;

                list_del_rcu(&rq->signal_link);
                irq_signal_request(rq, b);
                i915_request_put(rq);
        }
        release = remove_signaling_context(b, ce);

unlock:
        spin_unlock_irqrestore(&ce->signal_lock, flags);
        if (release)
                intel_context_put(ce);

        while (atomic_read(&b->signaler_active))
                cpu_relax();
}

static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
        struct intel_context *ce;
        struct i915_request *rq;

        drm_printf(p, "Signals:\n");

        rcu_read_lock();
        list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
                list_for_each_entry_rcu(rq, &ce->signals, signal_link)
                        drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
                                   rq->fence.context, rq->fence.seqno,
                                   __i915_request_is_complete(rq) ? "!" :
                                   __i915_request_has_started(rq) ? "*" :
                                   "",
                                   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
        }
        rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
                                    struct drm_printer *p)
{
        struct intel_breadcrumbs *b;

        b = engine->breadcrumbs;
        if (!b)
                return;

        drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed));
        if (!list_empty(&b->signalers))
                print_signals(b, p);
}