linux/drivers/gpu/drm/i915/intel_engine_cs.c
<<
>>
Prefs
   1/*
   2 * Copyright © 2016 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 */
  24
  25#include <drm/drm_print.h>
  26
  27#include "i915_drv.h"
  28#include "i915_reset.h"
  29#include "intel_ringbuffer.h"
  30#include "intel_lrc.h"
  31
/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE              (17 * PAGE_SIZE)

/* Logical ring context sizes for the render class, per generation. */
#define DEFAULT_LR_CONTEXT_RENDER_SIZE  (22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE     (20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE     (22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE    (18 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE    (14 * PAGE_SIZE)

/* Context size shared by all non-render classes on gen8+. */
#define GEN8_LR_CONTEXT_OTHER_SIZE      ( 2 * PAGE_SIZE)
  48
/*
 * Static per-class engine information: the short name used to build
 * engine->name, the submission-mode specific init entry points, and the
 * class id exposed to userspace via the uabi.
 */
struct engine_class_info {
	const char *name;
	int (*init_legacy)(struct intel_engine_cs *engine);
	int (*init_execlists)(struct intel_engine_cs *engine);

	u8 uabi_class;
};

/* Indexed by engine class (RENDER_CLASS, COPY_ENGINE_CLASS, ...). */
static const struct engine_class_info intel_engine_classes[] = {
	[RENDER_CLASS] = {
		.name = "rcs",
		.init_execlists = logical_render_ring_init,
		.init_legacy = intel_init_render_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_RENDER,
	},
	[COPY_ENGINE_CLASS] = {
		.name = "bcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_blt_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_COPY,
	},
	[VIDEO_DECODE_CLASS] = {
		.name = "vcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_bsd_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_VIDEO,
	},
	[VIDEO_ENHANCEMENT_CLASS] = {
		.name = "vecs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_vebox_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
	},
};
  83
#define MAX_MMIO_BASES 3
/*
 * Static per-engine-instance information. Each engine carries a small
 * table of mmio bases keyed by first supported gen, as the ring register
 * blocks have moved over the generations.
 */
struct engine_info {
	unsigned int hw_id;
	u8 class;
	u8 instance;
	/* mmio bases table *must* be sorted in reverse gen order */
	struct engine_mmio_base {
		u32 gen : 8;
		u32 base : 24;
	} mmio_bases[MAX_MMIO_BASES];
};

/* Indexed by enum intel_engine_id (RCS0, BCS0, VCS0..3, VECS0..1). */
static const struct engine_info intel_engines[] = {
	[RCS0] = {
		.hw_id = RCS0_HW,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 1, .base = RENDER_RING_BASE }
		},
	},
	[BCS0] = {
		.hw_id = BCS0_HW,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 6, .base = BLT_RING_BASE }
		},
	},
	[VCS0] = {
		.hw_id = VCS0_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
			{ .gen = 4, .base = BSD_RING_BASE }
		},
	},
	[VCS1] = {
		.hw_id = VCS1_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[VCS3] = {
		.hw_id = VCS3_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[VECS0] = {
		.hw_id = VECS0_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .gen = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS1] = {
		.hw_id = VECS1_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
};
 166
/**
 * __intel_engine_context_size() - return the size of the context for an engine
 * @dev_priv: i915 device private
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
static u32
__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
{
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case RENDER_CLASS:
		switch (INTEL_GEN(dev_priv)) {
		default:
			/* Unknown gen: warn and use the largest known size. */
			MISSING_CASE(INTEL_GEN(dev_priv));
			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
		case 11:
			return GEN11_LR_CONTEXT_RENDER_SIZE;
		case 10:
			return GEN10_LR_CONTEXT_RENDER_SIZE;
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return GEN8_LR_CONTEXT_RENDER_SIZE;
		case 7:
			if (IS_HASWELL(dev_priv))
				return HSW_CXT_TOTAL_SIZE;

			/* Gen6/7 report the context size via mmio. */
			cxt_size = I915_READ(GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = I915_READ(CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
		/* fall through */
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		/* No logical contexts before gen8 for the !render classes. */
		if (INTEL_GEN(dev_priv) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}
 233
 234static u32 __engine_mmio_base(struct drm_i915_private *i915,
 235                              const struct engine_mmio_base *bases)
 236{
 237        int i;
 238
 239        for (i = 0; i < MAX_MMIO_BASES; i++)
 240                if (INTEL_GEN(i915) >= bases[i].gen)
 241                        break;
 242
 243        GEM_BUG_ON(i == MAX_MMIO_BASES);
 244        GEM_BUG_ON(!bases[i].base);
 245
 246        return bases[i].base;
 247}
 248
 249static void __sprint_engine_name(char *name, const struct engine_info *info)
 250{
 251        WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
 252                         intel_engine_classes[info->class].name,
 253                         info->instance) >= INTEL_ENGINE_CS_MAX_NAME);
 254}
 255
 256void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
 257{
 258        /*
 259         * Though they added more rings on g4x/ilk, they did not add
 260         * per-engine HWSTAM until gen6.
 261         */
 262        if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
 263                return;
 264
 265        if (INTEL_GEN(engine->i915) >= 3)
 266                ENGINE_WRITE(engine, RING_HWSTAM, mask);
 267        else
 268                ENGINE_WRITE16(engine, RING_HWSTAM, mask);
 269}
 270
/*
 * Scrub any stale HWSP pointer on takeover: mask off all writes into the
 * unknown HWSP so the hardware cannot scribble over memory we do not own.
 */
static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}
 276
/*
 * Allocate and minimally initialise an intel_engine_cs for @id, using the
 * static tables above, and register it in dev_priv->engine[] and
 * dev_priv->engine_class[][]. No hardware access beyond scrubbing the
 * HWSP write mask is performed here.
 *
 * Returns 0 on success, -EINVAL on inconsistent static tables, or
 * -ENOMEM on allocation failure.
 */
static int
intel_engine_setup(struct drm_i915_private *dev_priv,
		   enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	struct intel_engine_cs *engine;

	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));

	/* Class/instance must be representable in the gen11 hw encoding. */
	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));

	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
		return -EINVAL;

	/* Reject duplicate (class, instance) registrations. */
	if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
		return -EINVAL;

	GEM_BUG_ON(dev_priv->engine[id]);
	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);

	engine->id = id;
	engine->mask = BIT(id);
	engine->i915 = dev_priv;
	engine->uncore = &dev_priv->uncore;
	__sprint_engine_name(engine->name, info);
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
	engine->class = info->class;
	engine->instance = info->instance;

	engine->uabi_class = intel_engine_classes[info->class].uabi_class;

	engine->context_size = __intel_engine_context_size(dev_priv,
							   engine->class);
	/* Sanity check: a context image larger than 1MiB is surely bogus. */
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
	if (engine->context_size)
		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	seqlock_init(&engine->stats.lock);

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	/* Scrub mmio state on takeover */
	intel_engine_sanitize_mmio(engine);

	dev_priv->engine_class[info->class][info->instance] = engine;
	dev_priv->engine[id] = engine;
	return 0;
}
 338
/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: 0 on success, negative error code otherwise.
 */
int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
{
	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
	const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int mask = 0;
	unsigned int i;
	int err;

	/* The static device info must claim at least one valid engine. */
	WARN_ON(engine_mask == 0);
	WARN_ON(engine_mask &
		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));

	if (i915_inject_load_failure())
		return -ENODEV;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(dev_priv, i))
			continue;

		err = intel_engine_setup(dev_priv, i);
		if (err)
			goto cleanup;

		mask |= BIT(i);
	}

	/*
	 * Catch failures to update intel_engines table when the new engines
	 * are added to the driver by a warning and disabling the forgotten
	 * engines.
	 */
	if (WARN_ON(mask != engine_mask))
		device_info->engine_mask = mask;

	/* We always presume we have at least RCS available for later probing */
	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) {
		err = -ENODEV;
		goto cleanup;
	}

	RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask);

	i915_check_and_clear_faults(dev_priv);

	return 0;

cleanup:
	/* for_each_engine() skips NULL slots, so only allocated engines go. */
	for_each_engine(engine, dev_priv, id)
		kfree(engine);
	return err;
}
 398
/**
 * intel_engines_init() - init the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: 0 on success, negative error code otherwise.
 */
int intel_engines_init(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id, err_id;
	int err;

	for_each_engine(engine, dev_priv, id) {
		const struct engine_class_info *class_info =
			&intel_engine_classes[engine->class];
		int (*init)(struct intel_engine_cs *engine);

		/* Pick the submission-mode specific init for this class. */
		if (HAS_EXECLISTS(dev_priv))
			init = class_info->init_execlists;
		else
			init = class_info->init_legacy;

		/* Remember which engine failed so cleanup can split below. */
		err = -EINVAL;
		err_id = id;

		if (GEM_DEBUG_WARN_ON(!init))
			goto cleanup;

		err = init(engine);
		if (err)
			goto cleanup;

		GEM_BUG_ON(!engine->submit_request);
	}

	return 0;

cleanup:
	/*
	 * Engines before err_id completed init and need a full teardown;
	 * err_id and later were only allocated, so just free them.
	 */
	for_each_engine(engine, dev_priv, id) {
		if (id >= err_id) {
			kfree(engine);
			dev_priv->engine[id] = NULL;
		} else {
			dev_priv->gt.cleanup_engine(engine);
		}
	}
	return err;
}
 447
/* Set up the per-engine batch-buffer pool (no hardware access). */
static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
{
	i915_gem_batch_pool_init(&engine->batch_pool, engine);
}
 452
/* Initialise the software execlists state: ports, priority, queue. */
static void intel_engine_init_execlist(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	/* port_mask of 1 means 2 ports (mask + 1 entries, power of two). */
	execlists->port_mask = 1;
	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);

	/* INT_MIN: nothing queued, any request will raise the hint. */
	execlists->queue_priority_hint = INT_MIN;
	execlists->queue = RB_ROOT_CACHED;
}
 464
/*
 * Release the status page (HWSP): mask further writes, then unpin and
 * drop the backing object. Safe to call when no status page was set up.
 */
static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;

	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	/* Only GGTT-pinned HWSPs were pinned in init_status_page(). */
	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
	__i915_gem_object_release_unless_active(vma->obj);
}
 482
 483static int pin_ggtt_status_page(struct intel_engine_cs *engine,
 484                                struct i915_vma *vma)
 485{
 486        unsigned int flags;
 487
 488        flags = PIN_GLOBAL;
 489        if (!HAS_LLC(engine->i915))
 490                /*
 491                 * On g33, we cannot place HWS above 256MiB, so
 492                 * restrict its pinning to the low mappable arena.
 493                 * Though this restriction is not documented for
 494                 * gen4, gen5, or byt, they also behave similarly
 495                 * and hang if the HWS is placed at the top of the
 496                 * GTT. To generalise, it appears that all !llc
 497                 * platforms have issues with us placing the HWS
 498                 * above the mappable region (even though we never
 499                 * actually map it).
 500                 */
 501                flags |= PIN_MAPPABLE;
 502        else
 503                flags |= PIN_HIGH;
 504
 505        return i915_vma_pin(vma, 0, 0, flags);
 506}
 507
/*
 * Allocate, map and (for non-physical HWSP platforms) GGTT-pin the
 * per-engine hardware status page.
 *
 * Returns 0 on success or a negative error code; on failure no state
 * is left pinned (goto-based unwind below).
 */
static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int ret;

	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G. We only allow objects to be allocated
	 * in GFP_DMA32 for i965, and no earlier physical address users had
	 * access to more than 4G.
	 */
	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	/* CPU mapping for the driver to read/write the status page. */
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err;
	}

	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
	engine->status_page.vma = vma;

	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
		ret = pin_ggtt_status_page(engine, vma);
		if (ret)
			goto err_unpin;
	}

	return 0;

err_unpin:
	i915_gem_object_unpin_map(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}
 559
/**
 * intel_engine_setup_common - setup engine state not requiring hw access
 * @engine: Engine to setup.
 *
 * Initializes @engine@ structure members shared between legacy and execlists
 * submission modes which do not require hardware access.
 *
 * Typically done early in the submission mode specific engine setup stage.
 *
 * Return: 0 on success or a negative error code.
 */
int intel_engine_setup_common(struct intel_engine_cs *engine)
{
	int err;

	err = init_status_page(engine);
	if (err)
		return err;

	/* The engine timeline stores its seqnos in the status page. */
	err = i915_timeline_init(engine->i915,
				 &engine->timeline,
				 engine->status_page.vma);
	if (err)
		goto err_hwsp;

	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);

	intel_engine_init_breadcrumbs(engine);
	intel_engine_init_execlist(engine);
	intel_engine_init_hangcheck(engine);
	intel_engine_init_batch_pool(engine);
	intel_engine_init_cmd_parser(engine);

	return 0;

err_hwsp:
	cleanup_status_page(engine);
	return err;
}
 597
/*
 * Compute the global scheduler capabilities as the intersection of what
 * every engine supports: a capability is advertised only if ALL engines
 * agree, and everything is cleared if scheduling itself is unavailable.
 */
void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
{
	/* Map per-engine feature bits to scheduler capability bits. */
	static const struct {
		u8 engine;
		u8 sched;
	} map[] = {
#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) }
		MAP(PREEMPTION, PREEMPTION),
		MAP(SEMAPHORES, SEMAPHORES),
#undef MAP
	};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 enabled, disabled;

	enabled = 0;
	disabled = 0;
	for_each_engine(engine, i915, id) { /* all engines must agree! */
		int i;

		if (engine->schedule)
			enabled |= (I915_SCHEDULER_CAP_ENABLED |
				    I915_SCHEDULER_CAP_PRIORITY);
		else
			disabled |= (I915_SCHEDULER_CAP_ENABLED |
				     I915_SCHEDULER_CAP_PRIORITY);

		for (i = 0; i < ARRAY_SIZE(map); i++) {
			if (engine->flags & BIT(map[i].engine))
				enabled |= BIT(map[i].sched);
			else
				disabled |= BIT(map[i].sched);
		}
	}

	/* Keep only capabilities no engine objected to. */
	i915->caps.scheduler = enabled & ~disabled;
	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED))
		i915->caps.scheduler = 0;
}
 637
/*
 * Scratch frame used to measure the breadcrumb emission size: a fake
 * request with its own ring and timeline, backed by an on-stack-style
 * buffer (cs) instead of real hardware state.
 */
struct measure_breadcrumb {
	struct i915_request rq;
	struct i915_timeline timeline;
	struct intel_ring ring;
	u32 cs[1024];
};

/*
 * Run the engine's emit_fini_breadcrumb() against a dummy request to
 * measure how many dwords it emits, so real requests can reserve exactly
 * that much ring space.
 *
 * Returns the dword count, or a negative error code.
 */
static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
{
	struct measure_breadcrumb *frame;
	int dw = -ENOMEM;

	GEM_BUG_ON(!engine->i915->gt.scratch);

	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

	if (i915_timeline_init(engine->i915,
			       &frame->timeline,
			       engine->status_page.vma))
		goto out_frame;

	/* Wire up the fake ring to the local buffer. */
	INIT_LIST_HEAD(&frame->ring.request_list);
	frame->ring.timeline = &frame->timeline;
	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);

	frame->rq.i915 = engine->i915;
	frame->rq.engine = engine;
	frame->rq.ring = &frame->ring;
	frame->rq.timeline = &frame->timeline;

	dw = i915_timeline_pin(&frame->timeline);
	if (dw < 0)
		goto out_timeline;

	/* Pointer difference = number of dwords written. */
	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;

	i915_timeline_unpin(&frame->timeline);

out_timeline:
	i915_timeline_fini(&frame->timeline);
out_frame:
	kfree(frame);
	return dw;
}
 687
 688static int pin_context(struct i915_gem_context *ctx,
 689                       struct intel_engine_cs *engine,
 690                       struct intel_context **out)
 691{
 692        struct intel_context *ce;
 693
 694        ce = intel_context_pin(ctx, engine);
 695        if (IS_ERR(ce))
 696                return PTR_ERR(ce);
 697
 698        *out = ce;
 699        return 0;
 700}
 701
/**
 * intel_engine_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine@ structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_init_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	int ret;

	/* We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ret = pin_context(i915->kernel_context, engine,
			  &engine->kernel_context);
	if (ret)
		return ret;

	/*
	 * Similarly the preempt context must always be available so that
	 * we can interrupt the engine at any time. However, as preemption
	 * is optional, we allow it to fail.
	 */
	if (i915->preempt_context)
		pin_context(i915->preempt_context, engine,
			    &engine->preempt_context);

	ret = measure_breadcrumb_dw(engine);
	if (ret < 0)
		goto err_unpin;

	engine->emit_fini_breadcrumb_dw = ret;

	engine->set_default_submission(engine);

	return 0;

err_unpin:
	/* preempt_context pinning is best-effort, so unpin conditionally. */
	if (engine->preempt_context)
		intel_context_unpin(engine->preempt_context);
	intel_context_unpin(engine->kernel_context);
	return ret;
}
 755
 756void intel_gt_resume(struct drm_i915_private *i915)
 757{
 758        struct intel_engine_cs *engine;
 759        enum intel_engine_id id;
 760
 761        /*
 762         * After resume, we may need to poke into the pinned kernel
 763         * contexts to paper over any damage caused by the sudden suspend.
 764         * Only the kernel contexts should remain pinned over suspend,
 765         * allowing us to fixup the user contexts on their first pin.
 766         */
 767        for_each_engine(engine, i915, id) {
 768                struct intel_context *ce;
 769
 770                ce = engine->kernel_context;
 771                if (ce)
 772                        ce->ops->reset(ce);
 773
 774                ce = engine->preempt_context;
 775                if (ce)
 776                        ce->ops->reset(ce);
 777        }
 778}
 779
/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 *                               the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	cleanup_status_page(engine);

	intel_engine_fini_breadcrumbs(engine);
	intel_engine_cleanup_cmd_parser(engine);
	i915_gem_batch_pool_fini(&engine->batch_pool);

	if (engine->default_state)
		i915_gem_object_put(engine->default_state);

	/* The preempt context is optional (see intel_engine_init_common). */
	if (engine->preempt_context)
		intel_context_unpin(engine->preempt_context);
	intel_context_unpin(engine->kernel_context);

	i915_timeline_fini(&engine->timeline);

	intel_wa_list_free(&engine->ctx_wa_list);
	intel_wa_list_free(&engine->wa_list);
	intel_wa_list_free(&engine->whitelist);
}
 808
 809u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
 810{
 811        struct drm_i915_private *i915 = engine->i915;
 812
 813        u64 acthd;
 814
 815        if (INTEL_GEN(i915) >= 8)
 816                acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
 817        else if (INTEL_GEN(i915) >= 4)
 818                acthd = ENGINE_READ(engine, RING_ACTHD);
 819        else
 820                acthd = ENGINE_READ(engine, ACTHD);
 821
 822        return acthd;
 823}
 824
 825u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
 826{
 827        u64 bbaddr;
 828
 829        if (INTEL_GEN(engine->i915) >= 8)
 830                bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
 831        else
 832                bbaddr = ENGINE_READ(engine, RING_BBADDR);
 833
 834        return bbaddr;
 835}
 836
/*
 * Ask the command streamer to stop processing and wait (up to 1ms) for
 * it to report idle.
 *
 * Returns 0 on success, -ENODEV before gen3 (no MI_MODE control), or
 * -ETIMEDOUT if the engine did not settle; the engine is left with
 * STOP_RING set in either case.
 */
int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	const i915_reg_t mode = RING_MI_MODE(base);
	int err;

	if (INTEL_GEN(engine->i915) < 3)
		return -ENODEV;

	GEM_TRACE("%s\n", engine->name);

	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));

	err = 0;
	if (__intel_wait_for_register_fw(uncore,
					 mode, MODE_IDLE, MODE_IDLE,
					 1000, 0,
					 NULL)) {
		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
		err = -ETIMEDOUT;
	}

	/* A final mmio read to let GPU writes be hopefully flushed to memory */
	intel_uncore_posting_read_fw(uncore, mode);

	return err;
}
 865
/* Undo intel_engine_stop_cs(): clear STOP_RING so the CS resumes. */
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
	GEM_TRACE("%s\n", engine->name);

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}
 872
 873const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 874{
 875        switch (type) {
 876        case I915_CACHE_NONE: return " uncached";
 877        case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
 878        case I915_CACHE_L3_LLC: return " L3+LLC";
 879        case I915_CACHE_WT: return " WT";
 880        default: return "";
 881        }
 882}
 883
 884u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
 885{
 886        const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 887        u32 mcr_s_ss_select;
 888        u32 slice = fls(sseu->slice_mask);
 889        u32 subslice = fls(sseu->subslice_mask[slice]);
 890
 891        if (IS_GEN(dev_priv, 10))
 892                mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
 893                                  GEN8_MCR_SUBSLICE(subslice);
 894        else if (INTEL_GEN(dev_priv) >= 11)
 895                mcr_s_ss_select = GEN11_MCR_SLICE(slice) |
 896                                  GEN11_MCR_SUBSLICE(subslice);
 897        else
 898                mcr_s_ss_select = 0;
 899
 900        return mcr_s_ss_select;
 901}
 902
/*
 * read_subslice_reg - read a per-subslice register via MCR steering
 * @dev_priv: device
 * @slice: slice to steer the read towards
 * @subslice: subslice to steer the read towards
 * @reg: the multicast register to read
 *
 * Temporarily reprograms GEN8_MCR_SELECTOR to steer @reg at the requested
 * slice/subslice, performs the read, then restores the default steering.
 * The whole sequence runs under uncore->lock with forcewake held so no
 * other mmio access can observe the modified steering.
 */
static inline u32
read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
		  int subslice, i915_reg_t reg)
{
	struct intel_uncore *uncore = &dev_priv->uncore;
	u32 mcr_slice_subslice_mask;
	u32 mcr_slice_subslice_select;
	u32 default_mcr_s_ss_select;
	u32 mcr;
	u32 ret;
	enum forcewake_domains fw_domains;

	/* Gen11 rearranged the slice/subslice fields in the MCR selector. */
	if (INTEL_GEN(dev_priv) >= 11) {
		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
					  GEN11_MCR_SUBSLICE_MASK;
		mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
					    GEN11_MCR_SUBSLICE(subslice);
	} else {
		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
					  GEN8_MCR_SUBSLICE_MASK;
		mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) |
					    GEN8_MCR_SUBSLICE(subslice);
	}

	default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);

	/* Need forcewake for both the target register and the selector. */
	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw_domains);

	mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);

	/* Sanity check: steering should currently be at its default. */
	WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) !=
		     default_mcr_s_ss_select);

	/* Steer the read at the requested slice/subslice. */
	mcr &= ~mcr_slice_subslice_mask;
	mcr |= mcr_slice_subslice_select;
	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	ret = intel_uncore_read_fw(uncore, reg);

	/* Restore the default steering before anyone else reads mmio. */
	mcr &= ~mcr_slice_subslice_mask;
	mcr |= default_mcr_s_ss_select;

	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(uncore, fw_domains);
	spin_unlock_irq(&uncore->lock);

	return ret;
}
 959
/*
 * intel_engine_get_instdone - capture the engine's INSTDONE state
 * @engine: engine to sample
 * @instdone: result structure, fully zeroed before being filled
 *
 * NB: please notice the memset - fields not applicable to the current gen
 * are deliberately left at zero. The extra render-only registers (slice
 * common, sampler, row) are only read for RCS0.
 */
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(dev_priv)) {
	default:
		/* Gen8+: sampler/row registers are per slice/subslice and
		 * must be read through MCR steering (read_subslice_reg). */
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		/* Gen7: single sampler/row register pair, no steering. */
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		instdone->sampler[0][0] =
			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] =
			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
		if (engine->id == RCS0)
			/* HACK: Using the wrong struct member */
			instdone->slice_common =
				intel_uncore_read(uncore, GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
		break;
	}
}
1022
1023static bool ring_is_idle(struct intel_engine_cs *engine)
1024{
1025        struct drm_i915_private *dev_priv = engine->i915;
1026        intel_wakeref_t wakeref;
1027        bool idle = true;
1028
1029        if (I915_SELFTEST_ONLY(!engine->mmio_base))
1030                return true;
1031
1032        /* If the whole device is asleep, the engine must be idle */
1033        wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
1034        if (!wakeref)
1035                return true;
1036
1037        /* First check that no commands are left in the ring */
1038        if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
1039            (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
1040                idle = false;
1041
1042        /* No bit for gen2, so assume the CS parser is idle */
1043        if (INTEL_GEN(dev_priv) > 2 &&
1044            !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
1045                idle = false;
1046
1047        intel_runtime_pm_put(dev_priv, wakeref);
1048
1049        return idle;
1050}
1051
1052/**
1053 * intel_engine_is_idle() - Report if the engine has finished process all work
1054 * @engine: the intel_engine_cs
1055 *
1056 * Return true if there are no requests pending, nothing left to be submitted
1057 * to hardware, and that the engine is idle.
1058 */
1059bool intel_engine_is_idle(struct intel_engine_cs *engine)
1060{
1061        /* More white lies, if wedged, hw state is inconsistent */
1062        if (i915_reset_failed(engine->i915))
1063                return true;
1064
1065        /* Waiting to drain ELSP? */
1066        if (READ_ONCE(engine->execlists.active)) {
1067                struct tasklet_struct *t = &engine->execlists.tasklet;
1068
1069                local_bh_disable();
1070                if (tasklet_trylock(t)) {
1071                        /* Must wait for any GPU reset in progress. */
1072                        if (__tasklet_is_enabled(t))
1073                                t->func(t->data);
1074                        tasklet_unlock(t);
1075                }
1076                local_bh_enable();
1077
1078                /* Otherwise flush the tasklet if it was on another cpu */
1079                tasklet_unlock_wait(t);
1080
1081                if (READ_ONCE(engine->execlists.active))
1082                        return false;
1083        }
1084
1085        /* ELSP is empty, but there are ready requests? E.g. after reset */
1086        if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
1087                return false;
1088
1089        /* Ring stopped? */
1090        return ring_is_idle(engine);
1091}
1092
1093bool intel_engines_are_idle(struct drm_i915_private *i915)
1094{
1095        struct intel_engine_cs *engine;
1096        enum intel_engine_id id;
1097
1098        /*
1099         * If the driver is wedged, HW state may be very inconsistent and
1100         * report that it is still busy, even though we have stopped using it.
1101         */
1102        if (i915_reset_failed(i915))
1103                return true;
1104
1105        /* Already parked (and passed an idleness test); must still be idle */
1106        if (!READ_ONCE(i915->gt.awake))
1107                return true;
1108
1109        for_each_engine(engine, i915, id) {
1110                if (!intel_engine_is_idle(engine))
1111                        return false;
1112        }
1113
1114        return true;
1115}
1116
1117void intel_engines_reset_default_submission(struct drm_i915_private *i915)
1118{
1119        struct intel_engine_cs *engine;
1120        enum intel_engine_id id;
1121
1122        for_each_engine(engine, i915, id)
1123                engine->set_default_submission(engine);
1124}
1125
1126static bool reset_engines(struct drm_i915_private *i915)
1127{
1128        if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
1129                return false;
1130
1131        return intel_gpu_reset(i915, ALL_ENGINES) == 0;
1132}
1133
1134/**
1135 * intel_engines_sanitize: called after the GPU has lost power
1136 * @i915: the i915 device
1137 * @force: ignore a failed reset and sanitize engine state anyway
1138 *
1139 * Anytime we reset the GPU, either with an explicit GPU reset or through a
1140 * PCI power cycle, the GPU loses state and we must reset our state tracking
1141 * to match. Note that calling intel_engines_sanitize() if the GPU has not
1142 * been reset results in much confusion!
1143 */
1144void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
1145{
1146        struct intel_engine_cs *engine;
1147        enum intel_engine_id id;
1148
1149        GEM_TRACE("\n");
1150
1151        if (!reset_engines(i915) && !force)
1152                return;
1153
1154        for_each_engine(engine, i915, id)
1155                intel_engine_reset(engine, false);
1156}
1157
1158/**
1159 * intel_engines_park: called when the GT is transitioning from busy->idle
1160 * @i915: the i915 device
1161 *
1162 * The GT is now idle and about to go to sleep (maybe never to wake again?).
1163 * Time for us to tidy and put away our toys (release resources back to the
1164 * system).
1165 */
1166void intel_engines_park(struct drm_i915_private *i915)
1167{
1168        struct intel_engine_cs *engine;
1169        enum intel_engine_id id;
1170
1171        for_each_engine(engine, i915, id) {
1172                /* Flush the residual irq tasklets first. */
1173                intel_engine_disarm_breadcrumbs(engine);
1174                tasklet_kill(&engine->execlists.tasklet);
1175
1176                /*
1177                 * We are committed now to parking the engines, make sure there
1178                 * will be no more interrupts arriving later and the engines
1179                 * are truly idle.
1180                 */
1181                if (wait_for(intel_engine_is_idle(engine), 10)) {
1182                        struct drm_printer p = drm_debug_printer(__func__);
1183
1184                        dev_err(i915->drm.dev,
1185                                "%s is not idle before parking\n",
1186                                engine->name);
1187                        intel_engine_dump(engine, &p, NULL);
1188                }
1189
1190                /* Must be reset upon idling, or we may miss the busy wakeup. */
1191                GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
1192
1193                if (engine->park)
1194                        engine->park(engine);
1195
1196                if (engine->pinned_default_state) {
1197                        i915_gem_object_unpin_map(engine->default_state);
1198                        engine->pinned_default_state = NULL;
1199                }
1200
1201                i915_gem_batch_pool_fini(&engine->batch_pool);
1202                engine->execlists.no_priolist = false;
1203        }
1204
1205        i915->gt.active_engines = 0;
1206}
1207
1208/**
1209 * intel_engines_unpark: called when the GT is transitioning from idle->busy
1210 * @i915: the i915 device
1211 *
1212 * The GT was idle and now about to fire up with some new user requests.
1213 */
1214void intel_engines_unpark(struct drm_i915_private *i915)
1215{
1216        struct intel_engine_cs *engine;
1217        enum intel_engine_id id;
1218
1219        for_each_engine(engine, i915, id) {
1220                void *map;
1221
1222                /* Pin the default state for fast resets from atomic context. */
1223                map = NULL;
1224                if (engine->default_state)
1225                        map = i915_gem_object_pin_map(engine->default_state,
1226                                                      I915_MAP_WB);
1227                if (!IS_ERR_OR_NULL(map))
1228                        engine->pinned_default_state = map;
1229
1230                if (engine->unpark)
1231                        engine->unpark(engine);
1232
1233                intel_engine_init_hangcheck(engine);
1234        }
1235}
1236
1237/**
1238 * intel_engine_lost_context: called when the GPU is reset into unknown state
1239 * @engine: the engine
1240 *
1241 * We have either reset the GPU or otherwise about to lose state tracking of
1242 * the current GPU logical state (e.g. suspend). On next use, it is therefore
1243 * imperative that we make no presumptions about the current state and load
1244 * from scratch.
1245 */
1246void intel_engine_lost_context(struct intel_engine_cs *engine)
1247{
1248        struct intel_context *ce;
1249
1250        lockdep_assert_held(&engine->i915->drm.struct_mutex);
1251
1252        ce = fetch_and_zero(&engine->last_retired_context);
1253        if (ce)
1254                intel_context_unpin(ce);
1255}
1256
1257bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1258{
1259        switch (INTEL_GEN(engine->i915)) {
1260        case 2:
1261                return false; /* uses physical not virtual addresses */
1262        case 3:
1263                /* maybe only uses physical not virtual addresses */
1264                return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1265        case 6:
1266                return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
1267        default:
1268                return true;
1269        }
1270}
1271
1272unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
1273{
1274        struct intel_engine_cs *engine;
1275        enum intel_engine_id id;
1276        unsigned int which;
1277
1278        which = 0;
1279        for_each_engine(engine, i915, id)
1280                if (engine->default_state)
1281                        which |= BIT(engine->uabi_class);
1282
1283        return which;
1284}
1285
1286static int print_sched_attr(struct drm_i915_private *i915,
1287                            const struct i915_sched_attr *attr,
1288                            char *buf, int x, int len)
1289{
1290        if (attr->priority == I915_PRIORITY_INVALID)
1291                return x;
1292
1293        x += snprintf(buf + x, len - x,
1294                      " prio=%d", attr->priority);
1295
1296        return x;
1297}
1298
/*
 * print_request - emit a one-line summary of a request for debugfs/dumps
 * @m: destination printer
 * @rq: the request to describe
 * @prefix: caller-supplied label prepended to the line
 *
 * Line shows fence context:seqno, completion markers ("!" completed,
 * "*" started, "+" signaling enabled), scheduling attributes and the
 * request's age in milliseconds.
 */
static void print_request(struct drm_printer *m,
			  struct i915_request *rq,
			  const char *prefix)
{
	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
	char buf[80] = "";
	int x = 0;

	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));

	drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
		   prefix,
		   rq->fence.context, rq->fence.seqno,
		   i915_request_completed(rq) ? "!" :
		   i915_request_started(rq) ? "*" :
		   "",
		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			    &rq->fence.flags) ?  "+" : "",
		   buf,
		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
		   name);
}
1321
1322static void hexdump(struct drm_printer *m, const void *buf, size_t len)
1323{
1324        const size_t rowsize = 8 * sizeof(u32);
1325        const void *prev = NULL;
1326        bool skip = false;
1327        size_t pos;
1328
1329        for (pos = 0; pos < len; pos += rowsize) {
1330                char line[128];
1331
1332                if (prev && !memcmp(prev, buf + pos, rowsize)) {
1333                        if (!skip) {
1334                                drm_printf(m, "*\n");
1335                                skip = true;
1336                        }
1337                        continue;
1338                }
1339
1340                WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
1341                                                rowsize, sizeof(u32),
1342                                                line, sizeof(line),
1343                                                false) >= sizeof(line));
1344                drm_printf(m, "[%04zx] %s\n", pos, line);
1345
1346                prev = buf + pos;
1347                skip = false;
1348        }
1349}
1350
/*
 * intel_engine_print_registers - dump the engine's mmio state
 * @engine: engine to dump
 * @m: destination printer
 *
 * Caller must hold a runtime-pm wakeref (see intel_engine_dump) as this
 * reads live registers. Output is tailored per-gen: execlists platforms
 * additionally decode the CSB ring and ELSP ports, while older ppGTT
 * platforms print the PP_DIR registers instead.
 */
static void intel_engine_print_registers(const struct intel_engine_cs *engine,
					 struct drm_printer *m)
{
	struct drm_i915_private *dev_priv = engine->i915;
	const struct intel_engine_execlists * const execlists =
		&engine->execlists;
	u64 addr;

	if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7))
		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
	drm_printf(m, "\tRING_START: 0x%08x\n",
		   ENGINE_READ(engine, RING_START));
	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
	drm_printf(m, "\tRING_TAIL:  0x%08x\n",
		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
	drm_printf(m, "\tRING_CTL:   0x%08x%s\n",
		   ENGINE_READ(engine, RING_CTL),
		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
	if (INTEL_GEN(engine->i915) > 2) {
		drm_printf(m, "\tRING_MODE:  0x%08x%s\n",
			   ENGINE_READ(engine, RING_MI_MODE),
			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
	}

	if (INTEL_GEN(dev_priv) >= 6) {
		drm_printf(m, "\tRING_IMR: %08x\n",
			   ENGINE_READ(engine, RING_IMR));
	}

	/* Active head / last batch / DMA fetch addresses (gen-dependent width) */
	addr = intel_engine_get_active_head(engine);
	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	addr = intel_engine_get_last_batch_head(engine);
	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 8)
		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
	else if (INTEL_GEN(dev_priv) >= 4)
		addr = ENGINE_READ(engine, RING_DMA_FADD);
	else
		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 4) {
		drm_printf(m, "\tIPEIR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEHR));
	} else {
		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
	}

	if (HAS_EXECLISTS(dev_priv)) {
		/* Decode the context-status buffer (CSB) and ELSP ports. */
		const u32 *hws =
			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
		const u8 num_entries = execlists->csb_size;
		unsigned int idx;
		u8 read, write;

		drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
			   num_entries);

		read = execlists->csb_head;
		write = READ_ONCE(*execlists->csb_write);

		drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
			   read, write,
			   yesno(test_bit(TASKLET_STATE_SCHED,
					  &engine->execlists.tasklet.state)),
			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
		/* Normalise pointers and unwrap so read < write for the walk. */
		if (read >= num_entries)
			read = 0;
		if (write >= num_entries)
			write = 0;
		if (read > write)
			write += num_entries;
		while (read < write) {
			idx = ++read % num_entries;
			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
				   idx, hws[idx * 2], hws[idx * 2 + 1]);
		}

		rcu_read_lock();
		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
			struct i915_request *rq;
			unsigned int count;

			rq = port_unpack(&execlists->port[idx], &count);
			if (rq) {
				char hdr[80];

				snprintf(hdr, sizeof(hdr),
					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
					 idx, count,
					 i915_ggtt_offset(rq->ring->vma),
					 rq->timeline->hwsp_offset,
					 hwsp_seqno(rq));
				print_request(m, rq, hdr);
			} else {
				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
			}
		}
		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
		rcu_read_unlock();
	} else if (INTEL_GEN(dev_priv) > 6) {
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE));
		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
	}
}
1468
/*
 * print_request_ring - hexdump the ring contents belonging to a request
 * @m: destination printer
 * @rq: the request whose ring segment [head, tail) is dumped
 *
 * Copies the (possibly wrapped) segment into a temporary buffer so the
 * hexdump sees it contiguously. Allocation is GFP_ATOMIC as we may be
 * called from error-capture context; on allocation failure the dump is
 * silently skipped (best effort).
 */
static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
	void *ring;
	int size;

	drm_printf(m,
		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
		   rq->head, rq->postfix, rq->tail,
		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);

	/* Account for the segment wrapping past the end of the ring. */
	size = rq->tail - rq->head;
	if (rq->tail < rq->head)
		size += rq->ring->size;

	ring = kmalloc(size, GFP_ATOMIC);
	if (ring) {
		const void *vaddr = rq->ring->vaddr;
		unsigned int head = rq->head;
		unsigned int len = 0;

		/* First copy the tail-end chunk if the segment wraps. */
		if (rq->tail < head) {
			len = rq->ring->size - head;
			memcpy(ring, vaddr + head, len);
			head = 0;
		}
		memcpy(ring + len, vaddr + head, size - len);

		hexdump(m, ring, size);
		kfree(ring);
	}
}
1501
/*
 * intel_engine_dump - emit a full diagnostic dump of an engine
 * @engine: engine to dump
 * @m: destination printer
 * @header: optional printf-style header emitted first (may be NULL)
 *
 * Prints hangcheck status, reset counts, the first/last/active requests on
 * the engine timeline (including the active request's ring contents), the
 * live mmio registers (if the device is awake), pending execlists requests,
 * the HWSP page and breadcrumb state. Safe to call at any time; intended
 * for debugfs and error diagnosis.
 */
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...)
{
	struct i915_gpu_error * const error = &engine->i915->gpu_error;
	struct i915_request *rq;
	intel_wakeref_t wakeref;

	if (header) {
		va_list ap;

		va_start(ap, header);
		drm_vprintf(m, header, &ap);
		va_end(ap);
	}

	if (i915_reset_failed(engine->i915))
		drm_printf(m, "*** WEDGED ***\n");

	drm_printf(m, "\tHangcheck %x:%x [%d ms]\n",
		   engine->hangcheck.last_seqno,
		   engine->hangcheck.next_seqno,
		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
	drm_printf(m, "\tReset count: %d (global %d)\n",
		   i915_reset_engine_count(error, engine),
		   i915_reset_count(error));

	/* Requests may be retired concurrently; rcu keeps them valid. */
	rcu_read_lock();

	drm_printf(m, "\tRequests:\n");

	rq = list_first_entry(&engine->timeline.requests,
			      struct i915_request, link);
	if (&rq->link != &engine->timeline.requests)
		print_request(m, rq, "\t\tfirst  ");

	rq = list_last_entry(&engine->timeline.requests,
			     struct i915_request, link);
	if (&rq->link != &engine->timeline.requests)
		print_request(m, rq, "\t\tlast   ");

	rq = intel_engine_find_active_request(engine);
	if (rq) {
		print_request(m, rq, "\t\tactive ");

		drm_printf(m, "\t\tring->start:  0x%08x\n",
			   i915_ggtt_offset(rq->ring->vma));
		drm_printf(m, "\t\tring->head:   0x%08x\n",
			   rq->ring->head);
		drm_printf(m, "\t\tring->tail:   0x%08x\n",
			   rq->ring->tail);
		drm_printf(m, "\t\tring->emit:   0x%08x\n",
			   rq->ring->emit);
		drm_printf(m, "\t\tring->space:  0x%08x\n",
			   rq->ring->space);
		drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
			   rq->timeline->hwsp_offset);

		print_request_ring(m, rq);
	}

	rcu_read_unlock();

	/* Only read live registers when the device is already awake. */
	wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
	if (wakeref) {
		intel_engine_print_registers(engine, m);
		intel_runtime_pm_put(engine->i915, wakeref);
	} else {
		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
	}

	intel_execlists_show_requests(engine, m, print_request, 8);

	drm_printf(m, "HWSP:\n");
	hexdump(m, engine->status_page.addr, PAGE_SIZE);

	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));

	intel_engine_print_breadcrumbs(engine, m);
}
1582
/* Translation table from the uabi engine classes (I915_ENGINE_CLASS_*)
 * exposed to userspace into our internal hardware engine classes. */
static u8 user_class_map[] = {
	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
};
1589
/*
 * intel_engine_lookup_user - resolve a userspace (class, instance) pair
 * @i915: the device
 * @class: uabi engine class (I915_ENGINE_CLASS_*)
 * @instance: engine instance within that class
 *
 * Returns the matching engine, or NULL if the class/instance is out of
 * range or no such engine exists on this device.
 */
struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
{
	if (class >= ARRAY_SIZE(user_class_map))
		return NULL;

	/* Map the uabi class onto our internal engine class. */
	class = user_class_map[class];

	GEM_BUG_ON(class > MAX_ENGINE_CLASS);

	if (instance > MAX_ENGINE_INSTANCE)
		return NULL;

	return i915->engine_class[class][instance];
}
1605
1606/**
1607 * intel_enable_engine_stats() - Enable engine busy tracking on engine
1608 * @engine: engine to enable stats collection
1609 *
1610 * Start collecting the engine busyness data for @engine.
1611 *
1612 * Returns 0 on success or a negative error code.
1613 */
1614int intel_enable_engine_stats(struct intel_engine_cs *engine)
1615{
1616        struct intel_engine_execlists *execlists = &engine->execlists;
1617        unsigned long flags;
1618        int err = 0;
1619
1620        if (!intel_engine_supports_stats(engine))
1621                return -ENODEV;
1622
1623        spin_lock_irqsave(&engine->timeline.lock, flags);
1624        write_seqlock(&engine->stats.lock);
1625
1626        if (unlikely(engine->stats.enabled == ~0)) {
1627                err = -EBUSY;
1628                goto unlock;
1629        }
1630
1631        if (engine->stats.enabled++ == 0) {
1632                const struct execlist_port *port = execlists->port;
1633                unsigned int num_ports = execlists_num_ports(execlists);
1634
1635                engine->stats.enabled_at = ktime_get();
1636
1637                /* XXX submission method oblivious? */
1638                while (num_ports-- && port_isset(port)) {
1639                        engine->stats.active++;
1640                        port++;
1641                }
1642
1643                if (engine->stats.active)
1644                        engine->stats.start = engine->stats.enabled_at;
1645        }
1646
1647unlock:
1648        write_sequnlock(&engine->stats.lock);
1649        spin_unlock_irqrestore(&engine->timeline.lock, flags);
1650
1651        return err;
1652}
1653
1654static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
1655{
1656        ktime_t total = engine->stats.total;
1657
1658        /*
1659         * If the engine is executing something at the moment
1660         * add it to the total.
1661         */
1662        if (engine->stats.active)
1663                total = ktime_add(total,
1664                                  ktime_sub(ktime_get(), engine->stats.start));
1665
1666        return total;
1667}
1668
1669/**
1670 * intel_engine_get_busy_time() - Return current accumulated engine busyness
1671 * @engine: engine to report on
1672 *
1673 * Returns accumulated time @engine was busy since engine stats were enabled.
1674 */
1675ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
1676{
1677        unsigned int seq;
1678        ktime_t total;
1679
1680        do {
1681                seq = read_seqbegin(&engine->stats.lock);
1682                total = __intel_engine_get_busy_time(engine);
1683        } while (read_seqretry(&engine->stats.lock, seq));
1684
1685        return total;
1686}
1687
1688/**
1689 * intel_disable_engine_stats() - Disable engine busy tracking on engine
1690 * @engine: engine to disable stats collection
1691 *
1692 * Stops collecting the engine busyness data for @engine.
1693 */
1694void intel_disable_engine_stats(struct intel_engine_cs *engine)
1695{
1696        unsigned long flags;
1697
1698        if (!intel_engine_supports_stats(engine))
1699                return;
1700
1701        write_seqlock_irqsave(&engine->stats.lock, flags);
1702        WARN_ON_ONCE(engine->stats.enabled == 0);
1703        if (--engine->stats.enabled == 0) {
1704                engine->stats.total = __intel_engine_get_busy_time(engine);
1705                engine->stats.active = 0;
1706        }
1707        write_sequnlock_irqrestore(&engine->stats.lock, flags);
1708}
1709
1710static bool match_ring(struct i915_request *rq)
1711{
1712        u32 ring = ENGINE_READ(rq->engine, RING_START);
1713
1714        return ring == i915_ggtt_offset(rq->ring->vma);
1715}
1716
1717struct i915_request *
1718intel_engine_find_active_request(struct intel_engine_cs *engine)
1719{
1720        struct i915_request *request, *active = NULL;
1721        unsigned long flags;
1722
1723        /*
1724         * We are called by the error capture, reset and to dump engine
1725         * state at random points in time. In particular, note that neither is
1726         * crucially ordered with an interrupt. After a hang, the GPU is dead
1727         * and we assume that no more writes can happen (we waited long enough
1728         * for all writes that were in transaction to be flushed) - adding an
1729         * extra delay for a recent interrupt is pointless. Hence, we do
1730         * not need an engine->irq_seqno_barrier() before the seqno reads.
1731         * At all other times, we must assume the GPU is still running, but
1732         * we only care about the snapshot of this moment.
1733         */
1734        spin_lock_irqsave(&engine->timeline.lock, flags);
1735        list_for_each_entry(request, &engine->timeline.requests, link) {
1736                if (i915_request_completed(request))
1737                        continue;
1738
1739                if (!i915_request_started(request))
1740                        break;
1741
1742                /* More than one preemptible request may match! */
1743                if (!match_ring(request))
1744                        break;
1745
1746                active = request;
1747                break;
1748        }
1749        spin_unlock_irqrestore(&engine->timeline.lock, flags);
1750
1751        return active;
1752}
1753
1754#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1755#include "selftests/mock_engine.c"
1756#include "selftests/intel_engine_cs.c"
1757#endif
1758