linux/drivers/gpu/drm/i915/i915_guc_submission.c
<<
>>
Prefs
   1/*
   2 * Copyright © 2014 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 */
  24#include <linux/firmware.h>
  25#include <linux/circ_buf.h>
  26#include "i915_drv.h"
  27#include "intel_guc.h"
  28
  29/**
  30 * DOC: GuC-based command submission
  31 *
  32 * i915_guc_client:
  33 * We use the term client to avoid confusion with contexts. An i915_guc_client
  34 * is equivalent to the GuC object guc_context_desc. This context descriptor is
  35 * allocated from a pool of 1024 entries. The kernel driver allocates a doorbell
  36 * and a workqueue for it, as well as the process descriptor (guc_process_desc),
  37 * which is mapped to client space so that the client can write a Work Item and
  38 * then ring the doorbell.
  39 *
  40 * To simplify the implementation, we allocate one gem object that contains all
  41 * pages for doorbell, process descriptor and workqueue.
  42 *
  43 * The Scratch registers:
  44 * There are 16 MMIO-based registers starting at 0xC180. The kernel driver
  45 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
  46 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
  47 * The firmware writes a success/fail code back to the action register after
  48 * processing the request. The kernel driver polls waiting for this update and
  49 * then proceeds.
  50 * See host2guc_action()
  51 *
  52 * Doorbells:
  53 * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
  54 * mapped into process space.
  55 *
  56 * Work Items:
  57 * There are several types of work items that the host may place into a
  58 * workqueue, each with its own requirements and limitations. Currently only
  59 * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
  60 * represents in-order queue. The kernel driver packs ring tail pointer and an
  61 * ELSP context descriptor dword into Work Item.
  62 * See guc_wq_item_append()
  63 *
  64 */
  65
  66/*
  67 * Read GuC command/status register (SOFT_SCRATCH_0)
  68 * Return true if it contains a response rather than a command
  69 */
  70static inline bool host2guc_action_response(struct drm_i915_private *dev_priv,
  71                                            u32 *status)
  72{
  73        u32 val = I915_READ(SOFT_SCRATCH(0));
  74        *status = val;
  75        return GUC2HOST_IS_RESPONSE(val);
  76}
  77
  78static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
  79{
  80        struct drm_i915_private *dev_priv = guc_to_i915(guc);
  81        u32 status;
  82        int i;
  83        int ret;
  84
  85        if (WARN_ON(len < 1 || len > 15))
  86                return -EINVAL;
  87
  88        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
  89
  90        dev_priv->guc.action_count += 1;
  91        dev_priv->guc.action_cmd = data[0];
  92
  93        for (i = 0; i < len; i++)
  94                I915_WRITE(SOFT_SCRATCH(i), data[i]);
  95
  96        POSTING_READ(SOFT_SCRATCH(i - 1));
  97
  98        I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER);
  99
 100        /*
 101         * Fast commands should complete in less than 10us, so sample quickly
 102         * up to that length of time, then switch to a slower sleep-wait loop.
 103         * No HOST2GUC command should ever take longer than 10ms.
 104         */
 105        ret = wait_for_us(host2guc_action_response(dev_priv, &status), 10);
 106        if (ret)
 107                ret = wait_for(host2guc_action_response(dev_priv, &status), 10);
 108        if (status != GUC2HOST_STATUS_SUCCESS) {
 109                /*
 110                 * Either the GuC explicitly returned an error (which
 111                 * we convert to -EIO here) or no response at all was
 112                 * received within the timeout limit (-ETIMEDOUT)
 113                 */
 114                if (ret != -ETIMEDOUT)
 115                        ret = -EIO;
 116
 117                DRM_WARN("Action 0x%X failed; ret=%d status=0x%08X response=0x%08X\n",
 118                         data[0], ret, status, I915_READ(SOFT_SCRATCH(15)));
 119
 120                dev_priv->guc.action_fail += 1;
 121                dev_priv->guc.action_err = ret;
 122        }
 123        dev_priv->guc.action_status = status;
 124
 125        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 126
 127        return ret;
 128}
 129
 130/*
 131 * Tell the GuC to allocate or deallocate a specific doorbell
 132 */
 133
 134static int host2guc_allocate_doorbell(struct intel_guc *guc,
 135                                      struct i915_guc_client *client)
 136{
 137        u32 data[2];
 138
 139        data[0] = HOST2GUC_ACTION_ALLOCATE_DOORBELL;
 140        data[1] = client->ctx_index;
 141
 142        return host2guc_action(guc, data, 2);
 143}
 144
 145static int host2guc_release_doorbell(struct intel_guc *guc,
 146                                     struct i915_guc_client *client)
 147{
 148        u32 data[2];
 149
 150        data[0] = HOST2GUC_ACTION_DEALLOCATE_DOORBELL;
 151        data[1] = client->ctx_index;
 152
 153        return host2guc_action(guc, data, 2);
 154}
 155
 156static int host2guc_sample_forcewake(struct intel_guc *guc,
 157                                     struct i915_guc_client *client)
 158{
 159        struct drm_i915_private *dev_priv = guc_to_i915(guc);
 160        u32 data[2];
 161
 162        data[0] = HOST2GUC_ACTION_SAMPLE_FORCEWAKE;
 163        /* WaRsDisableCoarsePowerGating:skl,bxt */
 164        if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
 165                data[1] = 0;
 166        else
 167                /* bit 0 and 1 are for Render and Media domain separately */
 168                data[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA;
 169
 170        return host2guc_action(guc, data, ARRAY_SIZE(data));
 171}
 172
 173/*
 174 * Initialise, update, or clear doorbell data shared with the GuC
 175 *
 176 * These functions modify shared data and so need access to the mapped
 177 * client object which contains the page being used for the doorbell
 178 */
 179
 180static int guc_update_doorbell_id(struct intel_guc *guc,
 181                                  struct i915_guc_client *client,
 182                                  u16 new_id)
 183{
 184        struct sg_table *sg = guc->ctx_pool_vma->pages;
 185        void *doorbell_bitmap = guc->doorbell_bitmap;
 186        struct guc_doorbell_info *doorbell;
 187        struct guc_context_desc desc;
 188        size_t len;
 189
 190        doorbell = client->client_base + client->doorbell_offset;
 191
 192        if (client->doorbell_id != GUC_INVALID_DOORBELL_ID &&
 193            test_bit(client->doorbell_id, doorbell_bitmap)) {
 194                /* Deactivate the old doorbell */
 195                doorbell->db_status = GUC_DOORBELL_DISABLED;
 196                (void)host2guc_release_doorbell(guc, client);
 197                __clear_bit(client->doorbell_id, doorbell_bitmap);
 198        }
 199
 200        /* Update the GuC's idea of the doorbell ID */
 201        len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 202                             sizeof(desc) * client->ctx_index);
 203        if (len != sizeof(desc))
 204                return -EFAULT;
 205        desc.db_id = new_id;
 206        len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 207                             sizeof(desc) * client->ctx_index);
 208        if (len != sizeof(desc))
 209                return -EFAULT;
 210
 211        client->doorbell_id = new_id;
 212        if (new_id == GUC_INVALID_DOORBELL_ID)
 213                return 0;
 214
 215        /* Activate the new doorbell */
 216        __set_bit(new_id, doorbell_bitmap);
 217        doorbell->cookie = 0;
 218        doorbell->db_status = GUC_DOORBELL_ENABLED;
 219        return host2guc_allocate_doorbell(guc, client);
 220}
 221
 222static int guc_init_doorbell(struct intel_guc *guc,
 223                              struct i915_guc_client *client,
 224                              uint16_t db_id)
 225{
 226        return guc_update_doorbell_id(guc, client, db_id);
 227}
 228
 229static void guc_disable_doorbell(struct intel_guc *guc,
 230                                 struct i915_guc_client *client)
 231{
 232        (void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
 233
 234        /* XXX: wait for any interrupts */
 235        /* XXX: wait for workqueue to drain */
 236}
 237
 238static uint16_t
 239select_doorbell_register(struct intel_guc *guc, uint32_t priority)
 240{
 241        /*
 242         * The bitmap tracks which doorbell registers are currently in use.
 243         * It is split into two halves; the first half is used for normal
 244         * priority contexts, the second half for high-priority ones.
 245         * Note that logically higher priorities are numerically less than
 246         * normal ones, so the test below means "is it high-priority?"
 247         */
 248        const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH);
 249        const uint16_t half = GUC_MAX_DOORBELLS / 2;
 250        const uint16_t start = hi_pri ? half : 0;
 251        const uint16_t end = start + half;
 252        uint16_t id;
 253
 254        id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
 255        if (id == end)
 256                id = GUC_INVALID_DOORBELL_ID;
 257
 258        DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
 259                        hi_pri ? "high" : "normal", id);
 260
 261        return id;
 262}
 263
 264/*
 265 * Select, assign and release doorbell cachelines
 266 *
 267 * These functions track which doorbell cachelines are in use.
 268 * The data they manipulate is protected by the host2guc lock.
 269 */
 270
 271static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
 272{
 273        const uint32_t cacheline_size = cache_line_size();
 274        uint32_t offset;
 275
 276        /* Doorbell uses a single cache line within a page */
 277        offset = offset_in_page(guc->db_cacheline);
 278
 279        /* Moving to next cache line to reduce contention */
 280        guc->db_cacheline += cacheline_size;
 281
 282        DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
 283                        offset, guc->db_cacheline, cacheline_size);
 284
 285        return offset;
 286}
 287
 288/*
 289 * Initialise the process descriptor shared with the GuC firmware.
 290 */
 291static void guc_proc_desc_init(struct intel_guc *guc,
 292                               struct i915_guc_client *client)
 293{
 294        struct guc_process_desc *desc;
 295
 296        desc = client->client_base + client->proc_desc_offset;
 297
 298        memset(desc, 0, sizeof(*desc));
 299
 300        /*
 301         * XXX: pDoorbell and WQVBaseAddress are pointers in process address
 302         * space for ring3 clients (set them as in mmap_ioctl) or kernel
 303         * space for kernel clients (map on demand instead? May make debug
 304         * easier to have it mapped).
 305         */
 306        desc->wq_base_addr = 0;
 307        desc->db_base_addr = 0;
 308
 309        desc->context_id = client->ctx_index;
 310        desc->wq_size_bytes = client->wq_size;
 311        desc->wq_status = WQ_STATUS_ACTIVE;
 312        desc->priority = client->priority;
 313}
 314
 315/*
 316 * Initialise/clear the context descriptor shared with the GuC firmware.
 317 *
 318 * This descriptor tells the GuC where (in GGTT space) to find the important
 319 * data structures relating to this client (doorbell, process descriptor,
 320 * write queue, etc).
 321 */
 322
 323static void guc_ctx_desc_init(struct intel_guc *guc,
 324                              struct i915_guc_client *client)
 325{
 326        struct drm_i915_private *dev_priv = guc_to_i915(guc);
 327        struct intel_engine_cs *engine;
 328        struct i915_gem_context *ctx = client->owner;
 329        struct guc_context_desc desc;
 330        struct sg_table *sg;
 331        unsigned int tmp;
 332        u32 gfx_addr;
 333
 334        memset(&desc, 0, sizeof(desc));
 335
 336        desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL;
 337        desc.context_id = client->ctx_index;
 338        desc.priority = client->priority;
 339        desc.db_id = client->doorbell_id;
 340
 341        for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
 342                struct intel_context *ce = &ctx->engine[engine->id];
 343                uint32_t guc_engine_id = engine->guc_id;
 344                struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
 345
 346                /* TODO: We have a design issue to be solved here. Only when we
 347                 * receive the first batch, we know which engine is used by the
 348                 * user. But here GuC expects the lrc and ring to be pinned. It
 349                 * is not an issue for default context, which is the only one
 350                 * for now who owns a GuC client. But for future owner of GuC
 351                 * client, need to make sure lrc is pinned prior to enter here.
 352                 */
 353                if (!ce->state)
 354                        break;  /* XXX: continue? */
 355
 356                lrc->context_desc = lower_32_bits(ce->lrc_desc);
 357
 358                /* The state page is after PPHWSP */
 359                lrc->ring_lcra =
 360                        i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
 361                lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
 362                                (guc_engine_id << GUC_ELC_ENGINE_OFFSET);
 363
 364                lrc->ring_begin = i915_ggtt_offset(ce->ring->vma);
 365                lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
 366                lrc->ring_next_free_location = lrc->ring_begin;
 367                lrc->ring_current_tail_pointer_value = 0;
 368
 369                desc.engines_used |= (1 << guc_engine_id);
 370        }
 371
 372        DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
 373                        client->engines, desc.engines_used);
 374        WARN_ON(desc.engines_used == 0);
 375
 376        /*
 377         * The doorbell, process descriptor, and workqueue are all parts
 378         * of the client object, which the GuC will reference via the GGTT
 379         */
 380        gfx_addr = i915_ggtt_offset(client->vma);
 381        desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
 382                                client->doorbell_offset;
 383        desc.db_trigger_cpu = (uintptr_t)client->client_base +
 384                                client->doorbell_offset;
 385        desc.db_trigger_uk = gfx_addr + client->doorbell_offset;
 386        desc.process_desc = gfx_addr + client->proc_desc_offset;
 387        desc.wq_addr = gfx_addr + client->wq_offset;
 388        desc.wq_size = client->wq_size;
 389
 390        /*
 391         * XXX: Take LRCs from an existing context if this is not an
 392         * IsKMDCreatedContext client
 393         */
 394        desc.desc_private = (uintptr_t)client;
 395
 396        /* Pool context is pinned already */
 397        sg = guc->ctx_pool_vma->pages;
 398        sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 399                             sizeof(desc) * client->ctx_index);
 400}
 401
 402static void guc_ctx_desc_fini(struct intel_guc *guc,
 403                              struct i915_guc_client *client)
 404{
 405        struct guc_context_desc desc;
 406        struct sg_table *sg;
 407
 408        memset(&desc, 0, sizeof(desc));
 409
 410        sg = guc->ctx_pool_vma->pages;
 411        sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 412                             sizeof(desc) * client->ctx_index);
 413}
 414
 415/**
 416 * i915_guc_wq_reserve() - reserve space in the GuC's workqueue
 417 * @request:    request associated with the commands
 418 *
 419 * Return:      0 if space is available
 420 *              -EAGAIN if space is not currently available
 421 *
 422 * This function must be called (and must return 0) before a request
 423 * is submitted to the GuC via i915_guc_submit() below. Once a result
 424 * of 0 has been returned, it must be balanced by a corresponding
 425 * call to submit().
 426 *
 427 * Reservation allows the caller to determine in advance that space
 428 * will be available for the next submission before committing resources
 429 * to it, and helps avoid late failures with complicated recovery paths.
 430 */
 431int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 432{
 433        const size_t wqi_size = sizeof(struct guc_wq_item);
 434        struct i915_guc_client *gc = request->i915->guc.execbuf_client;
 435        struct guc_process_desc *desc = gc->client_base + gc->proc_desc_offset;
 436        u32 freespace;
 437        int ret;
 438
 439        spin_lock(&gc->wq_lock);
 440        freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
 441        freespace -= gc->wq_rsvd;
 442        if (likely(freespace >= wqi_size)) {
 443                gc->wq_rsvd += wqi_size;
 444                ret = 0;
 445        } else {
 446                gc->no_wq_space++;
 447                ret = -EAGAIN;
 448        }
 449        spin_unlock(&gc->wq_lock);
 450
 451        return ret;
 452}
 453
 454void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
 455{
 456        const size_t wqi_size = sizeof(struct guc_wq_item);
 457        struct i915_guc_client *gc = request->i915->guc.execbuf_client;
 458
 459        GEM_BUG_ON(READ_ONCE(gc->wq_rsvd) < wqi_size);
 460
 461        spin_lock(&gc->wq_lock);
 462        gc->wq_rsvd -= wqi_size;
 463        spin_unlock(&gc->wq_lock);
 464}
 465
 466/* Construct a Work Item and append it to the GuC's Work Queue */
 467static void guc_wq_item_append(struct i915_guc_client *gc,
 468                               struct drm_i915_gem_request *rq)
 469{
 470        /* wqi_len is in DWords, and does not include the one-word header */
 471        const size_t wqi_size = sizeof(struct guc_wq_item);
 472        const u32 wqi_len = wqi_size/sizeof(u32) - 1;
 473        struct intel_engine_cs *engine = rq->engine;
 474        struct guc_process_desc *desc;
 475        struct guc_wq_item *wqi;
 476        void *base;
 477        u32 freespace, tail, wq_off, wq_page;
 478
 479        desc = gc->client_base + gc->proc_desc_offset;
 480
 481        /* Free space is guaranteed, see i915_guc_wq_reserve() above */
 482        freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
 483        GEM_BUG_ON(freespace < wqi_size);
 484
 485        /* The GuC firmware wants the tail index in QWords, not bytes */
 486        tail = rq->tail;
 487        GEM_BUG_ON(tail & 7);
 488        tail >>= 3;
 489        GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 490
 491        /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
 492         * should not have the case where structure wqi is across page, neither
 493         * wrapped to the beginning. This simplifies the implementation below.
 494         *
 495         * XXX: if not the case, we need save data to a temp wqi and copy it to
 496         * workqueue buffer dw by dw.
 497         */
 498        BUILD_BUG_ON(wqi_size != 16);
 499        GEM_BUG_ON(gc->wq_rsvd < wqi_size);
 500
 501        /* postincrement WQ tail for next time */
 502        wq_off = gc->wq_tail;
 503        GEM_BUG_ON(wq_off & (wqi_size - 1));
 504        gc->wq_tail += wqi_size;
 505        gc->wq_tail &= gc->wq_size - 1;
 506        gc->wq_rsvd -= wqi_size;
 507
 508        /* WQ starts from the page after doorbell / process_desc */
 509        wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
 510        wq_off &= PAGE_SIZE - 1;
 511        base = kmap_atomic(i915_gem_object_get_page(gc->vma->obj, wq_page));
 512        wqi = (struct guc_wq_item *)((char *)base + wq_off);
 513
 514        /* Now fill in the 4-word work queue item */
 515        wqi->header = WQ_TYPE_INORDER |
 516                        (wqi_len << WQ_LEN_SHIFT) |
 517                        (engine->guc_id << WQ_TARGET_SHIFT) |
 518                        WQ_NO_WCFLUSH_WAIT;
 519
 520        /* The GuC wants only the low-order word of the context descriptor */
 521        wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
 522
 523        wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
 524        wqi->fence_id = rq->fence.seqno;
 525
 526        kunmap_atomic(base);
 527}
 528
 529static int guc_ring_doorbell(struct i915_guc_client *gc)
 530{
 531        struct guc_process_desc *desc;
 532        union guc_doorbell_qw db_cmp, db_exc, db_ret;
 533        union guc_doorbell_qw *db;
 534        int attempt = 2, ret = -EAGAIN;
 535
 536        desc = gc->client_base + gc->proc_desc_offset;
 537
 538        /* Update the tail so it is visible to GuC */
 539        desc->tail = gc->wq_tail;
 540
 541        /* current cookie */
 542        db_cmp.db_status = GUC_DOORBELL_ENABLED;
 543        db_cmp.cookie = gc->cookie;
 544
 545        /* cookie to be updated */
 546        db_exc.db_status = GUC_DOORBELL_ENABLED;
 547        db_exc.cookie = gc->cookie + 1;
 548        if (db_exc.cookie == 0)
 549                db_exc.cookie = 1;
 550
 551        /* pointer of current doorbell cacheline */
 552        db = gc->client_base + gc->doorbell_offset;
 553
 554        while (attempt--) {
 555                /* lets ring the doorbell */
 556                db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
 557                        db_cmp.value_qw, db_exc.value_qw);
 558
 559                /* if the exchange was successfully executed */
 560                if (db_ret.value_qw == db_cmp.value_qw) {
 561                        /* db was successfully rung */
 562                        gc->cookie = db_exc.cookie;
 563                        ret = 0;
 564                        break;
 565                }
 566
 567                /* XXX: doorbell was lost and need to acquire it again */
 568                if (db_ret.db_status == GUC_DOORBELL_DISABLED)
 569                        break;
 570
 571                DRM_WARN("Cookie mismatch. Expected %d, found %d\n",
 572                         db_cmp.cookie, db_ret.cookie);
 573
 574                /* update the cookie to newly read cookie from GuC */
 575                db_cmp.cookie = db_ret.cookie;
 576                db_exc.cookie = db_ret.cookie + 1;
 577                if (db_exc.cookie == 0)
 578                        db_exc.cookie = 1;
 579        }
 580
 581        return ret;
 582}
 583
 584/**
 585 * i915_guc_submit() - Submit commands through GuC
 586 * @rq:         request associated with the commands
 587 *
 588 * Return:      0 on success, otherwise an errno.
 589 *              (Note: nonzero really shouldn't happen!)
 590 *
 591 * The caller must have already called i915_guc_wq_reserve() above with
 592 * a result of 0 (success), guaranteeing that there is space in the work
 593 * queue for the new request, so enqueuing the item cannot fail.
 594 *
 595 * Bad Things Will Happen if the caller violates this protocol e.g. calls
 596 * submit() when _reserve() says there's no space, or calls _submit()
 597 * a different number of times from (successful) calls to _reserve().
 598 *
 599 * The only error here arises if the doorbell hardware isn't functioning
 600 * as expected, which really shouln't happen.
 601 */
 602static void i915_guc_submit(struct drm_i915_gem_request *rq)
 603{
 604        unsigned int engine_id = rq->engine->id;
 605        struct intel_guc *guc = &rq->i915->guc;
 606        struct i915_guc_client *client = guc->execbuf_client;
 607        int b_ret;
 608
 609        spin_lock(&client->wq_lock);
 610        guc_wq_item_append(client, rq);
 611        b_ret = guc_ring_doorbell(client);
 612
 613        client->submissions[engine_id] += 1;
 614        client->retcode = b_ret;
 615        if (b_ret)
 616                client->b_fail += 1;
 617
 618        guc->submissions[engine_id] += 1;
 619        guc->last_seqno[engine_id] = rq->fence.seqno;
 620        spin_unlock(&client->wq_lock);
 621}
 622
 623/*
 624 * Everything below here is concerned with setup & teardown, and is
 625 * therefore not part of the somewhat time-critical batch-submission
 626 * path of i915_guc_submit() above.
 627 */
 628
 629/**
 630 * guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
 631 * @guc:        the guc
 632 * @size:       size of area to allocate (both virtual space and memory)
 633 *
 634 * This is a wrapper to create an object for use with the GuC. In order to
 635 * use it inside the GuC, an object needs to be pinned lifetime, so we allocate
 636 * both some backing storage and a range inside the Global GTT. We must pin
 637 * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that
 638 * range is reserved inside GuC.
 639 *
 640 * Return:      A i915_vma if successful, otherwise an ERR_PTR.
 641 */
 642static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
 643{
 644        struct drm_i915_private *dev_priv = guc_to_i915(guc);
 645        struct drm_i915_gem_object *obj;
 646        struct i915_vma *vma;
 647        int ret;
 648
 649        obj = i915_gem_object_create(&dev_priv->drm, size);
 650        if (IS_ERR(obj))
 651                return ERR_CAST(obj);
 652
 653        vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
 654        if (IS_ERR(vma))
 655                goto err;
 656
 657        ret = i915_vma_pin(vma, 0, PAGE_SIZE,
 658                           PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
 659        if (ret) {
 660                vma = ERR_PTR(ret);
 661                goto err;
 662        }
 663
 664        /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
 665        I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 666
 667        return vma;
 668
 669err:
 670        i915_gem_object_put(obj);
 671        return vma;
 672}
 673
 674static void
 675guc_client_free(struct drm_i915_private *dev_priv,
 676                struct i915_guc_client *client)
 677{
 678        struct intel_guc *guc = &dev_priv->guc;
 679
 680        if (!client)
 681                return;
 682
 683        /*
 684         * XXX: wait for any outstanding submissions before freeing memory.
 685         * Be sure to drop any locks
 686         */
 687
 688        if (client->client_base) {
 689                /*
 690                 * If we got as far as setting up a doorbell, make sure we
 691                 * shut it down before unmapping & deallocating the memory.
 692                 */
 693                guc_disable_doorbell(guc, client);
 694
 695                kunmap(kmap_to_page(client->client_base));
 696        }
 697
 698        i915_vma_unpin_and_release(&client->vma);
 699
 700        if (client->ctx_index != GUC_INVALID_CTX_ID) {
 701                guc_ctx_desc_fini(guc, client);
 702                ida_simple_remove(&guc->ctx_ids, client->ctx_index);
 703        }
 704
 705        kfree(client);
 706}
 707
 708/* Check that a doorbell register is in the expected state */
 709static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
 710{
 711        struct drm_i915_private *dev_priv = guc_to_i915(guc);
 712        i915_reg_t drbreg = GEN8_DRBREGL(db_id);
 713        uint32_t value = I915_READ(drbreg);
 714        bool enabled = (value & GUC_DOORBELL_ENABLED) != 0;
 715        bool expected = test_bit(db_id, guc->doorbell_bitmap);
 716
 717        if (enabled == expected)
 718                return true;
 719
 720        DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n",
 721                         db_id, drbreg.reg, value,
 722                         expected ? "active" : "inactive");
 723
 724        return false;
 725}
 726
 727/*
 728 * Borrow the first client to set up & tear down each unused doorbell
 729 * in turn, to ensure that all doorbell h/w is (re)initialised.
 730 */
 731static void guc_init_doorbell_hw(struct intel_guc *guc)
 732{
 733        struct i915_guc_client *client = guc->execbuf_client;
 734        uint16_t db_id;
 735        int i, err;
 736
 737        /* Save client's original doorbell selection */
 738        db_id = client->doorbell_id;
 739
 740        for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
 741                /* Skip if doorbell is OK */
 742                if (guc_doorbell_check(guc, i))
 743                        continue;
 744
 745                err = guc_update_doorbell_id(guc, client, i);
 746                if (err)
 747                        DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n",
 748                                        i, err);
 749        }
 750
 751        /* Restore to original value */
 752        err = guc_update_doorbell_id(guc, client, db_id);
 753        if (err)
 754                DRM_WARN("Failed to restore doorbell to %d, err %d\n",
 755                         db_id, err);
 756
 757        /* Read back & verify all doorbell registers */
 758        for (i = 0; i < GUC_MAX_DOORBELLS; ++i)
 759                (void)guc_doorbell_check(guc, i);
 760}
 761
 762/**
 763 * guc_client_alloc() - Allocate an i915_guc_client
 764 * @dev_priv:   driver private data structure
 765 * @engines:    The set of engines to enable for this client
 766 * @priority:   four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW
 767 *              The kernel client to replace ExecList submission is created with
 768 *              NORMAL priority. Priority of a client for scheduler can be HIGH,
 769 *              while a preemption context can use CRITICAL.
 770 * @ctx:        the context that owns the client (we use the default render
 771 *              context)
 772 *
 773 * Return:      An i915_guc_client object if success, else NULL.
 774 */
 775static struct i915_guc_client *
 776guc_client_alloc(struct drm_i915_private *dev_priv,
 777                 uint32_t engines,
 778                 uint32_t priority,
 779                 struct i915_gem_context *ctx)
 780{
 781        struct i915_guc_client *client;
 782        struct intel_guc *guc = &dev_priv->guc;
 783        struct i915_vma *vma;
 784        uint16_t db_id;
 785
 786        client = kzalloc(sizeof(*client), GFP_KERNEL);
 787        if (!client)
 788                return NULL;
 789
 790        client->owner = ctx;
 791        client->guc = guc;
 792        client->engines = engines;
 793        client->priority = priority;
 794        client->doorbell_id = GUC_INVALID_DOORBELL_ID;
 795
 796        client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
 797                        GUC_MAX_GPU_CONTEXTS, GFP_KERNEL);
 798        if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) {
 799                client->ctx_index = GUC_INVALID_CTX_ID;
 800                goto err;
 801        }
 802
 803        /* The first page is doorbell/proc_desc. Two followed pages are wq. */
 804        vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
 805        if (IS_ERR(vma))
 806                goto err;
 807
 808        /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
 809        client->vma = vma;
 810        client->client_base = kmap(i915_vma_first_page(vma));
 811
 812        spin_lock_init(&client->wq_lock);
 813        client->wq_offset = GUC_DB_SIZE;
 814        client->wq_size = GUC_WQ_SIZE;
 815
 816        db_id = select_doorbell_register(guc, client->priority);
 817        if (db_id == GUC_INVALID_DOORBELL_ID)
 818                /* XXX: evict a doorbell instead? */
 819                goto err;
 820
 821        client->doorbell_offset = select_doorbell_cacheline(guc);
 822
 823        /*
 824         * Since the doorbell only requires a single cacheline, we can save
 825         * space by putting the application process descriptor in the same
 826         * page. Use the half of the page that doesn't include the doorbell.
 827         */
 828        if (client->doorbell_offset >= (GUC_DB_SIZE / 2))
 829                client->proc_desc_offset = 0;
 830        else
 831                client->proc_desc_offset = (GUC_DB_SIZE / 2);
 832
 833        guc_proc_desc_init(guc, client);
 834        guc_ctx_desc_init(guc, client);
 835        if (guc_init_doorbell(guc, client, db_id))
 836                goto err;
 837
 838        DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n",
 839                priority, client, client->engines, client->ctx_index);
 840        DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n",
 841                client->doorbell_id, client->doorbell_offset);
 842
 843        return client;
 844
 845err:
 846        guc_client_free(dev_priv, client);
 847        return NULL;
 848}
 849
 850static void guc_log_create(struct intel_guc *guc)
 851{
 852        struct i915_vma *vma;
 853        unsigned long offset;
 854        uint32_t size, flags;
 855
 856        if (i915.guc_log_level < GUC_LOG_VERBOSITY_MIN)
 857                return;
 858
 859        if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX)
 860                i915.guc_log_level = GUC_LOG_VERBOSITY_MAX;
 861
 862        /* The first page is to save log buffer state. Allocate one
 863         * extra page for others in case for overlap */
 864        size = (1 + GUC_LOG_DPC_PAGES + 1 +
 865                GUC_LOG_ISR_PAGES + 1 +
 866                GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
 867
 868        vma = guc->log_vma;
 869        if (!vma) {
 870                vma = guc_allocate_vma(guc, size);
 871                if (IS_ERR(vma)) {
 872                        /* logging will be off */
 873                        i915.guc_log_level = -1;
 874                        return;
 875                }
 876
 877                guc->log_vma = vma;
 878        }
 879
 880        /* each allocated unit is a page */
 881        flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL |
 882                (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) |
 883                (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
 884                (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
 885
 886        offset = i915_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */
 887        guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
 888}
 889
 890static void guc_policies_init(struct guc_policies *policies)
 891{
 892        struct guc_policy *policy;
 893        u32 p, i;
 894
 895        policies->dpc_promote_time = 500000;
 896        policies->max_num_work_items = POLICY_MAX_NUM_WI;
 897
 898        for (p = 0; p < GUC_CTX_PRIORITY_NUM; p++) {
 899                for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) {
 900                        policy = &policies->policy[p][i];
 901
 902                        policy->execution_quantum = 1000000;
 903                        policy->preemption_time = 500000;
 904                        policy->fault_time = 250000;
 905                        policy->policy_flags = 0;
 906                }
 907        }
 908
 909        policies->is_valid = 1;
 910}
 911
 912static void guc_addon_create(struct intel_guc *guc)
 913{
 914        struct drm_i915_private *dev_priv = guc_to_i915(guc);
 915        struct i915_vma *vma;
 916        struct guc_ads *ads;
 917        struct guc_policies *policies;
 918        struct guc_mmio_reg_state *reg_state;
 919        struct intel_engine_cs *engine;
 920        struct page *page;
 921        u32 size;
 922
 923        /* The ads obj includes the struct itself and buffers passed to GuC */
 924        size = sizeof(struct guc_ads) + sizeof(struct guc_policies) +
 925                        sizeof(struct guc_mmio_reg_state) +
 926                        GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE;
 927
 928        vma = guc->ads_vma;
 929        if (!vma) {
 930                vma = guc_allocate_vma(guc, PAGE_ALIGN(size));
 931                if (IS_ERR(vma))
 932                        return;
 933
 934                guc->ads_vma = vma;
 935        }
 936
 937        page = i915_vma_first_page(vma);
 938        ads = kmap(page);
 939
 940        /*
 941         * The GuC requires a "Golden Context" when it reinitialises
 942         * engines after a reset. Here we use the Render ring default
 943         * context, which must already exist and be pinned in the GGTT,
 944         * so its address won't change after we've told the GuC where
 945         * to find it.
 946         */
 947        engine = &dev_priv->engine[RCS];
 948        ads->golden_context_lrca = engine->status_page.ggtt_offset;
 949
 950        for_each_engine(engine, dev_priv)
 951                ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine);
 952
 953        /* GuC scheduling policies */
 954        policies = (void *)ads + sizeof(struct guc_ads);
 955        guc_policies_init(policies);
 956
 957        ads->scheduler_policies =
 958                i915_ggtt_offset(vma) + sizeof(struct guc_ads);
 959
 960        /* MMIO reg state */
 961        reg_state = (void *)policies + sizeof(struct guc_policies);
 962
 963        for_each_engine(engine, dev_priv) {
 964                reg_state->mmio_white_list[engine->guc_id].mmio_start =
 965                        engine->mmio_base + GUC_MMIO_WHITE_LIST_START;
 966
 967                /* Nothing to be saved or restored for now. */
 968                reg_state->mmio_white_list[engine->guc_id].count = 0;
 969        }
 970
 971        ads->reg_state_addr = ads->scheduler_policies +
 972                        sizeof(struct guc_policies);
 973
 974        ads->reg_state_buffer = ads->reg_state_addr +
 975                        sizeof(struct guc_mmio_reg_state);
 976
 977        kunmap(page);
 978}
 979
 980/*
 981 * Set up the memory resources to be shared with the GuC.  At this point,
 982 * we require just one object that can be mapped through the GGTT.
 983 */
 984int i915_guc_submission_init(struct drm_i915_private *dev_priv)
 985{
 986        const size_t ctxsize = sizeof(struct guc_context_desc);
 987        const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
 988        const size_t gemsize = round_up(poolsize, PAGE_SIZE);
 989        struct intel_guc *guc = &dev_priv->guc;
 990        struct i915_vma *vma;
 991
 992        /* Wipe bitmap & delete client in case of reinitialisation */
 993        bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
 994        i915_guc_submission_disable(dev_priv);
 995
 996        if (!i915.enable_guc_submission)
 997                return 0; /* not enabled  */
 998
 999        if (guc->ctx_pool_vma)
1000                return 0; /* already allocated */
1001
1002        vma = guc_allocate_vma(guc, gemsize);
1003        if (IS_ERR(vma))
1004                return PTR_ERR(vma);
1005
1006        guc->ctx_pool_vma = vma;
1007        ida_init(&guc->ctx_ids);
1008        guc_log_create(guc);
1009        guc_addon_create(guc);
1010
1011        return 0;
1012}
1013
1014int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
1015{
1016        struct intel_guc *guc = &dev_priv->guc;
1017        struct i915_guc_client *client;
1018        struct intel_engine_cs *engine;
1019        struct drm_i915_gem_request *request;
1020
1021        /* client for execbuf submission */
1022        client = guc_client_alloc(dev_priv,
1023                                  INTEL_INFO(dev_priv)->ring_mask,
1024                                  GUC_CTX_PRIORITY_KMD_NORMAL,
1025                                  dev_priv->kernel_context);
1026        if (!client) {
1027                DRM_ERROR("Failed to create normal GuC client!\n");
1028                return -ENOMEM;
1029        }
1030
1031        guc->execbuf_client = client;
1032        host2guc_sample_forcewake(guc, client);
1033        guc_init_doorbell_hw(guc);
1034
1035        /* Take over from manual control of ELSP (execlists) */
1036        for_each_engine(engine, dev_priv) {
1037                engine->submit_request = i915_guc_submit;
1038
1039                /* Replay the current set of previously submitted requests */
1040                list_for_each_entry(request, &engine->request_list, link) {
1041                        client->wq_rsvd += sizeof(struct guc_wq_item);
1042                        if (i915_sw_fence_done(&request->submit))
1043                                i915_guc_submit(request);
1044                }
1045        }
1046
1047        return 0;
1048}
1049
1050void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
1051{
1052        struct intel_guc *guc = &dev_priv->guc;
1053
1054        if (!guc->execbuf_client)
1055                return;
1056
1057        /* Revert back to manual ELSP submission */
1058        intel_execlists_enable_submission(dev_priv);
1059
1060        guc_client_free(dev_priv, guc->execbuf_client);
1061        guc->execbuf_client = NULL;
1062}
1063
1064void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
1065{
1066        struct intel_guc *guc = &dev_priv->guc;
1067
1068        i915_vma_unpin_and_release(&guc->ads_vma);
1069        i915_vma_unpin_and_release(&guc->log_vma);
1070
1071        if (guc->ctx_pool_vma)
1072                ida_destroy(&guc->ctx_ids);
1073        i915_vma_unpin_and_release(&guc->ctx_pool_vma);
1074}
1075
1076/**
1077 * intel_guc_suspend() - notify GuC entering suspend state
1078 * @dev:        drm device
1079 */
1080int intel_guc_suspend(struct drm_device *dev)
1081{
1082        struct drm_i915_private *dev_priv = to_i915(dev);
1083        struct intel_guc *guc = &dev_priv->guc;
1084        struct i915_gem_context *ctx;
1085        u32 data[3];
1086
1087        if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
1088                return 0;
1089
1090        ctx = dev_priv->kernel_context;
1091
1092        data[0] = HOST2GUC_ACTION_ENTER_S_STATE;
1093        /* any value greater than GUC_POWER_D0 */
1094        data[1] = GUC_POWER_D1;
1095        /* first page is shared data with GuC */
1096        data[2] = i915_ggtt_offset(ctx->engine[RCS].state);
1097
1098        return host2guc_action(guc, data, ARRAY_SIZE(data));
1099}
1100
1101
1102/**
1103 * intel_guc_resume() - notify GuC resuming from suspend state
1104 * @dev:        drm device
1105 */
1106int intel_guc_resume(struct drm_device *dev)
1107{
1108        struct drm_i915_private *dev_priv = to_i915(dev);
1109        struct intel_guc *guc = &dev_priv->guc;
1110        struct i915_gem_context *ctx;
1111        u32 data[3];
1112
1113        if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
1114                return 0;
1115
1116        ctx = dev_priv->kernel_context;
1117
1118        data[0] = HOST2GUC_ACTION_EXIT_S_STATE;
1119        data[1] = GUC_POWER_D0;
1120        /* first page is shared data with GuC */
1121        data[2] = i915_ggtt_offset(ctx->engine[RCS].state);
1122
1123        return host2guc_action(guc, data, ARRAY_SIZE(data));
1124}
1125