linux/drivers/gpu/drm/i915/gt/intel_lrc.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

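/*
 * The per-platform register tables below are stored in a compact
 * byte-coded form which set_offsets() expands into the context image.
 * Each entry is one of:
 *
 *   NOP(x)    - bit 7 set, skip the next x dwords of the image
 *   LRI(n, f) - an MI_LOAD_REGISTER_IMM header: dword count in the low
 *               six bits, flags (e.g. POSTED) in the top two
 *   REG/REG16 - a register offset encoded 7 bits per byte, most
 *               significant byte first, bit 7 acting as a continuation
 *               marker (one byte for REG, two for REG16)
 *   END       - a zero byte terminating the table
 */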
static void set_offsets(u32 *regs,
                        const u8 *data,
                        const struct intel_engine_cs *engine,
                        bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
        (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
        (((x) >> 2) & 0x7f)
#define END 0
{
        const u32 base = engine->mmio_base;

        while (*data) {
                u8 count, flags;

                if (*data & BIT(7)) { /* skip */
                        count = *data++ & ~BIT(7);
                        regs += count;
                        continue;
                }

                count = *data & 0x3f;
                flags = *data >> 6;
                data++;

                *regs = MI_LOAD_REGISTER_IMM(count);
                if (flags & POSTED)
                        *regs |= MI_LRI_FORCE_POSTED;
                if (GRAPHICS_VER(engine->i915) >= 11)
                        *regs |= MI_LRI_LRM_CS_MMIO;
                regs++;

                GEM_BUG_ON(!count);
                do {
                        u32 offset = 0;
                        u8 v;

                        do {
                                v = *data++;
                                offset <<= 7;
                                offset |= v & ~BIT(7);
                        } while (v & BIT(7));

                        regs[0] = base + (offset << 2);
                        regs += 2;
                } while (--count);
        }

        if (close) {
                /* Close the batch; used mainly by live_lrc_layout() */
                *regs = MI_BATCH_BUFFER_END;
                if (GRAPHICS_VER(engine->i915) >= 11)
                        *regs |= BIT(0);
        }
}
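
/*
 * Worked example, for illustration: REG16(0x244) encodes to the two
 * bytes { 0x81, 0x11 } (0x244 >> 9 = 0x01 with BIT(7) set as the
 * continuation marker, then (0x244 >> 2) & 0x7f = 0x11), which the
 * decoder above reassembles as 0x01 << 7 | 0x11 = 0x91 and shifts
 * left by two to recover the offset 0x244 from the engine's
 * mmio_base.
 */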

static const u8 gen8_xcs_offsets[] = {
        NOP(1),
        LRI(11, 0),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),

        NOP(9),
        LRI(9, 0),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(2, 0),
        REG16(0x200),
        REG(0x028),

        END
};

static const u8 gen9_xcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),

        NOP(3),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(1, POSTED),
        REG16(0x200),

        NOP(13),
        LRI(44, POSTED),
        REG(0x028),
        REG(0x09c),
        REG(0x0c0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x068),

        END
};

static const u8 gen12_xcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        END
};

static const u8 gen8_rcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),

        NOP(3),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(1, 0),
        REG(0x0c8),

        END
};

static const u8 gen9_rcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
        REG16(0x244),
        REG(0x34),
        REG(0x30),
        REG(0x38),
        REG(0x3c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),

        NOP(3),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(1, 0),
        REG(0xc8),

        NOP(13),
        LRI(44, POSTED),
        REG(0x28),
        REG(0x9c),
        REG(0xc0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x68),

        END
};

static const u8 gen11_rcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(1, POSTED),
        REG(0x1b0),

        NOP(10),
        LRI(1, 0),
        REG(0x0c8),

        END
};

static const u8 gen12_rcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),
        NOP(3 + 9 + 1),

        LRI(51, POSTED),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG(0x028),
        REG(0x09c),
        REG(0x0c0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x068),
        REG(0x084),
        NOP(1),

        END
};

static const u8 xehp_rcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),

        END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
        /*
         * The gen12+ lists only have the registers we program in the basic
         * default state. We rely on the context image using relative
         * addressing to automatically fix up the register state between the
         * physical engines for a virtual engine.
         */
        GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
                   !intel_engine_has_relative_mmio(engine));

        if (engine->class == RENDER_CLASS) {
                if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
                        return xehp_rcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 12)
                        return gen12_rcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 11)
                        return gen11_rcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 9)
                        return gen9_rcs_offsets;
                else
                        return gen8_rcs_offsets;
        } else {
                if (GRAPHICS_VER(engine->i915) >= 12)
                        return gen12_xcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 9)
                        return gen9_xcs_offsets;
                else
                        return gen8_xcs_offsets;
        }
}

static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
                return 0x70;
        else if (GRAPHICS_VER(engine->i915) >= 12)
                return 0x60;
        else if (GRAPHICS_VER(engine->i915) >= 9)
                return 0x54;
        else if (engine->class == RENDER_CLASS)
                return 0x58;
        else
                return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
                return 0x84;
        else if (GRAPHICS_VER(engine->i915) >= 12)
                return 0x74;
        else if (GRAPHICS_VER(engine->i915) >= 9)
                return 0x68;
        else if (engine->class == RENDER_CLASS)
                return 0xd8;
        else
                return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER(engine->i915) >= 12)
                return 0x12;
        else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
                return 0x18;
        else
                return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_wa_bb_per_ctx(engine);
        if (x < 0)
                return x;

        return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_indirect_ptr(engine);
        if (x < 0)
                return x;

        return x + 2;
}

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
                /*
                 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
                 * simply to match the RCS context image layout.
                 */
                return 0xc6;
        else if (engine->class != RENDER_CLASS)
                return -1;
        else if (GRAPHICS_VER(engine->i915) >= 12)
                return 0xb6;
        else if (GRAPHICS_VER(engine->i915) >= 11)
                return 0xaa;
        else
                return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
        switch (GRAPHICS_VER(engine->i915)) {
        default:
                MISSING_CASE(GRAPHICS_VER(engine->i915));
                fallthrough;
        case 12:
                return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        case 11:
                return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        case 9:
                return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        case 8:
                return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        }
}

static void
lrc_setup_indirect_ctx(u32 *regs,
                       const struct intel_engine_cs *engine,
                       u32 ctx_bb_ggtt_addr,
                       u32 size)
{
        GEM_BUG_ON(!size);
        GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
        GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
        regs[lrc_ring_indirect_ptr(engine) + 1] =
                ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

        GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
        regs[lrc_ring_indirect_offset(engine) + 1] =
                lrc_ring_indirect_offset_default(engine) << 6;
}
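
/*
 * Note the packing above: the batch address is cacheline (64 byte)
 * aligned, so its low bits are free to carry the batch length in
 * cachelines; this is why the size must be a non-zero multiple of
 * CACHELINE_BYTES.
 */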

static void init_common_regs(u32 * const regs,
                             const struct intel_context *ce,
                             const struct intel_engine_cs *engine,
                             bool inhibit)
{
        u32 ctl;

        ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
        ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
        if (inhibit)
                ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
        if (GRAPHICS_VER(engine->i915) < 11)
                ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
                                           CTX_CTRL_RS_CTX_ENABLE);
        regs[CTX_CONTEXT_CONTROL] = ctl;

        regs[CTX_TIMESTAMP] = ce->runtime.last;
}

static void init_wa_bb_regs(u32 * const regs,
                            const struct intel_engine_cs *engine)
{
        const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

        if (wa_ctx->per_ctx.size) {
                const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

                GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
                regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
                        (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
        }

        if (wa_ctx->indirect_ctx.size) {
                lrc_setup_indirect_ctx(regs, engine,
                                       i915_ggtt_offset(wa_ctx->vma) +
                                       wa_ctx->indirect_ctx.offset,
                                       wa_ctx->indirect_ctx.size);
        }
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
        if (i915_vm_is_4lvl(&ppgtt->vm)) {
                /*
                 * 64b PPGTT (48bit canonical): PDP0_DESCRIPTOR contains the
                 * base address of the PML4; the other PDP descriptors are
                 * ignored.
                 */
                ASSIGN_CTX_PML4(ppgtt, regs);
        } else {
                ASSIGN_CTX_PDP(ppgtt, regs, 3);
                ASSIGN_CTX_PDP(ppgtt, regs, 2);
                ASSIGN_CTX_PDP(ppgtt, regs, 1);
                ASSIGN_CTX_PDP(ppgtt, regs, 0);
        }
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
        if (i915_is_ggtt(vm))
                return i915_vm_to_ggtt(vm)->alias;
        else
                return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_mi_mode(engine);
        if (x != -1) {
                regs[x + 1] &= ~STOP_RING;
                regs[x + 1] |= STOP_RING << 16;
        }
}
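
/*
 * RING_MI_MODE is a masked register: the high 16 bits select which of
 * the low 16 bits are actually written. Setting STOP_RING << 16 while
 * clearing STOP_RING in the value therefore asks the hardware to
 * clear only the STOP_RING bit when this dword is loaded.
 */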

static void __lrc_init_regs(u32 *regs,
                            const struct intel_context *ce,
                            const struct intel_engine_cs *engine,
                            bool inhibit)
{
        /*
         * A context is actually a big batch buffer with several
         * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
         * values we are setting here are only for the first context restore:
         * on a subsequent save, the GPU will recreate this batch buffer with
         * new values (including all the missing MI_LOAD_REGISTER_IMM commands
         * that we are not initializing here).
         *
         * Must keep consistent with virtual_update_register_offsets().
         */

        if (inhibit)
                memset(regs, 0, PAGE_SIZE);

        set_offsets(regs, reg_offsets(engine), engine, inhibit);

        init_common_regs(regs, ce, engine, inhibit);
        init_ppgtt_regs(regs, vm_alias(ce->vm));

        init_wa_bb_regs(regs, engine);

        __reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
                   const struct intel_engine_cs *engine,
                   bool inhibit)
{
        __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine)
{
        __reset_stop_ring(ce->lrc_reg_state, engine);
}
 797static void
 798set_redzone(void *vaddr, const struct intel_engine_cs *engine)
 799{
 800        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 801                return;
 802
 803        vaddr += engine->context_size;
 804
 805        memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
 806}
 807
 808static void
 809check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
 810{
 811        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 812                return;
 813
 814        vaddr += engine->context_size;
 815
 816        if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
 817                drm_err_once(&engine->i915->drm,
 818                             "%s context redzone overwritten!\n",
 819                             engine->name);
 820}
 821
 822void lrc_init_state(struct intel_context *ce,
 823                    struct intel_engine_cs *engine,
 824                    void *state)
 825{
 826        bool inhibit = true;
 827
 828        set_redzone(state, engine);
 829
 830        if (engine->default_state) {
 831                shmem_read(engine->default_state, 0,
 832                           state, engine->context_size);
 833                __set_bit(CONTEXT_VALID_BIT, &ce->flags);
 834                inhibit = false;
 835        }
 836
 837        /* Clear the ppHWSP (inc. per-context counters) */
 838        memset(state, 0, PAGE_SIZE);
 839
 840        /*
 841         * The second page of the context object contains some registers which
 842         * must be set up prior to the first execution.
 843         */
 844        __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
 845}
 846
 847static struct i915_vma *
 848__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
 849{
 850        struct drm_i915_gem_object *obj;
 851        struct i915_vma *vma;
 852        u32 context_size;
 853
 854        context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
 855
 856        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 857                context_size += I915_GTT_PAGE_SIZE; /* for redzone */
 858
 859        if (GRAPHICS_VER(engine->i915) == 12) {
 860                ce->wa_bb_page = context_size / PAGE_SIZE;
 861                context_size += PAGE_SIZE;
 862        }
 863
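        /*
         * Prefer placing the context image in device-local memory when
         * available, falling back to system memory (shmem) otherwise.
         */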
        obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
        if (IS_ERR(obj))
                obj = i915_gem_object_create_shmem(engine->i915, context_size);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                i915_gem_object_put(obj);
                return vma;
        }

        return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

        return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct intel_ring *ring;
        struct i915_vma *vma;
        int err;

        GEM_BUG_ON(ce->state);

        vma = __lrc_alloc_state(ce, engine);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        ring = intel_engine_create_ring(engine, ce->ring_size);
        if (IS_ERR(ring)) {
                err = PTR_ERR(ring);
                goto err_vma;
        }

        if (!page_mask_bits(ce->timeline)) {
                struct intel_timeline *tl;

                /*
                 * Use the static global HWSP for the kernel context, and
                 * a dynamically allocated cacheline for everyone else.
                 */
                if (unlikely(ce->timeline))
                        tl = pinned_timeline(ce, engine);
                else
                        tl = intel_timeline_create(engine->gt);
                if (IS_ERR(tl)) {
                        err = PTR_ERR(tl);
                        goto err_ring;
                }

                ce->timeline = tl;
        }

        ce->ring = ring;
        ce->state = vma;

        return 0;

err_ring:
        intel_ring_put(ring);
err_vma:
        i915_vma_put(vma);
        return err;
}

void lrc_reset(struct intel_context *ce)
{
        GEM_BUG_ON(!intel_context_is_pinned(ce));

        intel_ring_reset(ce->ring, ce->ring->emit);

        /* Scrub away the garbage */
        lrc_init_regs(ce, ce->engine, true);
        ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
            struct intel_engine_cs *engine,
            struct i915_gem_ww_ctx *ww,
            void **vaddr)
{
        GEM_BUG_ON(!ce->state);
        GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

        *vaddr = i915_gem_object_pin_map(ce->state->obj,
                                         i915_coherent_map_type(ce->engine->i915,
                                                                ce->state->obj,
                                                                false) |
                                         I915_MAP_OVERRIDE);

        return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
        struct intel_engine_cs *engine,
        void *vaddr)
{
        ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

        if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
                lrc_init_state(ce, engine, vaddr);

        ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
        return 0;
}

void lrc_unpin(struct intel_context *ce)
{
        check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
                      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
        i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
        if (!ce->state)
                return;

        intel_ring_put(fetch_and_zero(&ce->ring));
        i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
        struct intel_context *ce = container_of(kref, typeof(*ce), ref);

        GEM_BUG_ON(!i915_active_is_idle(&ce->active));
        GEM_BUG_ON(intel_context_is_pinned(ce));

        lrc_fini(ce);

        intel_context_fini(ce);
        intel_context_free(ce);
}

static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                CTX_TIMESTAMP * sizeof(u32);
        *cs++ = 0;

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

        return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
        GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
        *cs++ = 0;

        return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
        GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
        *cs++ = 0;

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

        return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
        cs = gen12_emit_timestamp_wa(ce, cs);
        cs = gen12_emit_cmd_buf_wa(ce, cs);
        cs = gen12_emit_restore_scratch(ce, cs);

        return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
        cs = gen12_emit_timestamp_wa(ce, cs);
        cs = gen12_emit_restore_scratch(ce, cs);

        return cs;
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
        return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
        void *ptr;

        GEM_BUG_ON(!ce->wa_bb_page);

        ptr = ce->lrc_reg_state;
        ptr -= LRC_STATE_OFFSET; /* back to start of context image */
        ptr += context_wa_bb_offset(ce);

        return ptr;
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
                      const struct intel_engine_cs *engine,
                      u32 *(*emit)(const struct intel_context *, u32 *))
{
        u32 * const start = context_indirect_bb(ce);
        u32 *cs;

        cs = emit(ce, start);
        GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
        while ((unsigned long)cs % CACHELINE_BYTES)
                *cs++ = MI_NOOP;

        lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
                               i915_ggtt_offset(ce->state) +
                               context_wa_bb_offset(ce),
                               (cs - start) * sizeof(*cs));
}
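
/*
 * Because the indirect context buffer is executed for the length
 * programmed into the INDIRECT_CTX pointer (in cachelines) rather
 * than up to an MI_BATCH_BUFFER_END, the buffer is padded with
 * MI_NOOP out to a cacheline boundary above.
 */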

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB:
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit 54:        mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit 38:        mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * The engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
        u32 desc;

        desc = INTEL_LEGACY_32B_CONTEXT;
        if (i915_vm_is_4lvl(ce->vm))
                desc = INTEL_LEGACY_64B_CONTEXT;
        desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

        desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
        if (GRAPHICS_VER(ce->vm->i915) == 8)
                desc |= GEN8_CTX_L3LLC_COHERENT;

        return i915_ggtt_offset(ce->state) | desc;
}
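
/*
 * For illustration: with a 4-level ppgtt, the descriptor above packs
 * INTEL_LEGACY_64B_CONTEXT into the GEN8_CTX_ADDRESSING_MODE field,
 * sets GEN8_CTX_VALID and GEN8_CTX_PRIVILEGE, and ORs in the
 * page-aligned GGTT offset of the context state, so all the flag bits
 * land in the low 12 bits below the LRCA as described in the layout
 * comment above.
 */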

u32 lrc_update_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine,
                    u32 head)
{
        struct intel_ring *ring = ce->ring;
        u32 *regs = ce->lrc_reg_state;

        GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
        GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

        regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
        regs[CTX_RING_HEAD] = head;
        regs[CTX_RING_TAIL] = ring->tail;
        regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

        /* RPCS */
        if (engine->class == RENDER_CLASS) {
                regs[CTX_R_PWR_CLK_STATE] =
                        intel_sseu_make_rpcs(engine->gt, &ce->sseu);

                i915_oa_init_reg_state(ce, engine);
        }

        if (ce->wa_bb_page) {
                u32 *(*fn)(const struct intel_context *ce, u32 *cs);

                fn = gen12_emit_indirect_ctx_xcs;
                if (ce->engine->class == RENDER_CLASS)
                        fn = gen12_emit_indirect_ctx_rcs;

                /* Mutually exclusive with the global indirect bb */
                GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
                setup_indirect_ctx_bb(ce, engine, fn);
        }

        return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
                        struct intel_engine_cs *engine)
{
        set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine,
                    const char *when)
{
        const struct intel_ring *ring = ce->ring;
        u32 *regs = ce->lrc_reg_state;
        bool valid = true;
        int x;

        if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
                pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
                       engine->name,
                       regs[CTX_RING_START],
                       i915_ggtt_offset(ring->vma));
                regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
                valid = false;
        }

        if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
            (RING_CTL_SIZE(ring->size) | RING_VALID)) {
                pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
                       engine->name,
                       regs[CTX_RING_CTL],
                       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
                regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
                valid = false;
        }

        x = lrc_ring_mi_mode(engine);
        if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
                pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
                       engine->name, regs[x + 1]);
                regs[x + 1] &= ~STOP_RING;
                regs[x + 1] |= STOP_RING << 16;
                valid = false;
        }

        WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
 * PIPE_CONTROL instruction. This is required for the flush to happen
 * correctly, but there is a slight complication: as this is applied in a WA
 * batch where the values are only initialized once, we cannot take the
 * register value at the beginning and reuse it further; hence we save its
 * value to memory, upload a constant value with bit 21 set and then restore
 * it with the saved value. To simplify the WA, a constant value is formed by
 * using the default value of this register. This shouldn't be a problem
 * because we are only modifying it for a short period and this batch is
 * non-preemptible. We could of course use additional instructions that read
 * the actual value of the register at that time and set our bit of interest,
 * but that makes the WA more complicated.
 *
 * This WA is also required for Gen9 so extracting it as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
        /* NB no one else is allowed to scribble over scratch + 256! */
        *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = intel_gt_scratch_offset(engine->gt,
                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;

        *batch++ = MI_LOAD_REGISTER_IMM(1);
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_DC_FLUSH_ENABLE,
                                       0);

        *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = intel_gt_scratch_offset(engine->gt,
                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;

        return batch;
}

/*
 * Typically we only have one indirect_ctx and one per_ctx batch buffer which
 * are initialized at the beginning and shared across all contexts, but this
 * field helps us to have multiple batches at different offsets and select
 * them based on some criteria. At the moment this batch always starts at the
 * beginning of the page and we don't have multiple wa_ctx batch buffers.
 *
 * The number of WAs applied is not known at the beginning; we use this field
 * to return the number of DWORDs written.
 *
 * Note that this batch does not contain MI_BATCH_BUFFER_END, so it adds
 * NOOPs as padding to make it cacheline aligned. MI_BATCH_BUFFER_END will be
 * added to the perctx batch and the two together make a complete batch
 * buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
        /* WaDisableCtxRestoreArbitration:bdw,chv */
        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
        if (IS_BROADWELL(engine->i915))
                batch = gen8_emit_flush_coherentl3_wa(engine, batch);

        /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
        /* Actual scratch location is at 128 bytes offset */
        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_FLUSH_L3 |
                                       PIPE_CONTROL_STORE_DATA_INDEX |
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_QW_WRITE,
                                       LRC_PPHWSP_SCRATCH_ADDR);

        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        /* Pad to end of cacheline */
        while ((unsigned long)batch % CACHELINE_BYTES)
                *batch++ = MI_NOOP;

        /*
         * MI_BATCH_BUFFER_END is not required in the Indirect ctx BB because
         * execution depends on the length specified in terms of cache lines
         * in the register CTX_RCS_INDIRECT_CTX.
         */

        return batch;
}

struct lri {
        i915_reg_t reg;
        u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
        GEM_BUG_ON(!count || count > 63);

        *batch++ = MI_LOAD_REGISTER_IMM(count);
        do {
                *batch++ = i915_mmio_reg_offset(lri->reg);
                *batch++ = lri->value;
        } while (lri++, --count);
        *batch++ = MI_NOOP;

        return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
        static const struct lri lri[] = {
                /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
                {
                        COMMON_SLICE_CHICKEN2,
                        __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
                                       0),
                },

                /* BSpec: 11391 */
                {
                        FF_SLICE_CHICKEN,
                        __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
                                       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
                },

                /* BSpec: 11299 */
                {
                        _3D_CHICKEN3,
                        __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
                                       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
                }
        };

        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
        batch = gen8_emit_flush_coherentl3_wa(engine, batch);

        /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_FLUSH_L3 |
                                       PIPE_CONTROL_STORE_DATA_INDEX |
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_QW_WRITE,
                                       LRC_PPHWSP_SCRATCH_ADDR);

        batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

        /* WaMediaPoolStateCmdInWABB:bxt,glk */
        if (HAS_POOLED_EU(engine->i915)) {
                /*
                 * EU pool configuration is set up along with the golden
                 * context during context initialization. This value depends
                 * on device type (2x6 or 3x6) and needs to be updated based
                 * on which subslice is disabled, especially for 2x6 devices.
                 * However, it is safe to load the default configuration of a
                 * 3x6 device instead of masking off the corresponding bits,
                 * because the HW ignores bits of a disabled subslice and
                 * drops down to the appropriate config. Please see
                 * render_state_setup() in i915_gem_render_state.c for
                 * possible configurations; to avoid duplication they are not
                 * shown here again.
                 */
                *batch++ = GEN9_MEDIA_POOL_STATE;
                *batch++ = GEN9_MEDIA_POOL_ENABLE;
                *batch++ = 0x00777000;
                *batch++ = 0;
                *batch++ = 0;
                *batch++ = 0;
        }

        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        /* Pad to end of cacheline */
        while ((unsigned long)batch % CACHELINE_BYTES)
                *batch++ = MI_NOOP;

        return batch;
}
#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        int err;

        obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err;
        }

        engine->wa_ctx.vma = vma;
        return 0;

err:
        i915_gem_object_put(obj);
        return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
        i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
        struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
        struct i915_wa_ctx_bb *wa_bb[] = {
                &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
        };
        wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
        struct i915_gem_ww_ctx ww;
        void *batch, *batch_ptr;
        unsigned int i;
        int err;

        if (engine->class != RENDER_CLASS)
                return;

        switch (GRAPHICS_VER(engine->i915)) {
        case 12:
        case 11:
                return;
        case 9:
                wa_bb_fn[0] = gen9_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
                break;
        case 8:
                wa_bb_fn[0] = gen8_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
                break;
        default:
                MISSING_CASE(GRAPHICS_VER(engine->i915));
                return;
        }

        err = lrc_create_wa_ctx(engine);
        if (err) {
                /*
                 * We continue even if we fail to initialize the WA batch
                 * because we only expect rare glitches but nothing
                 * critical enough to prevent us from using the GPU.
                 */
                drm_err(&engine->i915->drm,
                        "Ignoring context switch w/a allocation error:%d\n",
                        err);
                return;
        }

        if (!engine->wa_ctx.vma)
                return;

        i915_gem_ww_ctx_init(&ww, true);
retry:
        err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
        if (!err)
                err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
        if (err)
                goto err;

        batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto err_unpin;
        }

        /*
         * Emit the two workaround batch buffers, recording the offset from the
         * start of the workaround batch buffer object for each and their
         * respective sizes.
         */
        batch_ptr = batch;
        for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
                wa_bb[i]->offset = batch_ptr - batch;
                if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
                                                  CACHELINE_BYTES))) {
                        err = -EINVAL;
                        break;
                }
                if (wa_bb_fn[i])
                        batch_ptr = wa_bb_fn[i](engine, batch_ptr);
                wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
        }
        GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

        __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
        __i915_gem_object_release_map(wa_ctx->vma->obj);

        /* Verify that we can handle failure to set up the wa_ctx */
        if (!err)
                err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
        if (err)
                i915_vma_unpin(wa_ctx->vma);
err:
        if (err == -EDEADLK) {
                err = i915_gem_ww_ctx_backoff(&ww);
                if (!err)
                        goto retry;
        }
        i915_gem_ww_ctx_fini(&ww);

        if (err) {
                i915_vma_put(engine->wa_ctx.vma);

                /* Clear all flags to prevent further use */
                memset(wa_ctx, 0, sizeof(*wa_ctx));
        }
}

static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
        ce->runtime.num_underflow++;
        ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
#endif
}

void lrc_update_runtime(struct intel_context *ce)
{
        u32 old;
        s32 dt;

        if (intel_context_is_barrier(ce))
                return;

        old = ce->runtime.last;
        ce->runtime.last = lrc_get_runtime(ce);
        dt = ce->runtime.last - old;

        if (unlikely(dt < 0)) {
                CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
                         old, ce->runtime.last, dt);
                st_update_runtime_underflow(ce, dt);
                return;
        }

        ewma_runtime_add(&ce->runtime.avg, dt);
        ce->runtime.total += dt;
}
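
/*
 * Note: ce->runtime.last mirrors the u32 CTX_TIMESTAMP, so the
 * subtraction above is modular and tolerates the counter wrapping; a
 * negative delta means the new sample appears behind the old one and
 * is recorded as an underflow rather than folded into the average.
 */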

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif