drm/i915/gt: Split logical ring contexts from execlist submission
author Chris Wilson <chris@chris-wilson.co.uk>
Sat, 19 Dec 2020 02:03:42 +0000 (02:03 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Mon, 21 Dec 2020 09:53:48 +0000 (09:53 +0000)
Split the definition, construction and updating of the Logical Ring
Context from the execlist submission interface. The LRC is used by the
HW, irrespective of our different submission backends.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201219020343.22681-1-chris@chris-wilson.co.uk
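For reference, the interface that moves into gt/intel_lrc.h can be
reconstructed from the call sites in the diff below; the sketch here is that
reconstruction only, with parameter names and const qualifiers inferred
rather than copied from the new header:

    /* Inferred gt/intel_lrc.h interface (sketch, not the header verbatim) */
    int  lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine);
    void lrc_fini(struct intel_context *ce);
    void lrc_destroy(struct kref *kref);        /* intel_context_ops.destroy */

    int  lrc_pre_pin(struct intel_context *ce, struct intel_engine_cs *engine,
                     struct i915_gem_ww_ctx *ww, void **vaddr);
    int  lrc_pin(struct intel_context *ce, struct intel_engine_cs *engine,
                 void *vaddr);
    void lrc_unpin(struct intel_context *ce);
    void lrc_post_unpin(struct intel_context *ce);

    void lrc_init_state(struct intel_context *ce,
                        struct intel_engine_cs *engine, void *state);
    void lrc_init_regs(const struct intel_context *ce,
                       const struct intel_engine_cs *engine, bool inhibit);
    void lrc_reset_regs(const struct intel_context *ce,
                        const struct intel_engine_cs *engine);
    u32  lrc_update_regs(const struct intel_context *ce,
                         const struct intel_engine_cs *engine, u32 head);
    void lrc_update_offsets(struct intel_context *ce,
                            struct intel_engine_cs *engine);
    void lrc_check_regs(const struct intel_context *ce,
                        const struct intel_engine_cs *engine,
                        const char *when);

    void lrc_reset(struct intel_context *ce);   /* intel_context_ops.reset */
    void lrc_update_runtime(struct intel_context *ce);

    int  lrc_init_wa_ctx(struct intel_engine_cs *engine);
    void lrc_fini_wa_ctx(struct intel_engine_cs *engine);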
14 files changed:
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/gt/gen8_engine_cs.c
drivers/gpu/drm/i915/gt/intel_context_sseu.c
drivers/gpu/drm/i915/gt/intel_execlists_submission.c
drivers/gpu/drm/i915/gt/intel_execlists_submission.h
drivers/gpu/drm/i915/gt/intel_lrc.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_lrc.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_lrc_reg.h
drivers/gpu/drm/i915/gt/selftest_execlists.c
drivers/gpu/drm/i915/gt/selftest_lrc.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/i915_perf.c

index f9ef519..849c7b3 100644
@@ -104,6 +104,7 @@ gt-y += \
        gt/intel_gt_requests.o \
        gt/intel_gtt.o \
        gt/intel_llc.o \
+       gt/intel_lrc.o \
        gt/intel_mocs.o \
        gt/intel_ppgtt.o \
        gt/intel_rc6.o \
index 9c6f0eb..1972dd5 100644
@@ -5,7 +5,7 @@
 
 #include "gen8_engine_cs.h"
 #include "i915_drv.h"
-#include "intel_execlists_submission.h" /* XXX */
+#include "intel_lrc.h"
 #include "intel_gpu_commands.h"
 #include "intel_ring.h"
 
index 5f94b44..8dfd8f6 100644
@@ -8,8 +8,7 @@
 #include "intel_context.h"
 #include "intel_engine_pm.h"
 #include "intel_gpu_commands.h"
-#include "intel_execlists_submission.h"
-#include "intel_lrc_reg.h"
+#include "intel_lrc.h"
 #include "intel_ring.h"
 #include "intel_sseu.h"
 
index dcecc28..358fd24 100644
 #include <linux/interrupt.h>
 
 #include "i915_drv.h"
-#include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 #include "gen8_engine_cs.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
+#include "intel_lrc.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
         (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 
-#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
-
 #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */
 #define GEN12_CTX_SWITCH_DETAIL(csb_dw)        ((csb_dw) & 0xF) /* upper csb dword */
 #define GEN12_CSB_SW_CTX_ID_MASK               GENMASK(25, 15)
@@ -205,136 +203,6 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
        return container_of(engine, struct virtual_engine, base);
 }
 
-static int __execlists_context_alloc(struct intel_context *ce,
-                                    struct intel_engine_cs *engine);
-
-static void execlists_init_reg_state(u32 *reg_state,
-                                    const struct intel_context *ce,
-                                    const struct intel_engine_cs *engine,
-                                    const struct intel_ring *ring,
-                                    bool close);
-static void
-__execlists_update_reg_state(const struct intel_context *ce,
-                            const struct intel_engine_cs *engine,
-                            u32 head);
-
-static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
-{
-       if (INTEL_GEN(engine->i915) >= 12)
-               return 0x60;
-       else if (INTEL_GEN(engine->i915) >= 9)
-               return 0x54;
-       else if (engine->class == RENDER_CLASS)
-               return 0x58;
-       else
-               return -1;
-}
-
-static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
-{
-       if (INTEL_GEN(engine->i915) >= 12)
-               return 0x74;
-       else if (INTEL_GEN(engine->i915) >= 9)
-               return 0x68;
-       else if (engine->class == RENDER_CLASS)
-               return 0xd8;
-       else
-               return -1;
-}
-
-static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
-{
-       if (INTEL_GEN(engine->i915) >= 12)
-               return 0x12;
-       else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
-               return 0x18;
-       else
-               return -1;
-}
-
-static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
-{
-       int x;
-
-       x = lrc_ring_wa_bb_per_ctx(engine);
-       if (x < 0)
-               return x;
-
-       return x + 2;
-}
-
-static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
-{
-       int x;
-
-       x = lrc_ring_indirect_ptr(engine);
-       if (x < 0)
-               return x;
-
-       return x + 2;
-}
-
-static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
-{
-       if (engine->class != RENDER_CLASS)
-               return -1;
-
-       if (INTEL_GEN(engine->i915) >= 12)
-               return 0xb6;
-       else if (INTEL_GEN(engine->i915) >= 11)
-               return 0xaa;
-       else
-               return -1;
-}
-
-static u32
-lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
-{
-       switch (INTEL_GEN(engine->i915)) {
-       default:
-               MISSING_CASE(INTEL_GEN(engine->i915));
-               fallthrough;
-       case 12:
-               return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-       case 11:
-               return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-       case 10:
-               return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-       case 9:
-               return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-       case 8:
-               return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
-       }
-}
-
-static void
-lrc_ring_setup_indirect_ctx(u32 *regs,
-                           const struct intel_engine_cs *engine,
-                           u32 ctx_bb_ggtt_addr,
-                           u32 size)
-{
-       GEM_BUG_ON(!size);
-       GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
-       GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
-       regs[lrc_ring_indirect_ptr(engine) + 1] =
-               ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
-
-       GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
-       regs[lrc_ring_indirect_offset(engine) + 1] =
-               lrc_ring_indirect_offset_default(engine) << 6;
-}
-
-static u32 intel_context_get_runtime(const struct intel_context *ce)
-{
-       /*
-        * We can use either ppHWSP[16] which is recorded before the context
-        * switch (and so excludes the cost of context switches) or use the
-        * value from the context image itself, which is saved/restored earlier
-        * and so includes the cost of the save.
-        */
-       return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
-}
-
 static void mark_eio(struct i915_request *rq)
 {
        if (i915_request_completed(rq))
@@ -513,568 +381,6 @@ assert_priority_queue(const struct i915_request *prev,
        return rq_prio(prev) >= rq_prio(next);
 }
 
-/*
- * The context descriptor encodes various attributes of a context,
- * including its GTT address and some flags. Because it's fairly
- * expensive to calculate, we'll just do it once and cache the result,
- * which remains valid until the context is unpinned.
- *
- * This is what a descriptor looks like, from LSB to MSB::
- *
- *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
- *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
- *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
- *      bits 53-54:    mbz, reserved for use by hardware
- *      bits 55-63:    group ID, currently unused and set to 0
- *
- * Starting from Gen11, the upper dword of the descriptor has a new format:
- *
- *      bits 32-36:    reserved
- *      bits 37-47:    SW context ID
- *      bits 48-53:    engine instance
- *      bit 54:        mbz, reserved for use by hardware
- *      bits 55-60:    SW counter
- *      bits 61-63:    engine class
- *
- * engine info, SW context ID and SW counter need to form a unique number
- * (Context ID) per lrc.
- */
-static u32
-lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
-{
-       u32 desc;
-
-       desc = INTEL_LEGACY_32B_CONTEXT;
-       if (i915_vm_is_4lvl(ce->vm))
-               desc = INTEL_LEGACY_64B_CONTEXT;
-       desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
-
-       desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
-       if (IS_GEN(engine->i915, 8))
-               desc |= GEN8_CTX_L3LLC_COHERENT;
-
-       return i915_ggtt_offset(ce->state) | desc;
-}
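As a worked example of the layout above, consider a Gen9+ engine with a
4-level ppGTT (a hedged sketch: the numeric values assume the usual
intel_lrc_reg.h definitions, i.e. INTEL_LEGACY_64B_CONTEXT == 3,
GEN8_CTX_ADDRESSING_MODE_SHIFT == 3, GEN8_CTX_VALID == BIT(0) and
GEN8_CTX_PRIVILEGE == BIT(8)):

    /* Flags occupy bits 0-11 of the low dword. */
    u32 desc = (INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT) |
               GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;        /* == 0x119 */
    u32 lrca = i915_ggtt_offset(ce->state) | desc;

ORing the flags into the GGTT offset is safe only because the context state
is page aligned, which is why the flags are confined to the low 12 bits.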
-
-static inline unsigned int dword_in_page(void *addr)
-{
-       return offset_in_page(addr) / sizeof(u32);
-}
-
-static void set_offsets(u32 *regs,
-                       const u8 *data,
-                       const struct intel_engine_cs *engine,
-                       bool clear)
-#define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
-#define POSTED BIT(0)
-#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
-#define REG16(x) \
-       (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
-       (((x) >> 2) & 0x7f)
-#define END(total_state_size) 0, (total_state_size)
-{
-       const u32 base = engine->mmio_base;
-
-       while (*data) {
-               u8 count, flags;
-
-               if (*data & BIT(7)) { /* skip */
-                       count = *data++ & ~BIT(7);
-                       if (clear)
-                               memset32(regs, MI_NOOP, count);
-                       regs += count;
-                       continue;
-               }
-
-               count = *data & 0x3f;
-               flags = *data >> 6;
-               data++;
-
-               *regs = MI_LOAD_REGISTER_IMM(count);
-               if (flags & POSTED)
-                       *regs |= MI_LRI_FORCE_POSTED;
-               if (INTEL_GEN(engine->i915) >= 11)
-                       *regs |= MI_LRI_LRM_CS_MMIO;
-               regs++;
-
-               GEM_BUG_ON(!count);
-               do {
-                       u32 offset = 0;
-                       u8 v;
-
-                       do {
-                               v = *data++;
-                               offset <<= 7;
-                               offset |= v & ~BIT(7);
-                       } while (v & BIT(7));
-
-                       regs[0] = base + (offset << 2);
-                       if (clear)
-                               regs[1] = 0;
-                       regs += 2;
-               } while (--count);
-       }
-
-       if (clear) {
-               u8 count = *++data;
-
-               /* Clear past the tail for HW access */
-               GEM_BUG_ON(dword_in_page(regs) > count);
-               memset32(regs, MI_NOOP, count - dword_in_page(regs));
-
-               /* Close the batch; used mainly by live_lrc_layout() */
-               *regs = MI_BATCH_BUFFER_END;
-               if (INTEL_GEN(engine->i915) >= 10)
-                       *regs |= BIT(0);
-       }
-}
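To make the encoding concrete, here is a hand-worked decode of the first few
bytes of gen8_xcs_offsets below, assuming for illustration an engine with
mmio_base == 0x22000 and clear == true:

    /* Bytecode: NOP(1), LRI(11, 0), REG16(0x244), REG(0x034), ...
     *
     *   NOP(1)       -> 0x81        skip one dword, written as MI_NOOP
     *   LRI(11, 0)   -> 0x0b        emit MI_LOAD_REGISTER_IMM(11)
     *   REG16(0x244) -> 0x81, 0x11  7-bit groups: (0x01 << 7) | 0x11 = 0x91,
     *                               reg = base + (0x91 << 2) = base + 0x244
     *   REG(0x034)   -> 0x0d        reg = base + (0x0d << 2) = base + 0x034
     *
     * so the register image begins:
     *
     *   regs[0] = MI_NOOP
     *   regs[1] = MI_LOAD_REGISTER_IMM(11)
     *   regs[2] = 0x22244, regs[3] = 0   (RING_CONTEXT_CONTROL)
     *   regs[4] = 0x22034, regs[5] = 0   (RING_HEAD)
     *   ...
     */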
-
-static const u8 gen8_xcs_offsets[] = {
-       NOP(1),
-       LRI(11, 0),
-       REG16(0x244),
-       REG(0x034),
-       REG(0x030),
-       REG(0x038),
-       REG(0x03c),
-       REG(0x168),
-       REG(0x140),
-       REG(0x110),
-       REG(0x11c),
-       REG(0x114),
-       REG(0x118),
-
-       NOP(9),
-       LRI(9, 0),
-       REG16(0x3a8),
-       REG16(0x28c),
-       REG16(0x288),
-       REG16(0x284),
-       REG16(0x280),
-       REG16(0x27c),
-       REG16(0x278),
-       REG16(0x274),
-       REG16(0x270),
-
-       NOP(13),
-       LRI(2, 0),
-       REG16(0x200),
-       REG(0x028),
-
-       END(80)
-};
-
-static const u8 gen9_xcs_offsets[] = {
-       NOP(1),
-       LRI(14, POSTED),
-       REG16(0x244),
-       REG(0x034),
-       REG(0x030),
-       REG(0x038),
-       REG(0x03c),
-       REG(0x168),
-       REG(0x140),
-       REG(0x110),
-       REG(0x11c),
-       REG(0x114),
-       REG(0x118),
-       REG(0x1c0),
-       REG(0x1c4),
-       REG(0x1c8),
-
-       NOP(3),
-       LRI(9, POSTED),
-       REG16(0x3a8),
-       REG16(0x28c),
-       REG16(0x288),
-       REG16(0x284),
-       REG16(0x280),
-       REG16(0x27c),
-       REG16(0x278),
-       REG16(0x274),
-       REG16(0x270),
-
-       NOP(13),
-       LRI(1, POSTED),
-       REG16(0x200),
-
-       NOP(13),
-       LRI(44, POSTED),
-       REG(0x028),
-       REG(0x09c),
-       REG(0x0c0),
-       REG(0x178),
-       REG(0x17c),
-       REG16(0x358),
-       REG(0x170),
-       REG(0x150),
-       REG(0x154),
-       REG(0x158),
-       REG16(0x41c),
-       REG16(0x600),
-       REG16(0x604),
-       REG16(0x608),
-       REG16(0x60c),
-       REG16(0x610),
-       REG16(0x614),
-       REG16(0x618),
-       REG16(0x61c),
-       REG16(0x620),
-       REG16(0x624),
-       REG16(0x628),
-       REG16(0x62c),
-       REG16(0x630),
-       REG16(0x634),
-       REG16(0x638),
-       REG16(0x63c),
-       REG16(0x640),
-       REG16(0x644),
-       REG16(0x648),
-       REG16(0x64c),
-       REG16(0x650),
-       REG16(0x654),
-       REG16(0x658),
-       REG16(0x65c),
-       REG16(0x660),
-       REG16(0x664),
-       REG16(0x668),
-       REG16(0x66c),
-       REG16(0x670),
-       REG16(0x674),
-       REG16(0x678),
-       REG16(0x67c),
-       REG(0x068),
-
-       END(176)
-};
-
-static const u8 gen12_xcs_offsets[] = {
-       NOP(1),
-       LRI(13, POSTED),
-       REG16(0x244),
-       REG(0x034),
-       REG(0x030),
-       REG(0x038),
-       REG(0x03c),
-       REG(0x168),
-       REG(0x140),
-       REG(0x110),
-       REG(0x1c0),
-       REG(0x1c4),
-       REG(0x1c8),
-       REG(0x180),
-       REG16(0x2b4),
-
-       NOP(5),
-       LRI(9, POSTED),
-       REG16(0x3a8),
-       REG16(0x28c),
-       REG16(0x288),
-       REG16(0x284),
-       REG16(0x280),
-       REG16(0x27c),
-       REG16(0x278),
-       REG16(0x274),
-       REG16(0x270),
-
-       END(80)
-};
-
-static const u8 gen8_rcs_offsets[] = {
-       NOP(1),
-       LRI(14, POSTED),
-       REG16(0x244),
-       REG(0x034),
-       REG(0x030),
-       REG(0x038),
-       REG(0x03c),
-       REG(0x168),
-       REG(0x140),
-       REG(0x110),
-       REG(0x11c),
-       REG(0x114),
-       REG(0x118),
-       REG(0x1c0),
-       REG(0x1c4),
-       REG(0x1c8),
-
-       NOP(3),
-       LRI(9, POSTED),
-       REG16(0x3a8),
-       REG16(0x28c),
-       REG16(0x288),
-       REG16(0x284),
-       REG16(0x280),
-       REG16(0x27c),
-       REG16(0x278),
-       REG16(0x274),
-       REG16(0x270),
-
-       NOP(13),
-       LRI(1, 0),
-       REG(0x0c8),
-
-       END(80)
-};
-
-static const u8 gen9_rcs_offsets[] = {
-       NOP(1),
-       LRI(14, POSTED),
-       REG16(0x244),
-       REG(0x34),
-       REG(0x30),
-       REG(0x38),
-       REG(0x3c),
-       REG(0x168),
-       REG(0x140),
-       REG(0x110),
-       REG(0x11c),
-       REG(0x114),
-       REG(0x118),
-       REG(0x1c0),
-       REG(0x1c4),
-       REG(0x1c8),
-
-       NOP(3),
-       LRI(9, POSTED),
-       REG16(0x3a8),
-       REG16(0x28c),
-       REG16(0x288),
-       REG16(0x284),
-       REG16(0x280),
-       REG16(0x27c),
-       REG16(0x278),
-       REG16(0x274),
-       REG16(0x270),
-
-       NOP(13),
-       LRI(1, 0),
-       REG(0xc8),
-
-       NOP(13),
-       LRI(44, POSTED),
-       REG(0x28),
-       REG(0x9c),
-       REG(0xc0),
-       REG(0x178),
-       REG(0x17c),
-       REG16(0x358),
-       REG(0x170),
-       REG(0x150),
-       REG(0x154),
-       REG(0x158),
-       REG16(0x41c),
-       REG16(0x600),
-       REG16(0x604),
-       REG16(0x608),
-       REG16(0x60c),
-       REG16(0x610),
-       REG16(0x614),
-       REG16(0x618),
-       REG16(0x61c),
-       REG16(0x620),
-       REG16(0x624),
-       REG16(0x628),
-       REG16(0x62c),
-       REG16(0x630),
-       REG16(0x634),
-       REG16(0x638),
-       REG16(0x63c),
-       REG16(0x640),
-       REG16(0x644),
-       REG16(0x648),
-       REG16(0x64c),
-       REG16(0x650),
-       REG16(0x654),
-       REG16(0x658),
-       REG16(0x65c),
-       REG16(0x660),
-       REG16(0x664),
-       REG16(0x668),
-       REG16(0x66c),
-       REG16(0x670),
-       REG16(0x674),
-       REG16(0x678),
-       REG16(0x67c),
-       REG(0x68),
-
-       END(176)
-};
-
-static const u8 gen11_rcs_offsets[] = {
-       NOP(1),
-       LRI(15, POSTED),
-       REG16(0x244),
-       REG(0x034),
-       REG(0x030),
-       REG(0x038),
-       REG(0x03c),
-       REG(0x168),
-       REG(0x140),
-       REG(0x110),
-       REG(0x11c),
-       REG(0x114),
-       REG(0x118),
-       REG(0x1c0),
-       REG(0x1c4),
-       REG(0x1c8),
-       REG(0x180),
-
-       NOP(1),
-       LRI(9, POSTED),
-       REG16(0x3a8),
-       REG16(0x28c),
-       REG16(0x288),
-       REG16(0x284),
-       REG16(0x280),
-       REG16(0x27c),
-       REG16(0x278),
-       REG16(0x274),
-       REG16(0x270),
-
-       LRI(1, POSTED),
-       REG(0x1b0),
-
-       NOP(10),
-       LRI(1, 0),
-       REG(0x0c8),
-
-       END(80)
-};
-
-static const u8 gen12_rcs_offsets[] = {
-       NOP(1),
-       LRI(13, POSTED),
-       REG16(0x244),
-       REG(0x034),
-       REG(0x030),
-       REG(0x038),
-       REG(0x03c),
-       REG(0x168),
-       REG(0x140),
-       REG(0x110),
-       REG(0x1c0),
-       REG(0x1c4),
-       REG(0x1c8),
-       REG(0x180),
-       REG16(0x2b4),
-
-       NOP(5),
-       LRI(9, POSTED),
-       REG16(0x3a8),
-       REG16(0x28c),
-       REG16(0x288),
-       REG16(0x284),
-       REG16(0x280),
-       REG16(0x27c),
-       REG16(0x278),
-       REG16(0x274),
-       REG16(0x270),
-
-       LRI(3, POSTED),
-       REG(0x1b0),
-       REG16(0x5a8),
-       REG16(0x5ac),
-
-       NOP(6),
-       LRI(1, 0),
-       REG(0x0c8),
-       NOP(3 + 9 + 1),
-
-       LRI(51, POSTED),
-       REG16(0x588),
-       REG16(0x588),
-       REG16(0x588),
-       REG16(0x588),
-       REG16(0x588),
-       REG16(0x588),
-       REG(0x028),
-       REG(0x09c),
-       REG(0x0c0),
-       REG(0x178),
-       REG(0x17c),
-       REG16(0x358),
-       REG(0x170),
-       REG(0x150),
-       REG(0x154),
-       REG(0x158),
-       REG16(0x41c),
-       REG16(0x600),
-       REG16(0x604),
-       REG16(0x608),
-       REG16(0x60c),
-       REG16(0x610),
-       REG16(0x614),
-       REG16(0x618),
-       REG16(0x61c),
-       REG16(0x620),
-       REG16(0x624),
-       REG16(0x628),
-       REG16(0x62c),
-       REG16(0x630),
-       REG16(0x634),
-       REG16(0x638),
-       REG16(0x63c),
-       REG16(0x640),
-       REG16(0x644),
-       REG16(0x648),
-       REG16(0x64c),
-       REG16(0x650),
-       REG16(0x654),
-       REG16(0x658),
-       REG16(0x65c),
-       REG16(0x660),
-       REG16(0x664),
-       REG16(0x668),
-       REG16(0x66c),
-       REG16(0x670),
-       REG16(0x674),
-       REG16(0x678),
-       REG16(0x67c),
-       REG(0x068),
-       REG(0x084),
-       NOP(1),
-
-       END(192)
-};
-
-#undef END
-#undef REG16
-#undef REG
-#undef LRI
-#undef NOP
-
-static const u8 *reg_offsets(const struct intel_engine_cs *engine)
-{
-       /*
-        * The gen12+ lists only have the registers we program in the basic
-        * default state. We rely on the context image using relative
-        * addressing to automatically fix up the register state between
-        * the physical engines for the virtual engine.
-        */
-       GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
-                  !intel_engine_has_relative_mmio(engine));
-
-       if (engine->class == RENDER_CLASS) {
-               if (INTEL_GEN(engine->i915) >= 12)
-                       return gen12_rcs_offsets;
-               else if (INTEL_GEN(engine->i915) >= 11)
-                       return gen11_rcs_offsets;
-               else if (INTEL_GEN(engine->i915) >= 9)
-                       return gen9_rcs_offsets;
-               else
-                       return gen8_rcs_offsets;
-       } else {
-               if (INTEL_GEN(engine->i915) >= 12)
-                       return gen12_xcs_offsets;
-               else if (INTEL_GEN(engine->i915) >= 9)
-                       return gen9_xcs_offsets;
-               else
-                       return gen8_xcs_offsets;
-       }
-}
-
 static struct i915_request *
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
@@ -1187,58 +493,6 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
        write_sequnlock_irqrestore(&engine->stats.lock, flags);
 }
 
-static void
-execlists_check_context(const struct intel_context *ce,
-                       const struct intel_engine_cs *engine,
-                       const char *when)
-{
-       const struct intel_ring *ring = ce->ring;
-       u32 *regs = ce->lrc_reg_state;
-       bool valid = true;
-       int x;
-
-       if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
-               pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
-                      engine->name,
-                      regs[CTX_RING_START],
-                      i915_ggtt_offset(ring->vma));
-               regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
-               valid = false;
-       }
-
-       if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
-           (RING_CTL_SIZE(ring->size) | RING_VALID)) {
-               pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
-                      engine->name,
-                      regs[CTX_RING_CTL],
-                      (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
-               regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
-               valid = false;
-       }
-
-       x = lrc_ring_mi_mode(engine);
-       if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
-               pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
-                      engine->name, regs[x + 1]);
-               regs[x + 1] &= ~STOP_RING;
-               regs[x + 1] |= STOP_RING << 16;
-               valid = false;
-       }
-
-       WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
-}
-
-static void restore_default_state(struct intel_context *ce,
-                                 struct intel_engine_cs *engine)
-{
-       u32 *regs;
-
-       regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
-       execlists_init_reg_state(regs, ce, engine, ce->ring, true);
-
-       ce->runtime.last = intel_context_get_runtime(ce);
-}
-
 static void reset_active(struct i915_request *rq,
                         struct intel_engine_cs *engine)
 {
@@ -1271,42 +525,10 @@ static void reset_active(struct i915_request *rq,
        head = intel_ring_wrap(ce->ring, head);
 
        /* Scrub the context image to prevent replaying the previous batch */
-       restore_default_state(ce, engine);
-       __execlists_update_reg_state(ce, engine, head);
+       lrc_init_regs(ce, engine, true);
 
        /* We've switched away, so this should be a no-op, but intent matters */
-       ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
-}
-
-static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
-{
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-       ce->runtime.num_underflow++;
-       ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
-#endif
-}
-
-static void intel_context_update_runtime(struct intel_context *ce)
-{
-       u32 old;
-       s32 dt;
-
-       if (intel_context_is_barrier(ce))
-               return;
-
-       old = ce->runtime.last;
-       ce->runtime.last = intel_context_get_runtime(ce);
-       dt = ce->runtime.last - old;
-
-       if (unlikely(dt < 0)) {
-               CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
-                        old, ce->runtime.last, dt);
-               st_update_runtime_underflow(ce, dt);
-               return;
-       }
-
-       ewma_runtime_add(&ce->runtime.avg, dt);
-       ce->runtime.total += dt;
+       ce->lrc.lrca = lrc_update_regs(ce, engine, head);
 }
 
 static inline struct intel_engine_cs *
@@ -1321,7 +543,7 @@ __execlists_schedule_in(struct i915_request *rq)
                reset_active(rq, engine);
 
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-               execlists_check_context(ce, engine, "before");
+               lrc_check_regs(ce, engine, "before");
 
        if (ce->tag) {
                /* Use a fixed tag for OA and friends */
@@ -1393,7 +615,7 @@ __execlists_schedule_out(struct i915_request *rq,
         */
 
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-               execlists_check_context(ce, engine, "after");
+               lrc_check_regs(ce, engine, "after");
 
        /*
         * If we have just completed this context, the engine may now be
@@ -1411,7 +633,7 @@ __execlists_schedule_out(struct i915_request *rq,
                set_bit(ccid - 1, &engine->context_tag);
        }
 
-       intel_context_update_runtime(ce);
+       lrc_update_runtime(ce);
        intel_engine_context_out(engine);
        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
        if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
@@ -1752,12 +974,6 @@ static bool can_merge_rq(const struct i915_request *prev,
        return true;
 }
 
-static void virtual_update_register_offsets(u32 *regs,
-                                           struct intel_engine_cs *engine)
-{
-       set_offsets(regs, reg_offsets(engine), engine, false);
-}
-
 static bool virtual_matches(const struct virtual_engine *ve,
                            const struct i915_request *rq,
                            const struct intel_engine_cs *engine)
@@ -1793,8 +1009,7 @@ static void virtual_xfer_context(struct virtual_engine *ve,
 
        GEM_BUG_ON(READ_ONCE(ve->context.inflight));
        if (!intel_engine_has_relative_mmio(engine))
-               virtual_update_register_offsets(ve->context.lrc_reg_state,
-                                               engine);
+               lrc_update_offsets(&ve->context, engine);
 
        /*
         * Move the bound engine to the top of the list for
@@ -3287,248 +2502,55 @@ static void execlists_submit_request(struct i915_request *request)
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-static void __execlists_context_fini(struct intel_context *ce)
-{
-       intel_ring_put(ce->ring);
-       i915_vma_put(ce->state);
-}
-
-static void execlists_context_destroy(struct kref *kref)
+static int execlists_context_pre_pin(struct intel_context *ce,
+                                    struct i915_gem_ww_ctx *ww,
+                                    void **vaddr)
 {
-       struct intel_context *ce = container_of(kref, typeof(*ce), ref);
-
-       GEM_BUG_ON(!i915_active_is_idle(&ce->active));
-       GEM_BUG_ON(intel_context_is_pinned(ce));
-
-       if (ce->state)
-               __execlists_context_fini(ce);
-
-       intel_context_fini(ce);
-       intel_context_free(ce);
-}
-
-static void
-set_redzone(void *vaddr, const struct intel_engine_cs *engine)
-{
-       if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-               return;
-
-       vaddr += engine->context_size;
-
-       memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
-}
-
-static void
-check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
-{
-       if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-               return;
-
-       vaddr += engine->context_size;
-
-       if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
-               drm_err_once(&engine->i915->drm,
-                            "%s context redzone overwritten!\n",
-                            engine->name);
-}
-
-static void execlists_context_unpin(struct intel_context *ce)
-{
-       check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
-                     ce->engine);
-}
-
-static void execlists_context_post_unpin(struct intel_context *ce)
-{
-       i915_gem_object_unpin_map(ce->state->obj);
-}
-
-static u32 *
-gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
-{
-       *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
-               MI_SRM_LRM_GLOBAL_GTT |
-               MI_LRI_LRM_CS_MMIO;
-       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
-       *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
-               CTX_TIMESTAMP * sizeof(u32);
-       *cs++ = 0;
-
-       *cs++ = MI_LOAD_REGISTER_REG |
-               MI_LRR_SOURCE_CS_MMIO |
-               MI_LRI_LRM_CS_MMIO;
-       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
-       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
-
-       *cs++ = MI_LOAD_REGISTER_REG |
-               MI_LRR_SOURCE_CS_MMIO |
-               MI_LRI_LRM_CS_MMIO;
-       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
-       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
-
-       return cs;
+       return lrc_pre_pin(ce, ce->engine, ww, vaddr);
 }
 
-static u32 *
-gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
-{
-       GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
-
-       *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
-               MI_SRM_LRM_GLOBAL_GTT |
-               MI_LRI_LRM_CS_MMIO;
-       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
-       *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
-               (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
-       *cs++ = 0;
-
-       return cs;
-}
-
-static u32 *
-gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
-{
-       GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
-
-       *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
-               MI_SRM_LRM_GLOBAL_GTT |
-               MI_LRI_LRM_CS_MMIO;
-       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
-       *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
-               (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
-       *cs++ = 0;
-
-       *cs++ = MI_LOAD_REGISTER_REG |
-               MI_LRR_SOURCE_CS_MMIO |
-               MI_LRI_LRM_CS_MMIO;
-       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
-       *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
-
-       return cs;
-}
-
-static u32 *
-gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
-{
-       cs = gen12_emit_timestamp_wa(ce, cs);
-       cs = gen12_emit_cmd_buf_wa(ce, cs);
-       cs = gen12_emit_restore_scratch(ce, cs);
-
-       return cs;
-}
-
-static u32 *
-gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
-{
-       cs = gen12_emit_timestamp_wa(ce, cs);
-       cs = gen12_emit_restore_scratch(ce, cs);
-
-       return cs;
-}
-
-static inline u32 context_wa_bb_offset(const struct intel_context *ce)
-{
-       return PAGE_SIZE * ce->wa_bb_page;
-}
-
-static u32 *context_indirect_bb(const struct intel_context *ce)
-{
-       void *ptr;
-
-       GEM_BUG_ON(!ce->wa_bb_page);
-
-       ptr = ce->lrc_reg_state;
-       ptr -= LRC_STATE_OFFSET; /* back to start of context image */
-       ptr += context_wa_bb_offset(ce);
-
-       return ptr;
-}
-
-static void
-setup_indirect_ctx_bb(const struct intel_context *ce,
-                     const struct intel_engine_cs *engine,
-                     u32 *(*emit)(const struct intel_context *, u32 *))
+static int execlists_context_pin(struct intel_context *ce, void *vaddr)
 {
-       u32 * const start = context_indirect_bb(ce);
-       u32 *cs;
-
-       cs = emit(ce, start);
-       GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
-       while ((unsigned long)cs % CACHELINE_BYTES)
-               *cs++ = MI_NOOP;
-
-       lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
-                                   i915_ggtt_offset(ce->state) +
-                                   context_wa_bb_offset(ce),
-                                   (cs - start) * sizeof(*cs));
+       return lrc_pin(ce, ce->engine, vaddr);
 }
 
-static void
-__execlists_update_reg_state(const struct intel_context *ce,
-                            const struct intel_engine_cs *engine,
-                            u32 head)
+static int __lrc_setup(struct intel_context *ce,
+                      struct intel_engine_cs *engine)
 {
-       struct intel_ring *ring = ce->ring;
-       u32 *regs = ce->lrc_reg_state;
-
-       GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
-       GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
-
-       regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
-       regs[CTX_RING_HEAD] = head;
-       regs[CTX_RING_TAIL] = ring->tail;
-       regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
-
-       /* RPCS */
-       if (engine->class == RENDER_CLASS) {
-               regs[CTX_R_PWR_CLK_STATE] =
-                       intel_sseu_make_rpcs(engine->gt, &ce->sseu);
+       struct drm_i915_gem_object *obj = ce->state->obj;
+       void *vaddr;
 
-               i915_oa_init_reg_state(ce, engine);
+       vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+       if (IS_ERR(vaddr)) {
+               drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
+               return PTR_ERR(vaddr);
        }
 
-       if (ce->wa_bb_page) {
-               u32 *(*fn)(const struct intel_context *ce, u32 *cs);
-
-               fn = gen12_emit_indirect_ctx_xcs;
-               if (ce->engine->class == RENDER_CLASS)
-                       fn = gen12_emit_indirect_ctx_rcs;
+       lrc_init_state(ce, engine, vaddr);
 
-               /* Mutually exclusive with the global indirect bb */
-               GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
-               setup_indirect_ctx_bb(ce, engine, fn);
-       }
+       __i915_gem_object_flush_map(obj, 0, engine->context_size);
+       i915_gem_object_unpin_map(obj);
+       return 0;
 }
 
-static int
-execlists_context_pre_pin(struct intel_context *ce,
-                         struct i915_gem_ww_ctx *ww, void **vaddr)
+static int __execlists_context_alloc(struct intel_context *ce,
+                                    struct intel_engine_cs *engine)
 {
-       GEM_BUG_ON(!ce->state);
-       GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
-
-       *vaddr = i915_gem_object_pin_map(ce->state->obj,
-                                       i915_coherent_map_type(ce->engine->i915) |
-                                       I915_MAP_OVERRIDE);
+       int err;
 
-       return PTR_ERR_OR_ZERO(*vaddr);
-}
+       err = lrc_alloc(ce, engine);
+       if (err)
+               return err;
 
-static int
-__execlists_context_pin(struct intel_context *ce,
-                       struct intel_engine_cs *engine,
-                       void *vaddr)
-{
-       ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
-       ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
-       __execlists_update_reg_state(ce, engine, ce->ring->tail);
+       err = __lrc_setup(ce, engine);
+       if (err)
+               goto err_lrc;
 
        return 0;
-}
 
-static int execlists_context_pin(struct intel_context *ce, void *vaddr)
-{
-       return __execlists_context_pin(ce, ce->engine, vaddr);
+err_lrc:
+       lrc_fini(ce);
+       return err;
 }
 
 static int execlists_context_alloc(struct intel_context *ce)
@@ -3536,34 +2558,19 @@ static int execlists_context_alloc(struct intel_context *ce)
        return __execlists_context_alloc(ce, ce->engine);
 }
 
-static void execlists_context_reset(struct intel_context *ce)
-{
-       CE_TRACE(ce, "reset\n");
-       GEM_BUG_ON(!intel_context_is_pinned(ce));
-
-       intel_ring_reset(ce->ring, ce->ring->emit);
-
-       /* Scrub away the garbage */
-       execlists_init_reg_state(ce->lrc_reg_state,
-                                ce, ce->engine, ce->ring, true);
-       __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
-
-       ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
-}
-
 static const struct intel_context_ops execlists_context_ops = {
        .alloc = execlists_context_alloc,
 
        .pre_pin = execlists_context_pre_pin,
        .pin = execlists_context_pin,
-       .unpin = execlists_context_unpin,
-       .post_unpin = execlists_context_post_unpin,
+       .unpin = lrc_unpin,
+       .post_unpin = lrc_post_unpin,
 
        .enter = intel_context_enter_engine,
        .exit = intel_context_exit_engine,
 
-       .reset = execlists_context_reset,
-       .destroy = execlists_context_destroy,
+       .reset = lrc_reset,
+       .destroy = lrc_destroy,
 };
 
 static int emit_pdps(struct i915_request *rq)
@@ -3650,330 +2657,6 @@ static int execlists_request_alloc(struct i915_request *request)
        return 0;
 }
 
-/*
- * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
- * PIPE_CONTROL instruction. This is required for the flush to happen correctly
- * but there is a slight complication as this is applied in a WA batch where the
- * values are only initialized once, so we cannot take the register value at the
- * beginning and reuse it further; hence we save its value to memory, upload a
- * constant value with bit21 set and then restore it with the saved value.
- * To simplify the WA, a constant value is formed by using the default value
- * of this register. This shouldn't be a problem because we are only modifying
- * it for a short period and this batch is non-preemptible. We can of course
- * use additional instructions that read the actual value of the register
- * at that time and set our bit of interest but it makes the WA complicated.
- *
- * This WA is also required for Gen9, so extracting it as a function avoids
- * code duplication.
- */
-static u32 *
-gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
-{
-       /* NB no one else is allowed to scribble over scratch + 256! */
-       *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = intel_gt_scratch_offset(engine->gt,
-                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
-       *batch++ = 0;
-
-       *batch++ = MI_LOAD_REGISTER_IMM(1);
-       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
-
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_CS_STALL |
-                                      PIPE_CONTROL_DC_FLUSH_ENABLE,
-                                      0);
-
-       *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = intel_gt_scratch_offset(engine->gt,
-                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
-       *batch++ = 0;
-
-       return batch;
-}
-
-/*
- * Typically we only have one indirect_ctx and per_ctx batch buffer which are
- * initialized at the beginning and shared across all contexts but this field
- * helps us to have multiple batches at different offsets and select them based
- * on a criterion. At the moment this batch always starts at the beginning of
- * the page and at this point we don't have multiple wa_ctx batch buffers.
- *
- * The number of WAs applied is not known at the beginning; we use this field
- * to return the number of DWORDs written.
- *
- * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
- * so it adds NOOPs as padding to make it cacheline aligned.
- * MI_BATCH_BUFFER_END will be added to the per-ctx batch and the two together
- * make a complete batch buffer.
- */
-static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
-       /* WaDisableCtxRestoreArbitration:bdw,chv */
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
-       /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
-       if (IS_BROADWELL(engine->i915))
-               batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
-       /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
-       /* Actual scratch location is at 128 bytes offset */
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_FLUSH_L3 |
-                                      PIPE_CONTROL_STORE_DATA_INDEX |
-                                      PIPE_CONTROL_CS_STALL |
-                                      PIPE_CONTROL_QW_WRITE,
-                                      LRC_PPHWSP_SCRATCH_ADDR);
-
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-       /* Pad to end of cacheline */
-       while ((unsigned long)batch % CACHELINE_BYTES)
-               *batch++ = MI_NOOP;
-
-       /*
-        * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
-        * execution depends on the length specified in terms of cache lines
-        * in the register CTX_RCS_INDIRECT_CTX
-        */
-
-       return batch;
-}
-
-struct lri {
-       i915_reg_t reg;
-       u32 value;
-};
-
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
-{
-       GEM_BUG_ON(!count || count > 63);
-
-       *batch++ = MI_LOAD_REGISTER_IMM(count);
-       do {
-               *batch++ = i915_mmio_reg_offset(lri->reg);
-               *batch++ = lri->value;
-       } while (lri++, --count);
-       *batch++ = MI_NOOP;
-
-       return batch;
-}
-
-static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
-       static const struct lri lri[] = {
-               /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
-               {
-                       COMMON_SLICE_CHICKEN2,
-                       __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
-                                      0),
-               },
-
-               /* BSpec: 11391 */
-               {
-                       FF_SLICE_CHICKEN,
-                       __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
-                                      FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
-               },
-
-               /* BSpec: 11299 */
-               {
-                       _3D_CHICKEN3,
-                       __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
-                                      _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
-               }
-       };
-
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
-       /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
-       batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
-       /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_FLUSH_L3 |
-                                      PIPE_CONTROL_STORE_DATA_INDEX |
-                                      PIPE_CONTROL_CS_STALL |
-                                      PIPE_CONTROL_QW_WRITE,
-                                      LRC_PPHWSP_SCRATCH_ADDR);
-
-       batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
-
-       /* WaMediaPoolStateCmdInWABB:bxt,glk */
-       if (HAS_POOLED_EU(engine->i915)) {
-               /*
-                * EU pool configuration is set up along with the golden
-                * context during context initialization. This value depends
-                * on the device type (2x6 or 3x6) and needs to be updated
-                * based on which subslice is disabled, especially for 2x6
-                * devices; however, it is safe to load the default
-                * configuration of a 3x6 device instead of masking off the
-                * corresponding bits because the HW ignores bits of a disabled
-                * subslice and drops down to the appropriate config. Please
-                * see render_state_setup() in i915_gem_render_state.c for
-                * possible configurations, to avoid duplication they are
-                * not shown here again.
-                */
-               *batch++ = GEN9_MEDIA_POOL_STATE;
-               *batch++ = GEN9_MEDIA_POOL_ENABLE;
-               *batch++ = 0x00777000;
-               *batch++ = 0;
-               *batch++ = 0;
-               *batch++ = 0;
-       }
-
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-       /* Pad to end of cacheline */
-       while ((unsigned long)batch % CACHELINE_BYTES)
-               *batch++ = MI_NOOP;
-
-       return batch;
-}
-
-static u32 *
-gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
-       int i;
-
-       /*
-        * WaPipeControlBefore3DStateSamplePattern: cnl
-        *
-        * Ensure the engine is idle prior to programming a
-        * 3DSTATE_SAMPLE_PATTERN during a context restore.
-        */
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_CS_STALL,
-                                      0);
-       /*
-        * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
-        * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
-        * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
-        * confusing. Since gen8_emit_pipe_control() already advances the
-        * batch by 6 dwords, we advance the other 10 here, completing a
-        * cacheline. It's not clear if the workaround requires this padding
-        * before other commands, or if it's just the regular padding we would
-        * already have for the workaround bb, so leave it here for now.
-        */
-       for (i = 0; i < 10; i++)
-               *batch++ = MI_NOOP;
-
-       /* Pad to end of cacheline */
-       while ((unsigned long)batch % CACHELINE_BYTES)
-               *batch++ = MI_NOOP;
-
-       return batch;
-}
-
-#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
-
-static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
-{
-       struct drm_i915_gem_object *obj;
-       struct i915_vma *vma;
-       int err;
-
-       obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
-       if (IS_ERR(obj))
-               return PTR_ERR(obj);
-
-       vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
-       if (IS_ERR(vma)) {
-               err = PTR_ERR(vma);
-               goto err;
-       }
-
-       err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
-       if (err)
-               goto err;
-
-       engine->wa_ctx.vma = vma;
-       return 0;
-
-err:
-       i915_gem_object_put(obj);
-       return err;
-}
-
-static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
-{
-       i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
-}
-
-typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
-
-static int intel_init_workaround_bb(struct intel_engine_cs *engine)
-{
-       struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-       struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
-                                           &wa_ctx->per_ctx };
-       wa_bb_func_t wa_bb_fn[2];
-       void *batch, *batch_ptr;
-       unsigned int i;
-       int ret;
-
-       if (engine->class != RENDER_CLASS)
-               return 0;
-
-       switch (INTEL_GEN(engine->i915)) {
-       case 12:
-       case 11:
-               return 0;
-       case 10:
-               wa_bb_fn[0] = gen10_init_indirectctx_bb;
-               wa_bb_fn[1] = NULL;
-               break;
-       case 9:
-               wa_bb_fn[0] = gen9_init_indirectctx_bb;
-               wa_bb_fn[1] = NULL;
-               break;
-       case 8:
-               wa_bb_fn[0] = gen8_init_indirectctx_bb;
-               wa_bb_fn[1] = NULL;
-               break;
-       default:
-               MISSING_CASE(INTEL_GEN(engine->i915));
-               return 0;
-       }
-
-       ret = lrc_setup_wa_ctx(engine);
-       if (ret) {
-               drm_dbg(&engine->i915->drm,
-                       "Failed to setup context WA page: %d\n", ret);
-               return ret;
-       }
-
-       batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
-
-       /*
-        * Emit the two workaround batch buffers, recording the offset from the
-        * start of the workaround batch buffer object for each and their
-        * respective sizes.
-        */
-       batch_ptr = batch;
-       for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
-               wa_bb[i]->offset = batch_ptr - batch;
-               if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
-                                                 CACHELINE_BYTES))) {
-                       ret = -EINVAL;
-                       break;
-               }
-               if (wa_bb_fn[i])
-                       batch_ptr = wa_bb_fn[i](engine, batch_ptr);
-               wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
-       }
-       GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
-
-       __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
-       __i915_gem_object_release_map(wa_ctx->vma->obj);
-       if (ret)
-               lrc_destroy_wa_ctx(engine);
-
-       return ret;
-}
-
 static void reset_csb_pointers(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -4185,25 +2868,6 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
        engine->execlists.reset_ccid = active_ccid(engine);
 }
 
-static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
-{
-       int x;
-
-       x = lrc_ring_mi_mode(engine);
-       if (x != -1) {
-               regs[x + 1] &= ~STOP_RING;
-               regs[x + 1] |= STOP_RING << 16;
-       }
-}
-
-static void __execlists_reset_reg_state(const struct intel_context *ce,
-                                       const struct intel_engine_cs *engine)
-{
-       u32 *regs = ce->lrc_reg_state;
-
-       __reset_stop_ring(regs, engine);
-}
-
 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -4287,9 +2951,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 out_replay:
        ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
                     head, ce->ring->tail);
-       __execlists_reset_reg_state(ce, engine);
-       __execlists_update_reg_state(ce, engine, head);
-       ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+       lrc_reset_regs(ce, engine);
+       ce->lrc.lrca = lrc_update_regs(ce, engine, head);
 
 unwind:
        /* Push back any incomplete requests for replay after the reset. */
@@ -4487,7 +3150,7 @@ static void execlists_release(struct intel_engine_cs *engine)
        execlists_shutdown(engine);
 
        intel_engine_cleanup_common(engine);
-       lrc_destroy_wa_ctx(engine);
+       lrc_fini_wa_ctx(engine);
 }
 
 static void
@@ -4581,7 +3244,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
        if (engine->class == RENDER_CLASS)
                rcs_submission_override(engine);
 
-       if (intel_init_workaround_bb(engine))
+       if (lrc_init_wa_ctx(engine))
                /*
                 * We continue even if we fail to initialize WA batch
                 * because we only expect rare glitches but nothing
@@ -4622,218 +3285,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
        return 0;
 }
 
-static void init_common_reg_state(u32 * const regs,
-                                 const struct intel_engine_cs *engine,
-                                 const struct intel_ring *ring,
-                                 bool inhibit)
-{
-       u32 ctl;
-
-       ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
-       ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-       if (inhibit)
-               ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
-       if (INTEL_GEN(engine->i915) < 11)
-               ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
-                                          CTX_CTRL_RS_CTX_ENABLE);
-       regs[CTX_CONTEXT_CONTROL] = ctl;
-
-       regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
-       regs[CTX_TIMESTAMP] = 0;
-}
-
-static void init_wa_bb_reg_state(u32 * const regs,
-                                const struct intel_engine_cs *engine)
-{
-       const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
-
-       if (wa_ctx->per_ctx.size) {
-               const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
-               GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
-               regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
-                       (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
-       }
-
-       if (wa_ctx->indirect_ctx.size) {
-               lrc_ring_setup_indirect_ctx(regs, engine,
-                                           i915_ggtt_offset(wa_ctx->vma) +
-                                           wa_ctx->indirect_ctx.offset,
-                                           wa_ctx->indirect_ctx.size);
-       }
-}
-
-static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
-{
-       if (i915_vm_is_4lvl(&ppgtt->vm)) {
-               /* 64b PPGTT (48bit canonical)
-                * PDP0_DESCRIPTOR contains the base address to PML4 and
-                * other PDP Descriptors are ignored.
-                */
-               ASSIGN_CTX_PML4(ppgtt, regs);
-       } else {
-               ASSIGN_CTX_PDP(ppgtt, regs, 3);
-               ASSIGN_CTX_PDP(ppgtt, regs, 2);
-               ASSIGN_CTX_PDP(ppgtt, regs, 1);
-               ASSIGN_CTX_PDP(ppgtt, regs, 0);
-       }
-}
-
-static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
-{
-       if (i915_is_ggtt(vm))
-               return i915_vm_to_ggtt(vm)->alias;
-       else
-               return i915_vm_to_ppgtt(vm);
-}
-
-static void execlists_init_reg_state(u32 *regs,
-                                    const struct intel_context *ce,
-                                    const struct intel_engine_cs *engine,
-                                    const struct intel_ring *ring,
-                                    bool inhibit)
-{
-       /*
-        * A context is actually a big batch buffer with several
-        * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
-        * values we are setting here are only for the first context restore:
-        * on a subsequent save, the GPU will recreate this batchbuffer with new
-        * values (including all the missing MI_LOAD_REGISTER_IMM commands that
-        * we are not initializing here).
-        *
-        * Must be kept consistent with virtual_update_register_offsets().
-        */
-       set_offsets(regs, reg_offsets(engine), engine, inhibit);
-
-       init_common_reg_state(regs, engine, ring, inhibit);
-       init_ppgtt_reg_state(regs, vm_alias(ce->vm));
-
-       init_wa_bb_reg_state(regs, engine);
-
-       __reset_stop_ring(regs, engine);
-}
-
-static int
-populate_lr_context(struct intel_context *ce,
-                   struct drm_i915_gem_object *ctx_obj,
-                   struct intel_engine_cs *engine,
-                   struct intel_ring *ring)
-{
-       bool inhibit = true;
-       void *vaddr;
-
-       vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
-       if (IS_ERR(vaddr)) {
-               drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
-               return PTR_ERR(vaddr);
-       }
-
-       set_redzone(vaddr, engine);
-
-       if (engine->default_state) {
-               shmem_read(engine->default_state, 0,
-                          vaddr, engine->context_size);
-               __set_bit(CONTEXT_VALID_BIT, &ce->flags);
-               inhibit = false;
-       }
-
-       /* Clear the ppHWSP (inc. per-context counters) */
-       memset(vaddr, 0, PAGE_SIZE);
-
-       /*
-        * The second page of the context object contains some registers which
-        * must be set up prior to the first execution.
-        */
-       execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
-                                ce, engine, ring, inhibit);
-
-       __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
-       i915_gem_object_unpin_map(ctx_obj);
-       return 0;
-}
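Taken together with __execlists_context_alloc() below, the object populated
here has the following layout (a sketch assembled from this patch alone, not
from the new intel_lrc.c):

    /* Logical ring context object (sketch):
     *
     *   page 0                : ppHWSP, cleared above (including the
     *                           per-context counters)
     *   LRC_STATE_OFFSET ..   : register state image, written by
     *                           execlists_init_reg_state()
     *   .. context_size       : remainder of the HW context image, copied
     *                           from engine->default_state when available
     *   +1 page on Gen12      : ce->wa_bb_page, per-context indirect ctx BB
     *   +1 page if DEBUG_GEM  : redzone filled with CONTEXT_REDZONE and
     *                           verified on unpin by check_redzone()
     */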
-
-static struct intel_timeline *pinned_timeline(struct intel_context *ce)
-{
-       struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
-
-       return intel_timeline_create_from_engine(ce->engine,
-                                                page_unmask_bits(tl));
-}
-
-static int __execlists_context_alloc(struct intel_context *ce,
-                                    struct intel_engine_cs *engine)
-{
-       struct drm_i915_gem_object *ctx_obj;
-       struct intel_ring *ring;
-       struct i915_vma *vma;
-       u32 context_size;
-       int ret;
-
-       GEM_BUG_ON(ce->state);
-       context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
-
-       if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-               context_size += I915_GTT_PAGE_SIZE; /* for redzone */
-
-       if (INTEL_GEN(engine->i915) == 12) {
-               ce->wa_bb_page = context_size / PAGE_SIZE;
-               context_size += PAGE_SIZE;
-       }
-
-       ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
-       if (IS_ERR(ctx_obj))
-               return PTR_ERR(ctx_obj);
-
-       vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
-       if (IS_ERR(vma)) {
-               ret = PTR_ERR(vma);
-               goto error_deref_obj;
-       }
-
-       if (!page_mask_bits(ce->timeline)) {
-               struct intel_timeline *tl;
-
-               /*
-                * Use the static global HWSP for the kernel context, and
-                * a dynamically allocated cacheline for everyone else.
-                */
-               if (unlikely(ce->timeline))
-                       tl = pinned_timeline(ce);
-               else
-                       tl = intel_timeline_create(engine->gt);
-               if (IS_ERR(tl)) {
-                       ret = PTR_ERR(tl);
-                       goto error_deref_obj;
-               }
-
-               ce->timeline = tl;
-       }
-
-       ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
-       if (IS_ERR(ring)) {
-               ret = PTR_ERR(ring);
-               goto error_deref_obj;
-       }
-
-       ret = populate_lr_context(ce, ctx_obj, engine, ring);
-       if (ret) {
-               drm_dbg(&engine->i915->drm,
-                       "Failed to populate LRC: %d\n", ret);
-               goto error_ring_free;
-       }
-
-       ce->ring = ring;
-       ce->state = vma;
-
-       return 0;
-
-error_ring_free:
-       intel_ring_put(ring);
-error_deref_obj:
-       i915_gem_object_put(ctx_obj);
-       return ret;
-}
-
 static struct list_head *virtual_queue(struct virtual_engine *ve)
 {
        return &ve->base.execlists.default_priolist.requests[0];
@@ -4891,8 +3342,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk)
        GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
        GEM_BUG_ON(!list_empty(virtual_queue(ve)));
 
-       if (ve->context.state)
-               __execlists_context_fini(&ve->context);
+       lrc_fini(&ve->context);
        intel_context_fini(&ve->context);
 
        intel_breadcrumbs_free(ve->base.breadcrumbs);
@@ -4952,12 +3402,21 @@ static int virtual_context_alloc(struct intel_context *ce)
        return __execlists_context_alloc(ce, ve->siblings[0]);
 }
 
-static int virtual_context_pin(struct intel_context *ce, void *vaddr)
+static int virtual_context_pre_pin(struct intel_context *ce,
+                                    struct i915_gem_ww_ctx *ww,
+                                    void **vaddr)
 {
        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 
        /* Note: we must use a real engine class for setting up reg state */
-       return __execlists_context_pin(ce, ve->siblings[0], vaddr);
+       return lrc_pre_pin(ce, ve->siblings[0], ww, vaddr);
+}
+
+static int virtual_context_pin(struct intel_context *ce, void *vaddr)
+{
+       struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+       return lrc_pin(ce, ve->siblings[0], vaddr);
 }
 
 static void virtual_context_enter(struct intel_context *ce)
@@ -4985,10 +3444,10 @@ static void virtual_context_exit(struct intel_context *ce)
 static const struct intel_context_ops virtual_context_ops = {
        .alloc = virtual_context_alloc,
 
-       .pre_pin = execlists_context_pre_pin,
+       .pre_pin = virtual_context_pre_pin,
        .pin = virtual_context_pin,
-       .unpin = execlists_context_unpin,
-       .post_unpin = execlists_context_post_unpin,
+       .unpin = lrc_unpin,
+       .post_unpin = lrc_post_unpin,
 
        .enter = virtual_context_enter,
        .exit = virtual_context_exit,
@@ -5470,28 +3929,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-void intel_lr_context_reset(struct intel_engine_cs *engine,
-                           struct intel_context *ce,
-                           u32 head,
-                           bool scrub)
-{
-       GEM_BUG_ON(!intel_context_is_pinned(ce));
-
-       /*
-        * We want a simple context + ring to execute the breadcrumb update.
-        * We cannot rely on the context being intact across the GPU hang,
-        * so clear it and rebuild just what we need for the breadcrumb.
-        * All pending requests for this context will be zapped, and any
-        * future request will be after userspace has had the opportunity
-        * to recreate its own state.
-        */
-       if (scrub)
-               restore_default_state(ce, engine);
-
-       /* Rerun the request; its payload has been neutered (if guilty). */
-       __execlists_update_reg_state(ce, engine, head);
-}
-
 bool
 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
 {
index 2c9d735..0c675bb 100644 (file)
@@ -22,25 +22,8 @@ enum {
 
 int intel_execlists_submission_setup(struct intel_engine_cs *engine);
 
-/* Logical Ring Contexts */
-/* At the start of the context image is its per-process HWS page */
-#define LRC_PPHWSP_PN  (0)
-#define LRC_PPHWSP_SZ  (1)
-/* After the PPHWSP we have the logical state for the context */
-#define LRC_STATE_PN   (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
-#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE)
-
-/* Space within PPHWSP reserved to be used as scratch */
-#define LRC_PPHWSP_SCRATCH             0x34
-#define LRC_PPHWSP_SCRATCH_ADDR                (LRC_PPHWSP_SCRATCH * sizeof(u32))
-
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
-void intel_lr_context_reset(struct intel_engine_cs *engine,
-                           struct intel_context *ce,
-                           u32 head,
-                           bool scrub);
-
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
                                   struct drm_printer *m,
                                   void (*show_request)(struct drm_printer *m,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
new file mode 100644 (file)
index 0000000..35f4352
--- /dev/null
@@ -0,0 +1,1561 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#include "gen8_engine_cs.h"
+#include "i915_drv.h"
+#include "i915_perf.h"
+#include "intel_engine.h"
+#include "intel_gpu_commands.h"
+#include "intel_gt.h"
+#include "intel_lrc.h"
+#include "intel_lrc_reg.h"
+#include "intel_ring.h"
+#include "shmem_utils.h"
+
+static inline unsigned int dword_in_page(void *addr)
+{
+       return offset_in_page(addr) / sizeof(u32);
+}
+
+static void set_offsets(u32 *regs,
+                       const u8 *data,
+                       const struct intel_engine_cs *engine,
+                       bool close)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+       (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+       (((x) >> 2) & 0x7f)
+#define END 0
+{
+       const u32 base = engine->mmio_base;
+
+       while (*data) {
+               u8 count, flags;
+
+               if (*data & BIT(7)) { /* skip */
+                       count = *data++ & ~BIT(7);
+                       regs += count;
+                       continue;
+               }
+
+               count = *data & 0x3f;
+               flags = *data >> 6;
+               data++;
+
+               *regs = MI_LOAD_REGISTER_IMM(count);
+               if (flags & POSTED)
+                       *regs |= MI_LRI_FORCE_POSTED;
+               if (INTEL_GEN(engine->i915) >= 11)
+                       *regs |= MI_LRI_LRM_CS_MMIO;
+               regs++;
+
+               GEM_BUG_ON(!count);
+               do {
+                       u32 offset = 0;
+                       u8 v;
+
+                       do {
+                               v = *data++;
+                               offset <<= 7;
+                               offset |= v & ~BIT(7);
+                       } while (v & BIT(7));
+
+                       regs[0] = base + (offset << 2);
+                       regs += 2;
+               } while (--count);
+       }
+
+       if (close) {
+               /* Close the batch; used mainly by live_lrc_layout() */
+               *regs = MI_BATCH_BUFFER_END;
+               if (INTEL_GEN(engine->i915) >= 10)
+                       *regs |= BIT(0);
+       }
+}
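
(Aside, not part of the patch: a minimal userspace sketch of the bytecode that set_offsets() consumes, using the NOP/LRI/REG/REG16 encoding defined above. The stream bytes and the 0x2000 mmio base are hypothetical, and the POSTED flag is ignored here for brevity.)

#include <stdint.h>
#include <stdio.h>

#define MMIO_BASE 0x2000u /* assumed engine->mmio_base for this sketch */

int main(void)
{
	/* NOP(1), LRI(2, 0), REG16(0x244), REG(0x034), END */
	const uint8_t data[] = { 0x81, 0x02, 0x81, 0x11, 0x0d, 0x00 };
	const uint8_t *p = data;
	unsigned int dw = 0;

	while (*p) {
		if (*p & 0x80) { /* NOP: skip that many dwords */
			dw += *p++ & 0x7f;
			continue;
		}

		/* low 6 bits: register count; bits 6-7: flags (POSTED) */
		unsigned int count = *p++ & 0x3f;
		printf("dw %u: MI_LOAD_REGISTER_IMM(%u)\n", dw++, count);

		while (count--) { /* each register is an (offset, value) pair */
			uint32_t offset = 0, v;

			do { /* MSB-first 7-bit varint, as in set_offsets() */
				v = *p++;
				offset = (offset << 7) | (v & 0x7f);
			} while (v & 0x80);

			printf("dw %u: reg %#x (value slot at dw %u)\n",
			       dw, MMIO_BASE + (offset << 2), dw + 1);
			dw += 2;
		}
	}

	return 0;
}

(Decoding this stream yields MI_LOAD_REGISTER_IMM(2) at dword 1 naming registers 0x2244 and 0x2034; the value slots are left for init_common_regs() and friends to fill, which is the shape the offset tables below describe.)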
+
+static const u8 gen8_xcs_offsets[] = {
+       NOP(1),
+       LRI(11, 0),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x11c),
+       REG(0x114),
+       REG(0x118),
+
+       NOP(9),
+       LRI(9, 0),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(13),
+       LRI(2, 0),
+       REG16(0x200),
+       REG(0x028),
+
+       END
+};
+
+static const u8 gen9_xcs_offsets[] = {
+       NOP(1),
+       LRI(14, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x11c),
+       REG(0x114),
+       REG(0x118),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+
+       NOP(3),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(13),
+       LRI(1, POSTED),
+       REG16(0x200),
+
+       NOP(13),
+       LRI(44, POSTED),
+       REG(0x028),
+       REG(0x09c),
+       REG(0x0c0),
+       REG(0x178),
+       REG(0x17c),
+       REG16(0x358),
+       REG(0x170),
+       REG(0x150),
+       REG(0x154),
+       REG(0x158),
+       REG16(0x41c),
+       REG16(0x600),
+       REG16(0x604),
+       REG16(0x608),
+       REG16(0x60c),
+       REG16(0x610),
+       REG16(0x614),
+       REG16(0x618),
+       REG16(0x61c),
+       REG16(0x620),
+       REG16(0x624),
+       REG16(0x628),
+       REG16(0x62c),
+       REG16(0x630),
+       REG16(0x634),
+       REG16(0x638),
+       REG16(0x63c),
+       REG16(0x640),
+       REG16(0x644),
+       REG16(0x648),
+       REG16(0x64c),
+       REG16(0x650),
+       REG16(0x654),
+       REG16(0x658),
+       REG16(0x65c),
+       REG16(0x660),
+       REG16(0x664),
+       REG16(0x668),
+       REG16(0x66c),
+       REG16(0x670),
+       REG16(0x674),
+       REG16(0x678),
+       REG16(0x67c),
+       REG(0x068),
+
+       END
+};
+
+static const u8 gen12_xcs_offsets[] = {
+       NOP(1),
+       LRI(13, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+       REG(0x180),
+       REG16(0x2b4),
+
+       NOP(5),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       END
+};
+
+static const u8 gen8_rcs_offsets[] = {
+       NOP(1),
+       LRI(14, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x11c),
+       REG(0x114),
+       REG(0x118),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+
+       NOP(3),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(13),
+       LRI(1, 0),
+       REG(0x0c8),
+
+       END
+};
+
+static const u8 gen9_rcs_offsets[] = {
+       NOP(1),
+       LRI(14, POSTED),
+       REG16(0x244),
+       REG(0x34),
+       REG(0x30),
+       REG(0x38),
+       REG(0x3c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x11c),
+       REG(0x114),
+       REG(0x118),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+
+       NOP(3),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(13),
+       LRI(1, 0),
+       REG(0xc8),
+
+       NOP(13),
+       LRI(44, POSTED),
+       REG(0x28),
+       REG(0x9c),
+       REG(0xc0),
+       REG(0x178),
+       REG(0x17c),
+       REG16(0x358),
+       REG(0x170),
+       REG(0x150),
+       REG(0x154),
+       REG(0x158),
+       REG16(0x41c),
+       REG16(0x600),
+       REG16(0x604),
+       REG16(0x608),
+       REG16(0x60c),
+       REG16(0x610),
+       REG16(0x614),
+       REG16(0x618),
+       REG16(0x61c),
+       REG16(0x620),
+       REG16(0x624),
+       REG16(0x628),
+       REG16(0x62c),
+       REG16(0x630),
+       REG16(0x634),
+       REG16(0x638),
+       REG16(0x63c),
+       REG16(0x640),
+       REG16(0x644),
+       REG16(0x648),
+       REG16(0x64c),
+       REG16(0x650),
+       REG16(0x654),
+       REG16(0x658),
+       REG16(0x65c),
+       REG16(0x660),
+       REG16(0x664),
+       REG16(0x668),
+       REG16(0x66c),
+       REG16(0x670),
+       REG16(0x674),
+       REG16(0x678),
+       REG16(0x67c),
+       REG(0x68),
+
+       END
+};
+
+static const u8 gen11_rcs_offsets[] = {
+       NOP(1),
+       LRI(15, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x11c),
+       REG(0x114),
+       REG(0x118),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+       REG(0x180),
+
+       NOP(1),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       LRI(1, POSTED),
+       REG(0x1b0),
+
+       NOP(10),
+       LRI(1, 0),
+       REG(0x0c8),
+
+       END
+};
+
+static const u8 gen12_rcs_offsets[] = {
+       NOP(1),
+       LRI(13, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+       REG(0x180),
+       REG16(0x2b4),
+
+       NOP(5),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       LRI(3, POSTED),
+       REG(0x1b0),
+       REG16(0x5a8),
+       REG16(0x5ac),
+
+       NOP(6),
+       LRI(1, 0),
+       REG(0x0c8),
+       NOP(3 + 9 + 1),
+
+       LRI(51, POSTED),
+       REG16(0x588),
+       REG16(0x588),
+       REG16(0x588),
+       REG16(0x588),
+       REG16(0x588),
+       REG16(0x588),
+       REG(0x028),
+       REG(0x09c),
+       REG(0x0c0),
+       REG(0x178),
+       REG(0x17c),
+       REG16(0x358),
+       REG(0x170),
+       REG(0x150),
+       REG(0x154),
+       REG(0x158),
+       REG16(0x41c),
+       REG16(0x600),
+       REG16(0x604),
+       REG16(0x608),
+       REG16(0x60c),
+       REG16(0x610),
+       REG16(0x614),
+       REG16(0x618),
+       REG16(0x61c),
+       REG16(0x620),
+       REG16(0x624),
+       REG16(0x628),
+       REG16(0x62c),
+       REG16(0x630),
+       REG16(0x634),
+       REG16(0x638),
+       REG16(0x63c),
+       REG16(0x640),
+       REG16(0x644),
+       REG16(0x648),
+       REG16(0x64c),
+       REG16(0x650),
+       REG16(0x654),
+       REG16(0x658),
+       REG16(0x65c),
+       REG16(0x660),
+       REG16(0x664),
+       REG16(0x668),
+       REG16(0x66c),
+       REG16(0x670),
+       REG16(0x674),
+       REG16(0x678),
+       REG16(0x67c),
+       REG(0x068),
+       REG(0x084),
+       NOP(1),
+
+       END
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(const struct intel_engine_cs *engine)
+{
+       /*
+        * The gen12+ lists only have the registers we program in the basic
+        * default state. We rely on the context image using relative
+        * addressing to automatically fix up the register state between
+        * the physical engines of a virtual engine.
+        */
+       GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
+                  !intel_engine_has_relative_mmio(engine));
+
+       if (engine->class == RENDER_CLASS) {
+               if (INTEL_GEN(engine->i915) >= 12)
+                       return gen12_rcs_offsets;
+               else if (INTEL_GEN(engine->i915) >= 11)
+                       return gen11_rcs_offsets;
+               else if (INTEL_GEN(engine->i915) >= 9)
+                       return gen9_rcs_offsets;
+               else
+                       return gen8_rcs_offsets;
+       } else {
+               if (INTEL_GEN(engine->i915) >= 12)
+                       return gen12_xcs_offsets;
+               else if (INTEL_GEN(engine->i915) >= 9)
+                       return gen9_xcs_offsets;
+               else
+                       return gen8_xcs_offsets;
+       }
+}
+
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+       if (INTEL_GEN(engine->i915) >= 12)
+               return 0x60;
+       else if (INTEL_GEN(engine->i915) >= 9)
+               return 0x54;
+       else if (engine->class == RENDER_CLASS)
+               return 0x58;
+       else
+               return -1;
+}
+
+static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
+{
+       if (INTEL_GEN(engine->i915) >= 12)
+               return 0x74;
+       else if (INTEL_GEN(engine->i915) >= 9)
+               return 0x68;
+       else if (engine->class == RENDER_CLASS)
+               return 0xd8;
+       else
+               return -1;
+}
+
+static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
+{
+       if (INTEL_GEN(engine->i915) >= 12)
+               return 0x12;
+       else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
+               return 0x18;
+       else
+               return -1;
+}
+
+static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
+{
+       int x;
+
+       x = lrc_ring_wa_bb_per_ctx(engine);
+       if (x < 0)
+               return x;
+
+       return x + 2;
+}
+
+static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
+{
+       int x;
+
+       x = lrc_ring_indirect_ptr(engine);
+       if (x < 0)
+               return x;
+
+       return x + 2;
+}
+
+static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
+{
+       if (engine->class != RENDER_CLASS)
+               return -1;
+
+       if (INTEL_GEN(engine->i915) >= 12)
+               return 0xb6;
+       else if (INTEL_GEN(engine->i915) >= 11)
+               return 0xaa;
+       else
+               return -1;
+}
+
+static u32
+lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
+{
+       switch (INTEL_GEN(engine->i915)) {
+       default:
+               MISSING_CASE(INTEL_GEN(engine->i915));
+               fallthrough;
+       case 12:
+               return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+       case 11:
+               return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+       case 10:
+               return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+       case 9:
+               return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+       case 8:
+               return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+       }
+}
+
+static void
+lrc_setup_indirect_ctx(u32 *regs,
+                      const struct intel_engine_cs *engine,
+                      u32 ctx_bb_ggtt_addr,
+                      u32 size)
+{
+       GEM_BUG_ON(!size);
+       GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
+       GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
+       regs[lrc_ring_indirect_ptr(engine) + 1] =
+               ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
+
+       GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
+       regs[lrc_ring_indirect_offset(engine) + 1] =
+               lrc_ring_indirect_offset_default(engine) << 6;
+}
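
(Aside, not part of the patch: the single dword written into the INDIRECT_CTX slot packs both fields. Because the GGTT address is cacheline aligned, its low six bits are free to carry the batch length in cachelines, which is what the alignment GEM_BUG_ONs above guarantee. A standalone sketch with hypothetical values:)

#include <stdint.h>
#include <stdio.h>

#define CACHELINE_BYTES 64u /* matches the kernel's value */

int main(void)
{
	uint32_t ctx_bb_ggtt_addr = 0x10000;	/* hypothetical, cacheline aligned */
	uint32_t size = 3 * CACHELINE_BYTES;	/* hypothetical wa bb length */

	/* address in the high bits, length in cachelines in the low six */
	printf("INDIRECT_CTX dword: %#x\n",
	       ctx_bb_ggtt_addr | (size / CACHELINE_BYTES)); /* 0x10003 */
	return 0;
}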
+
+static void init_common_regs(u32 * const regs,
+                            const struct intel_context *ce,
+                            const struct intel_engine_cs *engine,
+                            bool inhibit)
+{
+       u32 ctl;
+
+       ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+       ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+       if (inhibit)
+               ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+       if (INTEL_GEN(engine->i915) < 11)
+               ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+                                          CTX_CTRL_RS_CTX_ENABLE);
+       regs[CTX_CONTEXT_CONTROL] = ctl;
+
+       regs[CTX_TIMESTAMP] = ce->runtime.last;
+}
+
+static void init_wa_bb_regs(u32 * const regs,
+                           const struct intel_engine_cs *engine)
+{
+       const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
+
+       if (wa_ctx->per_ctx.size) {
+               const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+
+               GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+               regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
+                       (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
+       }
+
+       if (wa_ctx->indirect_ctx.size) {
+               lrc_setup_indirect_ctx(regs, engine,
+                                      i915_ggtt_offset(wa_ctx->vma) +
+                                      wa_ctx->indirect_ctx.offset,
+                                      wa_ctx->indirect_ctx.size);
+       }
+}
+
+static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
+{
+       if (i915_vm_is_4lvl(&ppgtt->vm)) {
+               /* 64b PPGTT (48bit canonical)
+                * PDP0_DESCRIPTOR contains the base address to PML4 and
+                * other PDP Descriptors are ignored.
+                */
+               ASSIGN_CTX_PML4(ppgtt, regs);
+       } else {
+               ASSIGN_CTX_PDP(ppgtt, regs, 3);
+               ASSIGN_CTX_PDP(ppgtt, regs, 2);
+               ASSIGN_CTX_PDP(ppgtt, regs, 1);
+               ASSIGN_CTX_PDP(ppgtt, regs, 0);
+       }
+}
+
+static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
+{
+       if (i915_is_ggtt(vm))
+               return i915_vm_to_ggtt(vm)->alias;
+       else
+               return i915_vm_to_ppgtt(vm);
+}
+
+static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
+{
+       int x;
+
+       x = lrc_ring_mi_mode(engine);
+       if (x != -1) {
+               regs[x + 1] &= ~STOP_RING;
+               regs[x + 1] |= STOP_RING << 16;
+       }
+}
+
+static void __lrc_init_regs(u32 *regs,
+                           const struct intel_context *ce,
+                           const struct intel_engine_cs *engine,
+                           bool inhibit)
+{
+       /*
+        * A context is actually a big batch buffer with several
+        * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+        * values we are setting here are only for the first context restore:
+        * on a subsequent save, the GPU will recreate this batch buffer with new
+        * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+        * we are not initializing here).
+        *
+        * Must keep consistent with virtual_update_register_offsets().
+        */
+
+       if (inhibit)
+               memset(regs, 0, PAGE_SIZE);
+
+       set_offsets(regs, reg_offsets(engine), engine, inhibit);
+
+       init_common_regs(regs, ce, engine, inhibit);
+       init_ppgtt_regs(regs, vm_alias(ce->vm));
+
+       init_wa_bb_regs(regs, engine);
+
+       __reset_stop_ring(regs, engine);
+}
+
+void lrc_init_regs(const struct intel_context *ce,
+                  const struct intel_engine_cs *engine,
+                  bool inhibit)
+{
+       __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
+}
+
+void lrc_reset_regs(const struct intel_context *ce,
+                   const struct intel_engine_cs *engine)
+{
+       __reset_stop_ring(ce->lrc_reg_state, engine);
+}
+
+static void
+set_redzone(void *vaddr, const struct intel_engine_cs *engine)
+{
+       if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+               return;
+
+       vaddr += engine->context_size;
+
+       memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
+}
+
+static void
+check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
+{
+       if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+               return;
+
+       vaddr += engine->context_size;
+
+       if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
+               drm_err_once(&engine->i915->drm,
+                            "%s context redzone overwritten!\n",
+                            engine->name);
+}
+
+void lrc_init_state(struct intel_context *ce,
+                   struct intel_engine_cs *engine,
+                   void *state)
+{
+       bool inhibit = true;
+
+       set_redzone(state, engine);
+
+       if (engine->default_state) {
+               shmem_read(engine->default_state, 0,
+                          state, engine->context_size);
+               __set_bit(CONTEXT_VALID_BIT, &ce->flags);
+               inhibit = false;
+       }
+
+       /* Clear the ppHWSP (inc. per-context counters) */
+       memset(state, 0, PAGE_SIZE);
+
+       /*
+        * The second page of the context object contains some registers which
+        * must be set up prior to the first execution.
+        */
+       __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
+}
+
+static struct i915_vma *
+__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       u32 context_size;
+
+       context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
+
+       if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+               context_size += I915_GTT_PAGE_SIZE; /* for redzone */
+
+       if (INTEL_GEN(engine->i915) == 12) {
+               ce->wa_bb_page = context_size / PAGE_SIZE;
+               context_size += PAGE_SIZE;
+       }
+
+       obj = i915_gem_object_create_shmem(engine->i915, context_size);
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
+
+       vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+       if (IS_ERR(vma)) {
+               i915_gem_object_put(obj);
+               return vma;
+       }
+
+       return vma;
+}
+
+static struct intel_timeline *
+pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+       struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
+
+       return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
+}
+
+int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+       struct intel_ring *ring;
+       struct i915_vma *vma;
+       int err;
+
+       GEM_BUG_ON(ce->state);
+
+       vma = __lrc_alloc_state(ce, engine);
+       if (IS_ERR(vma))
+               return PTR_ERR(vma);
+
+       ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
+       if (IS_ERR(ring)) {
+               err = PTR_ERR(ring);
+               goto err_vma;
+       }
+
+       if (!page_mask_bits(ce->timeline)) {
+               struct intel_timeline *tl;
+
+               /*
+                * Use the static global HWSP for the kernel context, and
+                * a dynamically allocated cacheline for everyone else.
+                */
+               if (unlikely(ce->timeline))
+                       tl = pinned_timeline(ce, engine);
+               else
+                       tl = intel_timeline_create(engine->gt);
+               if (IS_ERR(tl)) {
+                       err = PTR_ERR(tl);
+                       goto err_ring;
+               }
+
+               ce->timeline = tl;
+       }
+
+       ce->ring = ring;
+       ce->state = vma;
+
+       return 0;
+
+err_ring:
+       intel_ring_put(ring);
+err_vma:
+       i915_vma_put(vma);
+       return err;
+}
+
+void lrc_reset(struct intel_context *ce)
+{
+       CE_TRACE(ce, "reset\n");
+       GEM_BUG_ON(!intel_context_is_pinned(ce));
+
+       intel_ring_reset(ce->ring, ce->ring->emit);
+
+       /* Scrub away the garbage */
+       lrc_init_regs(ce, ce->engine, true);
+       ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
+}
+
+int
+lrc_pre_pin(struct intel_context *ce,
+           struct intel_engine_cs *engine,
+           struct i915_gem_ww_ctx *ww,
+           void **vaddr)
+{
+       GEM_BUG_ON(!ce->state);
+       GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
+
+       *vaddr = i915_gem_object_pin_map(ce->state->obj,
+                                        i915_coherent_map_type(ce->engine->i915) |
+                                        I915_MAP_OVERRIDE);
+
+       return PTR_ERR_OR_ZERO(*vaddr);
+}
+
+int
+lrc_pin(struct intel_context *ce,
+       struct intel_engine_cs *engine,
+       void *vaddr)
+{
+       ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
+       ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
+       return 0;
+}
+
+void lrc_unpin(struct intel_context *ce)
+{
+       check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
+                     ce->engine);
+}
+
+void lrc_post_unpin(struct intel_context *ce)
+{
+       i915_gem_object_unpin_map(ce->state->obj);
+}
+
+void lrc_fini(struct intel_context *ce)
+{
+       if (!ce->state)
+               return;
+
+       intel_ring_put(fetch_and_zero(&ce->ring));
+       i915_vma_put(fetch_and_zero(&ce->state));
+}
+
+void lrc_destroy(struct kref *kref)
+{
+       struct intel_context *ce = container_of(kref, typeof(*ce), ref);
+
+       GEM_BUG_ON(!i915_active_is_idle(&ce->active));
+       GEM_BUG_ON(intel_context_is_pinned(ce));
+
+       lrc_fini(ce);
+
+       intel_context_fini(ce);
+       intel_context_free(ce);
+}
+
+static u32 *
+gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
+{
+       *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+               MI_SRM_LRM_GLOBAL_GTT |
+               MI_LRI_LRM_CS_MMIO;
+       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+       *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+               CTX_TIMESTAMP * sizeof(u32);
+       *cs++ = 0;
+
+       *cs++ = MI_LOAD_REGISTER_REG |
+               MI_LRR_SOURCE_CS_MMIO |
+               MI_LRI_LRM_CS_MMIO;
+       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+       *cs++ = MI_LOAD_REGISTER_REG |
+               MI_LRR_SOURCE_CS_MMIO |
+               MI_LRI_LRM_CS_MMIO;
+       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+       return cs;
+}
+
+static u32 *
+gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
+{
+       GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
+
+       *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+               MI_SRM_LRM_GLOBAL_GTT |
+               MI_LRI_LRM_CS_MMIO;
+       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+       *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+               (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
+       *cs++ = 0;
+
+       return cs;
+}
+
+static u32 *
+gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
+{
+       GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
+
+       *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+               MI_SRM_LRM_GLOBAL_GTT |
+               MI_LRI_LRM_CS_MMIO;
+       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+       *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
+               (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
+       *cs++ = 0;
+
+       *cs++ = MI_LOAD_REGISTER_REG |
+               MI_LRR_SOURCE_CS_MMIO |
+               MI_LRI_LRM_CS_MMIO;
+       *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+       *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
+
+       return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
+{
+       cs = gen12_emit_timestamp_wa(ce, cs);
+       cs = gen12_emit_cmd_buf_wa(ce, cs);
+       cs = gen12_emit_restore_scratch(ce, cs);
+
+       return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
+{
+       cs = gen12_emit_timestamp_wa(ce, cs);
+       cs = gen12_emit_restore_scratch(ce, cs);
+
+       return cs;
+}
+
+static inline u32 context_wa_bb_offset(const struct intel_context *ce)
+{
+       return PAGE_SIZE * ce->wa_bb_page;
+}
+
+static u32 *context_indirect_bb(const struct intel_context *ce)
+{
+       void *ptr;
+
+       GEM_BUG_ON(!ce->wa_bb_page);
+
+       ptr = ce->lrc_reg_state;
+       ptr -= LRC_STATE_OFFSET; /* back to start of context image */
+       ptr += context_wa_bb_offset(ce);
+
+       return ptr;
+}
+
+static void
+setup_indirect_ctx_bb(const struct intel_context *ce,
+                     const struct intel_engine_cs *engine,
+                     u32 *(*emit)(const struct intel_context *, u32 *))
+{
+       u32 * const start = context_indirect_bb(ce);
+       u32 *cs;
+
+       cs = emit(ce, start);
+       GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+       while ((unsigned long)cs % CACHELINE_BYTES)
+               *cs++ = MI_NOOP;
+
+       lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
+                              i915_ggtt_offset(ce->state) +
+                              context_wa_bb_offset(ce),
+                              (cs - start) * sizeof(*cs));
+}
+
+/*
+ * The context descriptor encodes various attributes of a context,
+ * including its GTT address and some flags. Because it's fairly
+ * expensive to calculate, we'll just do it once and cache the result,
+ * which remains valid until the context is unpinned.
+ *
+ * This is what a descriptor looks like, from LSB to MSB::
+ *
+ *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
+ *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
+ *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
+ *      bits 53-54:    mbz, reserved for use by hardware
+ *      bits 55-63:    group ID, currently unused and set to 0
+ *
+ * Starting from Gen11, the upper dword of the descriptor has a new format:
+ *
+ *      bits 32-36:    reserved
+ *      bits 37-47:    SW context ID
+ *      bits 48-53:    engine instance
+ *      bit 54:        mbz, reserved for use by hardware
+ *      bits 55-60:    SW counter
+ *      bits 61-63:    engine class
+ *
+ * The engine info, SW context ID and SW counter together need to form a
+ * unique number (Context ID) per LRC.
+ */
+static inline u32 lrc_descriptor(const struct intel_context *ce)
+{
+       u32 desc;
+
+       desc = INTEL_LEGACY_32B_CONTEXT;
+       if (i915_vm_is_4lvl(ce->vm))
+               desc = INTEL_LEGACY_64B_CONTEXT;
+       desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
+       desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
+       if (IS_GEN(ce->vm->i915, 8))
+               desc |= GEN8_CTX_L3LLC_COHERENT;
+
+       return i915_ggtt_offset(ce->state) | desc;
+}
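
(Aside, not part of the patch: a worked example of the helper above, with a hypothetical GGTT address. For a context using a 4-level VM on a non-Gen8 part, the cached descriptor is built symbolically as:)

	desc  = INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;	/* no L3LLC bit: !gen8 */
	desc |= 0x00100000;	/* assumed i915_ggtt_offset(ce->state), page aligned */

(lrc_update_regs() below then ORs in CTX_DESC_FORCE_RESTORE before the result is cached in ce->lrc.lrca by lrc_pin().)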
+
+u32 lrc_update_regs(const struct intel_context *ce,
+                   const struct intel_engine_cs *engine,
+                   u32 head)
+{
+       struct intel_ring *ring = ce->ring;
+       u32 *regs = ce->lrc_reg_state;
+
+       GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
+       GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
+
+       regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+       regs[CTX_RING_HEAD] = head;
+       regs[CTX_RING_TAIL] = ring->tail;
+       regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+
+       /* RPCS */
+       if (engine->class == RENDER_CLASS) {
+               regs[CTX_R_PWR_CLK_STATE] =
+                       intel_sseu_make_rpcs(engine->gt, &ce->sseu);
+
+               i915_oa_init_reg_state(ce, engine);
+       }
+
+       if (ce->wa_bb_page) {
+               u32 *(*fn)(const struct intel_context *ce, u32 *cs);
+
+               fn = gen12_emit_indirect_ctx_xcs;
+               if (ce->engine->class == RENDER_CLASS)
+                       fn = gen12_emit_indirect_ctx_rcs;
+
+               /* Mutually exclusive with the global indirect bb */
+               GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
+               setup_indirect_ctx_bb(ce, engine, fn);
+       }
+
+       return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
+}
+
+void lrc_update_offsets(struct intel_context *ce,
+                       struct intel_engine_cs *engine)
+{
+       set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
+}
+
+void lrc_check_regs(const struct intel_context *ce,
+                   const struct intel_engine_cs *engine,
+                   const char *when)
+{
+       const struct intel_ring *ring = ce->ring;
+       u32 *regs = ce->lrc_reg_state;
+       bool valid = true;
+       int x;
+
+       if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
+               pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
+                      engine->name,
+                      regs[CTX_RING_START],
+                      i915_ggtt_offset(ring->vma));
+               regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+               valid = false;
+       }
+
+       if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
+           (RING_CTL_SIZE(ring->size) | RING_VALID)) {
+               pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
+                      engine->name,
+                      regs[CTX_RING_CTL],
+                      (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
+               regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+               valid = false;
+       }
+
+       x = lrc_ring_mi_mode(engine);
+       if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
+               pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
+                      engine->name, regs[x + 1]);
+               regs[x + 1] &= ~STOP_RING;
+               regs[x + 1] |= STOP_RING << 16;
+               valid = false;
+       }
+
+       WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
+}
+
+/*
+ * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
+ * PIPE_CONTROL instruction. This is required for the flush to happen correctly,
+ * but there is a slight complication as this is applied in a WA batch where the
+ * values are only initialized once, so we cannot take the register value at the
+ * beginning and reuse it further; hence we save its value to memory, upload a
+ * constant value with bit 21 set and then restore it back with the saved value.
+ * To simplify the WA, a constant value is formed by using the default value
+ * of this register. This shouldn't be a problem because we are only modifying
+ * it for a short period and this batch is non-preemptible. We could of course
+ * use additional instructions that read the actual value of the register
+ * at that time and set our bit of interest, but that makes the WA complicated.
+ *
+ * This WA is also required for Gen9 so extracting as a function avoids
+ * code duplication.
+ */
+static u32 *
+gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
+{
+       /* NB no one else is allowed to scribble over scratch + 256! */
+       *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+       *batch++ = intel_gt_scratch_offset(engine->gt,
+                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
+       *batch++ = 0;
+
+       *batch++ = MI_LOAD_REGISTER_IMM(1);
+       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+       *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_CS_STALL |
+                                      PIPE_CONTROL_DC_FLUSH_ENABLE,
+                                      0);
+
+       *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+       *batch++ = intel_gt_scratch_offset(engine->gt,
+                                          INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
+       *batch++ = 0;
+
+       return batch;
+}
+
+/*
+ * Typically we only have one indirect_ctx and per_ctx batch buffer, which are
+ * initialized at the beginning and shared across all contexts, but this field
+ * helps us to have multiple batches at different offsets and select them based
+ * on some criteria. At the moment this batch always starts at the beginning of
+ * the page and we don't have multiple wa_ctx batch buffers.
+ *
+ * The number of WAs applied is not known at the beginning; we use this field
+ * to return the number of DWORDs written.
+ *
+ * Note that this batch does not contain MI_BATCH_BUFFER_END, so instead it
+ * adds NOOPs as padding to make it cacheline aligned.
+ * MI_BATCH_BUFFER_END will be added to the per-ctx batch, and together they
+ * make a complete batch buffer.
+ */
+static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+       /* WaDisableCtxRestoreArbitration:bdw,chv */
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+       /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
+       if (IS_BROADWELL(engine->i915))
+               batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+       /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
+       /* Actual scratch location is at 128 bytes offset */
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_FLUSH_L3 |
+                                      PIPE_CONTROL_STORE_DATA_INDEX |
+                                      PIPE_CONTROL_CS_STALL |
+                                      PIPE_CONTROL_QW_WRITE,
+                                      LRC_PPHWSP_SCRATCH_ADDR);
+
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+       /* Pad to end of cacheline */
+       while ((unsigned long)batch % CACHELINE_BYTES)
+               *batch++ = MI_NOOP;
+
+       /*
+        * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
+        * execution depends on the length specified in terms of cache lines
+        * in the register CTX_RCS_INDIRECT_CTX
+        */
+
+       return batch;
+}
+
+struct lri {
+       i915_reg_t reg;
+       u32 value;
+};
+
+static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+{
+       GEM_BUG_ON(!count || count > 63);
+
+       *batch++ = MI_LOAD_REGISTER_IMM(count);
+       do {
+               *batch++ = i915_mmio_reg_offset(lri->reg);
+               *batch++ = lri->value;
+       } while (lri++, --count);
+       *batch++ = MI_NOOP;
+
+       return batch;
+}
+
+static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+       static const struct lri lri[] = {
+               /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
+               {
+                       COMMON_SLICE_CHICKEN2,
+                       __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
+                                      0),
+               },
+
+               /* BSpec: 11391 */
+               {
+                       FF_SLICE_CHICKEN,
+                       __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
+                                      FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
+               },
+
+               /* BSpec: 11299 */
+               {
+                       _3D_CHICKEN3,
+                       __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
+                                      _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
+               }
+       };
+
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+       /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
+       batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+       /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_FLUSH_L3 |
+                                      PIPE_CONTROL_STORE_DATA_INDEX |
+                                      PIPE_CONTROL_CS_STALL |
+                                      PIPE_CONTROL_QW_WRITE,
+                                      LRC_PPHWSP_SCRATCH_ADDR);
+
+       batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+
+       /* WaMediaPoolStateCmdInWABB:bxt,glk */
+       if (HAS_POOLED_EU(engine->i915)) {
+               /*
+                * EU pool configuration is set up along with the golden
+                * context during context initialization. This value depends
+                * on the device type (2x6 or 3x6) and needs to be updated
+                * based on which subslice is disabled, especially for 2x6
+                * devices. However, it is safe to load the default
+                * configuration of a 3x6 device instead of masking off the
+                * corresponding bits, because the HW ignores bits of a
+                * disabled subslice and drops down to the appropriate config.
+                * See render_state_setup() in i915_gem_render_state.c for the
+                * possible configurations; to avoid duplication they are not
+                * shown here again.
+                */
+               *batch++ = GEN9_MEDIA_POOL_STATE;
+               *batch++ = GEN9_MEDIA_POOL_ENABLE;
+               *batch++ = 0x00777000;
+               *batch++ = 0;
+               *batch++ = 0;
+               *batch++ = 0;
+       }
+
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+       /* Pad to end of cacheline */
+       while ((unsigned long)batch % CACHELINE_BYTES)
+               *batch++ = MI_NOOP;
+
+       return batch;
+}
+
+static u32 *
+gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+       int i;
+
+       /*
+        * WaPipeControlBefore3DStateSamplePattern: cnl
+        *
+        * Ensure the engine is idle prior to programming a
+        * 3DSTATE_SAMPLE_PATTERN during a context restore.
+        */
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_CS_STALL,
+                                      0);
+       /*
+        * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
+        * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
+        * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
+        * confusing. Since gen8_emit_pipe_control() already advances the
+        * batch by 6 dwords, we advance the other 10 here, completing a
+        * cacheline. It's not clear if the workaround requires this padding
+        * before other commands, or if it's just the regular padding we would
+        * already have for the workaround bb, so leave it here for now.
+        */
+       for (i = 0; i < 10; i++)
+               *batch++ = MI_NOOP;
+
+       /* Pad to end of cacheline */
+       while ((unsigned long)batch % CACHELINE_BYTES)
+               *batch++ = MI_NOOP;
+
+       return batch;
+}
+
+#define CTX_WA_BB_SIZE (PAGE_SIZE)
+
+static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       int err;
+
+       obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
+       vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+       if (IS_ERR(vma)) {
+               err = PTR_ERR(vma);
+               goto err;
+       }
+
+       err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
+       if (err)
+               goto err;
+
+       engine->wa_ctx.vma = vma;
+       return 0;
+
+err:
+       i915_gem_object_put(obj);
+       return err;
+}
+
+void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
+{
+       i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
+}
+
+typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
+
+int lrc_init_wa_ctx(struct intel_engine_cs *engine)
+{
+       struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+       struct i915_wa_ctx_bb *wa_bb[] = {
+               &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
+       };
+       wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
+       void *batch, *batch_ptr;
+       unsigned int i;
+       int ret;
+
+       if (engine->class != RENDER_CLASS)
+               return 0;
+
+       switch (INTEL_GEN(engine->i915)) {
+       case 12:
+       case 11:
+               return 0;
+       case 10:
+               wa_bb_fn[0] = gen10_init_indirectctx_bb;
+               wa_bb_fn[1] = NULL;
+               break;
+       case 9:
+               wa_bb_fn[0] = gen9_init_indirectctx_bb;
+               wa_bb_fn[1] = NULL;
+               break;
+       case 8:
+               wa_bb_fn[0] = gen8_init_indirectctx_bb;
+               wa_bb_fn[1] = NULL;
+               break;
+       default:
+               MISSING_CASE(INTEL_GEN(engine->i915));
+               return 0;
+       }
+
+       ret = lrc_setup_wa_ctx(engine);
+       if (ret) {
+               drm_dbg(&engine->i915->drm,
+                       "Failed to setup context WA page: %d\n", ret);
+               return ret;
+       }
+
+       batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
+       if (IS_ERR(batch)) {
+               lrc_fini_wa_ctx(engine);
+               return PTR_ERR(batch);
+       }
+
+       /*
+        * Emit the two workaround batch buffers, recording the offset from the
+        * start of the workaround batch buffer object for each and their
+        * respective sizes.
+        */
+       batch_ptr = batch;
+       for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
+               wa_bb[i]->offset = batch_ptr - batch;
+               if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
+                                                 CACHELINE_BYTES))) {
+                       ret = -EINVAL;
+                       break;
+               }
+               if (wa_bb_fn[i])
+                       batch_ptr = wa_bb_fn[i](engine, batch_ptr);
+               wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
+       }
+       GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);
+
+       __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
+       __i915_gem_object_release_map(wa_ctx->vma->obj);
+       if (ret)
+               lrc_fini_wa_ctx(engine);
+
+       return ret;
+}
+
+static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
+{
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+       ce->runtime.num_underflow++;
+       ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
+#endif
+}
+
+void lrc_update_runtime(struct intel_context *ce)
+{
+       u32 old;
+       s32 dt;
+
+       if (intel_context_is_barrier(ce))
+               return;
+
+       old = ce->runtime.last;
+       ce->runtime.last = lrc_get_runtime(ce);
+       dt = ce->runtime.last - old;
+
+       if (unlikely(dt < 0)) {
+               CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
+                        old, ce->runtime.last, dt);
+               st_update_runtime_underflow(ce, dt);
+               return;
+       }
+
+       ewma_runtime_add(&ce->runtime.avg, dt);
+       ce->runtime.total += dt;
+}
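
(Aside, not part of the patch: CTX_TIMESTAMP is a free-running u32, so the subtraction above is modular and remains correct across a wraparound; a genuinely negative dt therefore signals an underflow (the context image carries an older timestamp), not a wrap. A standalone sketch of the arithmetic with hypothetical samples:)

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical CTX_TIMESTAMP samples straddling a u32 wraparound */
	uint32_t old = 0xfffffff0, last = 0x00000010;
	int32_t dt = (int32_t)(last - old); /* modular: 0x20, still positive */

	printf("dt = %d ticks\n", dt);
	return 0;
}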
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_lrc.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
new file mode 100644 (file)
index 0000000..4e00685
--- /dev/null
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#ifndef __INTEL_LRC_H__
+#define __INTEL_LRC_H__
+
+#include <linux/types.h>
+
+#include "intel_context.h"
+#include "intel_lrc_reg.h"
+
+struct drm_i915_gem_object;
+struct intel_engine_cs;
+struct intel_ring;
+
+/* At the start of the context image is its per-process HWS page */
+#define LRC_PPHWSP_PN  (0)
+#define LRC_PPHWSP_SZ  (1)
+/* After the PPHWSP we have the logical state for the context */
+#define LRC_STATE_PN   (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
+#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE)
+
+/* Space within PPHWSP reserved to be used as scratch */
+#define LRC_PPHWSP_SCRATCH             0x34
+#define LRC_PPHWSP_SCRATCH_ADDR                (LRC_PPHWSP_SCRATCH * sizeof(u32))
+
+int lrc_init_wa_ctx(struct intel_engine_cs *engine);
+void lrc_fini_wa_ctx(struct intel_engine_cs *engine);
+
+int lrc_alloc(struct intel_context *ce,
+             struct intel_engine_cs *engine);
+void lrc_reset(struct intel_context *ce);
+void lrc_fini(struct intel_context *ce);
+void lrc_destroy(struct kref *kref);
+
+int
+lrc_pre_pin(struct intel_context *ce,
+           struct intel_engine_cs *engine,
+           struct i915_gem_ww_ctx *ww,
+           void **vaddr);
+int
+lrc_pin(struct intel_context *ce,
+       struct intel_engine_cs *engine,
+       void *vaddr);
+void lrc_unpin(struct intel_context *ce);
+void lrc_post_unpin(struct intel_context *ce);
+
+void lrc_init_state(struct intel_context *ce,
+                   struct intel_engine_cs *engine,
+                   void *state);
+
+void lrc_init_regs(const struct intel_context *ce,
+                  const struct intel_engine_cs *engine,
+                  bool clear);
+void lrc_reset_regs(const struct intel_context *ce,
+                   const struct intel_engine_cs *engine);
+
+u32 lrc_update_regs(const struct intel_context *ce,
+                   const struct intel_engine_cs *engine,
+                   u32 head);
+void lrc_update_offsets(struct intel_context *ce,
+                       struct intel_engine_cs *engine);
+
+void lrc_check_regs(const struct intel_context *ce,
+                   const struct intel_engine_cs *engine,
+                   const char *when);
+
+void lrc_update_runtime(struct intel_context *ce);
+static inline u32 lrc_get_runtime(const struct intel_context *ce)
+{
+       /*
+        * We can use either ppHWSP[16], which is recorded before the context
+        * switch (and so excludes the cost of context switches), or the
+        * value from the context image itself, which is saved/restored
+        * earlier and so includes the cost of the save.
+        */
+       return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
+#endif /* __INTEL_LRC_H__ */
index b2e03ce..65fe767 100644 (file)
@@ -9,6 +9,8 @@
 
 #include <linux/types.h>
 
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
 /* GEN8 to GEN12 Reg State Context */
 #define CTX_CONTEXT_CONTROL            (0x02 + 1)
 #define CTX_RING_HEAD                  (0x04 + 1)
index 95d41c0..34c2bb8 100644 (file)
@@ -249,7 +249,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
                }
                GEM_BUG_ON(!ce[1]->ring->size);
                intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
-               __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
+               lrc_update_regs(ce[1], engine, ce[1]->ring->head);
 
                rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
                if (IS_ERR(rq[0])) {
@@ -4705,1777 +4705,3 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 
        return intel_gt_live_subtests(tests, &i915->gt);
 }
-
-static int emit_semaphore_signal(struct intel_context *ce, void *slot)
-{
-       const u32 offset =
-               i915_ggtt_offset(ce->engine->status_page.vma) +
-               offset_in_page(slot);
-       struct i915_request *rq;
-       u32 *cs;
-
-       rq = intel_context_create_request(ce);
-       if (IS_ERR(rq))
-               return PTR_ERR(rq);
-
-       cs = intel_ring_begin(rq, 4);
-       if (IS_ERR(cs)) {
-               i915_request_add(rq);
-               return PTR_ERR(cs);
-       }
-
-       *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-       *cs++ = offset;
-       *cs++ = 0;
-       *cs++ = 1;
-
-       intel_ring_advance(rq, cs);
-
-       rq->sched.attr.priority = I915_PRIORITY_BARRIER;
-       i915_request_add(rq);
-       return 0;
-}
-
-static int context_flush(struct intel_context *ce, long timeout)
-{
-       struct i915_request *rq;
-       struct dma_fence *fence;
-       int err = 0;
-
-       rq = intel_engine_create_kernel_request(ce->engine);
-       if (IS_ERR(rq))
-               return PTR_ERR(rq);
-
-       fence = i915_active_fence_get(&ce->timeline->last_request);
-       if (fence) {
-               i915_request_await_dma_fence(rq, fence);
-               dma_fence_put(fence);
-       }
-
-       rq = i915_request_get(rq);
-       i915_request_add(rq);
-       if (i915_request_wait(rq, 0, timeout) < 0)
-               err = -ETIME;
-       i915_request_put(rq);
-
-       rmb(); /* We know the request is written, make sure all state is too! */
-       return err;
-}
-
-static int live_lrc_layout(void *arg)
-{
-       struct intel_gt *gt = arg;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       u32 *lrc;
-       int err;
-
-       /*
-        * Check the registers offsets we use to create the initial reg state
-        * match the layout saved by HW.
-        */
-
-       lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!lrc)
-               return -ENOMEM;
-
-       err = 0;
-       for_each_engine(engine, gt, id) {
-               u32 *hw;
-               int dw;
-
-               if (!engine->default_state)
-                       continue;
-
-               hw = shmem_pin_map(engine->default_state);
-               if (IS_ERR(hw)) {
-                       err = PTR_ERR(hw);
-                       break;
-               }
-               hw += LRC_STATE_OFFSET / sizeof(*hw);
-
-               execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
-                                        engine->kernel_context,
-                                        engine,
-                                        engine->kernel_context->ring,
-                                        true);
-
-               dw = 0;
-               do {
-                       u32 lri = hw[dw];
-
-                       if (lri == 0) {
-                               dw++;
-                               continue;
-                       }
-
-                       if (lrc[dw] == 0) {
-                               pr_debug("%s: skipped instruction %x at dword %d\n",
-                                        engine->name, lri, dw);
-                               dw++;
-                               continue;
-                       }
-
-                       if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
-                               pr_err("%s: Expected LRI command at dword %d, found %08x\n",
-                                      engine->name, dw, lri);
-                               err = -EINVAL;
-                               break;
-                       }
-
-                       if (lrc[dw] != lri) {
-                               pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
-                                      engine->name, dw, lri, lrc[dw]);
-                               err = -EINVAL;
-                               break;
-                       }
-
-                       lri &= 0x7f;
-                       lri++;
-                       dw++;
-
-                       while (lri) {
-                               if (hw[dw] != lrc[dw]) {
-                                       pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
-                                              engine->name, dw, hw[dw], lrc[dw]);
-                                       err = -EINVAL;
-                                       break;
-                               }
-
-                               /*
-                                * Skip over the actual register value as we
-                                * expect that to differ.
-                                */
-                               dw += 2;
-                               lri -= 2;
-                       }
-               } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
-
-               if (err) {
-                       pr_info("%s: HW register image:\n", engine->name);
-                       igt_hexdump(hw, PAGE_SIZE);
-
-                       pr_info("%s: SW register image:\n", engine->name);
-                       igt_hexdump(lrc, PAGE_SIZE);
-               }
-
-               shmem_unpin_map(engine->default_state, hw);
-               if (err)
-                       break;
-       }
-
-       kfree(lrc);
-       return err;
-}
-
-static int find_offset(const u32 *lri, u32 offset)
-{
-       int i;
-
-       for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
-               if (lri[i] == offset)
-                       return i;
-
-       return -1;
-}
-
-static int live_lrc_fixed(void *arg)
-{
-       struct intel_gt *gt = arg;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       int err = 0;
-
-       /*
-        * Check the assumed register offsets match the actual locations in
-        * the context image.
-        */
-
-       for_each_engine(engine, gt, id) {
-               const struct {
-                       u32 reg;
-                       u32 offset;
-                       const char *name;
-               } tbl[] = {
-                       {
-                               i915_mmio_reg_offset(RING_START(engine->mmio_base)),
-                               CTX_RING_START - 1,
-                               "RING_START"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
-                               CTX_RING_CTL - 1,
-                               "RING_CTL"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
-                               CTX_RING_HEAD - 1,
-                               "RING_HEAD"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
-                               CTX_RING_TAIL - 1,
-                               "RING_TAIL"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
-                               lrc_ring_mi_mode(engine),
-                               "RING_MI_MODE"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
-                               CTX_BB_STATE - 1,
-                               "BB_STATE"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
-                               lrc_ring_wa_bb_per_ctx(engine),
-                               "RING_BB_PER_CTX_PTR"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
-                               lrc_ring_indirect_ptr(engine),
-                               "RING_INDIRECT_CTX_PTR"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
-                               lrc_ring_indirect_offset(engine),
-                               "RING_INDIRECT_CTX_OFFSET"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
-                               CTX_TIMESTAMP - 1,
-                               "RING_CTX_TIMESTAMP"
-                       },
-                       {
-                               i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
-                               lrc_ring_gpr0(engine),
-                               "RING_CS_GPR0"
-                       },
-                       {
-                               i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
-                               lrc_ring_cmd_buf_cctl(engine),
-                               "RING_CMD_BUF_CCTL"
-                       },
-                       { },
-               }, *t;
-               u32 *hw;
-
-               if (!engine->default_state)
-                       continue;
-
-               hw = shmem_pin_map(engine->default_state);
-               if (IS_ERR(hw)) {
-                       err = PTR_ERR(hw);
-                       break;
-               }
-               hw += LRC_STATE_OFFSET / sizeof(*hw);
-
-               for (t = tbl; t->name; t++) {
-                       int dw = find_offset(hw, t->reg);
-
-                       if (dw != t->offset) {
-                               pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
-                                      engine->name,
-                                      t->name,
-                                      t->reg,
-                                      dw,
-                                      t->offset);
-                               err = -EINVAL;
-                       }
-               }
-
-               shmem_unpin_map(engine->default_state, hw);
-       }
-
-       return err;
-}
-
-static int __live_lrc_state(struct intel_engine_cs *engine,
-                           struct i915_vma *scratch)
-{
-       struct intel_context *ce;
-       struct i915_request *rq;
-       struct i915_gem_ww_ctx ww;
-       enum {
-               RING_START_IDX = 0,
-               RING_TAIL_IDX,
-               MAX_IDX
-       };
-       u32 expected[MAX_IDX];
-       u32 *cs;
-       int err;
-       int n;
-
-       ce = intel_context_create(engine);
-       if (IS_ERR(ce))
-               return PTR_ERR(ce);
-
-       i915_gem_ww_ctx_init(&ww, false);
-retry:
-       err = i915_gem_object_lock(scratch->obj, &ww);
-       if (!err)
-               err = intel_context_pin_ww(ce, &ww);
-       if (err)
-               goto err_put;
-
-       rq = i915_request_create(ce);
-       if (IS_ERR(rq)) {
-               err = PTR_ERR(rq);
-               goto err_unpin;
-       }
-
-       cs = intel_ring_begin(rq, 4 * MAX_IDX);
-       if (IS_ERR(cs)) {
-               err = PTR_ERR(cs);
-               i915_request_add(rq);
-               goto err_unpin;
-       }
-
-       *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
-       *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
-       *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
-       *cs++ = 0;
-
-       expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
-
-       *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
-       *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
-       *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
-       *cs++ = 0;
-
-       err = i915_request_await_object(rq, scratch->obj, true);
-       if (!err)
-               err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
-
-       i915_request_get(rq);
-       i915_request_add(rq);
-       if (err)
-               goto err_rq;
-
-       intel_engine_flush_submission(engine);
-       expected[RING_TAIL_IDX] = ce->ring->tail;
-
-       if (i915_request_wait(rq, 0, HZ / 5) < 0) {
-               err = -ETIME;
-               goto err_rq;
-       }
-
-       cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
-       if (IS_ERR(cs)) {
-               err = PTR_ERR(cs);
-               goto err_rq;
-       }
-
-       for (n = 0; n < MAX_IDX; n++) {
-               if (cs[n] != expected[n]) {
-                       pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
-                              engine->name, n, cs[n], expected[n]);
-                       err = -EINVAL;
-                       break;
-               }
-       }
-
-       i915_gem_object_unpin_map(scratch->obj);
-
-err_rq:
-       i915_request_put(rq);
-err_unpin:
-       intel_context_unpin(ce);
-err_put:
-       if (err == -EDEADLK) {
-               err = i915_gem_ww_ctx_backoff(&ww);
-               if (!err)
-                       goto retry;
-       }
-       i915_gem_ww_ctx_fini(&ww);
-       intel_context_put(ce);
-       return err;
-}
-
-static int live_lrc_state(void *arg)
-{
-       struct intel_gt *gt = arg;
-       struct intel_engine_cs *engine;
-       struct i915_vma *scratch;
-       enum intel_engine_id id;
-       int err = 0;
-
-       /*
-        * Check the live register state matches what we expect for this
-        * intel_context.
-        */
-
-       scratch = create_scratch(gt);
-       if (IS_ERR(scratch))
-               return PTR_ERR(scratch);
-
-       for_each_engine(engine, gt, id) {
-               err = __live_lrc_state(engine, scratch);
-               if (err)
-                       break;
-       }
-
-       if (igt_flush_test(gt->i915))
-               err = -EIO;
-
-       i915_vma_unpin_and_release(&scratch, 0);
-       return err;
-}
-
-static int gpr_make_dirty(struct intel_context *ce)
-{
-       struct i915_request *rq;
-       u32 *cs;
-       int n;
-
-       rq = intel_context_create_request(ce);
-       if (IS_ERR(rq))
-               return PTR_ERR(rq);
-
-       cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
-       if (IS_ERR(cs)) {
-               i915_request_add(rq);
-               return PTR_ERR(cs);
-       }
-
-       *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
-       for (n = 0; n < NUM_GPR_DW; n++) {
-               *cs++ = CS_GPR(ce->engine, n);
-               *cs++ = STACK_MAGIC;
-       }
-       *cs++ = MI_NOOP;
-
-       intel_ring_advance(rq, cs);
-
-       rq->sched.attr.priority = I915_PRIORITY_BARRIER;
-       i915_request_add(rq);
-
-       return 0;
-}
-
-static struct i915_request *
-__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
-{
-       const u32 offset =
-               i915_ggtt_offset(ce->engine->status_page.vma) +
-               offset_in_page(slot);
-       struct i915_request *rq;
-       u32 *cs;
-       int err;
-       int n;
-
-       rq = intel_context_create_request(ce);
-       if (IS_ERR(rq))
-               return rq;
-
-       cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
-       if (IS_ERR(cs)) {
-               i915_request_add(rq);
-               return ERR_CAST(cs);
-       }
-
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-       *cs++ = MI_NOOP;
-
-       *cs++ = MI_SEMAPHORE_WAIT |
-               MI_SEMAPHORE_GLOBAL_GTT |
-               MI_SEMAPHORE_POLL |
-               MI_SEMAPHORE_SAD_NEQ_SDD;
-       *cs++ = 0;
-       *cs++ = offset;
-       *cs++ = 0;
-
-       for (n = 0; n < NUM_GPR_DW; n++) {
-               *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
-               *cs++ = CS_GPR(ce->engine, n);
-               *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
-               *cs++ = 0;
-       }
-
-       i915_vma_lock(scratch);
-       err = i915_request_await_object(rq, scratch->obj, true);
-       if (!err)
-               err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
-       i915_vma_unlock(scratch);
-
-       i915_request_get(rq);
-       i915_request_add(rq);
-       if (err) {
-               i915_request_put(rq);
-               rq = ERR_PTR(err);
-       }
-
-       return rq;
-}
-
-static int __live_lrc_gpr(struct intel_engine_cs *engine,
-                         struct i915_vma *scratch,
-                         bool preempt)
-{
-       u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
-       struct intel_context *ce;
-       struct i915_request *rq;
-       u32 *cs;
-       int err;
-       int n;
-
-       if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
-               return 0; /* GPR only on rcs0 for gen8 */
-
-       err = gpr_make_dirty(engine->kernel_context);
-       if (err)
-               return err;
-
-       ce = intel_context_create(engine);
-       if (IS_ERR(ce))
-               return PTR_ERR(ce);
-
-       rq = __gpr_read(ce, scratch, slot);
-       if (IS_ERR(rq)) {
-               err = PTR_ERR(rq);
-               goto err_put;
-       }
-
-       err = wait_for_submit(engine, rq, HZ / 2);
-       if (err)
-               goto err_rq;
-
-       if (preempt) {
-               err = gpr_make_dirty(engine->kernel_context);
-               if (err)
-                       goto err_rq;
-
-               err = emit_semaphore_signal(engine->kernel_context, slot);
-               if (err)
-                       goto err_rq;
-       } else {
-               slot[0] = 1;
-               wmb();
-       }
-
-       if (i915_request_wait(rq, 0, HZ / 5) < 0) {
-               err = -ETIME;
-               goto err_rq;
-       }
-
-       cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
-       if (IS_ERR(cs)) {
-               err = PTR_ERR(cs);
-               goto err_rq;
-       }
-
-       for (n = 0; n < NUM_GPR_DW; n++) {
-               if (cs[n]) {
-                       pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
-                              engine->name,
-                              n / 2, n & 1 ? "udw" : "ldw",
-                              cs[n]);
-                       err = -EINVAL;
-                       break;
-               }
-       }
-
-       i915_gem_object_unpin_map(scratch->obj);
-
-err_rq:
-       memset32(&slot[0], -1, 4);
-       wmb();
-       i915_request_put(rq);
-err_put:
-       intel_context_put(ce);
-       return err;
-}
-
-static int live_lrc_gpr(void *arg)
-{
-       struct intel_gt *gt = arg;
-       struct intel_engine_cs *engine;
-       struct i915_vma *scratch;
-       enum intel_engine_id id;
-       int err = 0;
-
-       /*
-        * Check that GPR registers are cleared in new contexts as we need
-        * to avoid leaking any information from previous contexts.
-        */
-
-       scratch = create_scratch(gt);
-       if (IS_ERR(scratch))
-               return PTR_ERR(scratch);
-
-       for_each_engine(engine, gt, id) {
-               st_engine_heartbeat_disable(engine);
-
-               err = __live_lrc_gpr(engine, scratch, false);
-               if (err)
-                       goto err;
-
-               err = __live_lrc_gpr(engine, scratch, true);
-               if (err)
-                       goto err;
-
-err:
-               st_engine_heartbeat_enable(engine);
-               if (igt_flush_test(gt->i915))
-                       err = -EIO;
-               if (err)
-                       break;
-       }
-
-       i915_vma_unpin_and_release(&scratch, 0);
-       return err;
-}
-
-static struct i915_request *
-create_timestamp(struct intel_context *ce, void *slot, int idx)
-{
-       const u32 offset =
-               i915_ggtt_offset(ce->engine->status_page.vma) +
-               offset_in_page(slot);
-       struct i915_request *rq;
-       u32 *cs;
-       int err;
-
-       rq = intel_context_create_request(ce);
-       if (IS_ERR(rq))
-               return rq;
-
-       cs = intel_ring_begin(rq, 10);
-       if (IS_ERR(cs)) {
-               err = PTR_ERR(cs);
-               goto err;
-       }
-
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-       *cs++ = MI_NOOP;
-
-       *cs++ = MI_SEMAPHORE_WAIT |
-               MI_SEMAPHORE_GLOBAL_GTT |
-               MI_SEMAPHORE_POLL |
-               MI_SEMAPHORE_SAD_NEQ_SDD;
-       *cs++ = 0;
-       *cs++ = offset;
-       *cs++ = 0;
-
-       *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
-       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
-       *cs++ = offset + idx * sizeof(u32);
-       *cs++ = 0;
-
-       intel_ring_advance(rq, cs);
-
-       rq->sched.attr.priority = I915_PRIORITY_MASK;
-       err = 0;
-err:
-       i915_request_get(rq);
-       i915_request_add(rq);
-       if (err) {
-               i915_request_put(rq);
-               return ERR_PTR(err);
-       }
-
-       return rq;
-}
-
-struct lrc_timestamp {
-       struct intel_engine_cs *engine;
-       struct intel_context *ce[2];
-       u32 poison;
-};
-
-static bool timestamp_advanced(u32 start, u32 end)
-{
-       return (s32)(end - start) > 0;
-}
-
-static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
-{
-       u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
-       struct i915_request *rq;
-       u32 timestamp;
-       int err = 0;
-
-       arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
-       rq = create_timestamp(arg->ce[0], slot, 1);
-       if (IS_ERR(rq))
-               return PTR_ERR(rq);
-
-       err = wait_for_submit(rq->engine, rq, HZ / 2);
-       if (err)
-               goto err;
-
-       if (preempt) {
-               arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
-               err = emit_semaphore_signal(arg->ce[1], slot);
-               if (err)
-                       goto err;
-       } else {
-               slot[0] = 1;
-               wmb();
-       }
-
-       /* And wait for switch to kernel (to save our context to memory) */
-       err = context_flush(arg->ce[0], HZ / 2);
-       if (err)
-               goto err;
-
-       if (!timestamp_advanced(arg->poison, slot[1])) {
-               pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
-                      arg->engine->name, preempt ? "preempt" : "simple",
-                      arg->poison, slot[1]);
-               err = -EINVAL;
-       }
-
-       timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
-       if (!timestamp_advanced(slot[1], timestamp)) {
-               pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
-                      arg->engine->name, preempt ? "preempt" : "simple",
-                      slot[1], timestamp);
-               err = -EINVAL;
-       }
-
-err:
-       memset32(slot, -1, 4);
-       i915_request_put(rq);
-       return err;
-}
-
-static int live_lrc_timestamp(void *arg)
-{
-       struct lrc_timestamp data = {};
-       struct intel_gt *gt = arg;
-       enum intel_engine_id id;
-       const u32 poison[] = {
-               0,
-               S32_MAX,
-               (u32)S32_MAX + 1,
-               U32_MAX,
-       };
-
-       /*
-        * We want to verify that the timestamp is saved and restore across
-        * context switches and is monotonic.
-        *
-        * So we do this with a little bit of LRC poisoning to check various
-        * boundary conditions, and see what happens if we preempt the context
-        * with a second request (carrying more poison into the timestamp).
-        */
-
-       for_each_engine(data.engine, gt, id) {
-               int i, err = 0;
-
-               st_engine_heartbeat_disable(data.engine);
-
-               for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
-                       struct intel_context *tmp;
-
-                       tmp = intel_context_create(data.engine);
-                       if (IS_ERR(tmp)) {
-                               err = PTR_ERR(tmp);
-                               goto err;
-                       }
-
-                       err = intel_context_pin(tmp);
-                       if (err) {
-                               intel_context_put(tmp);
-                               goto err;
-                       }
-
-                       data.ce[i] = tmp;
-               }
-
-               for (i = 0; i < ARRAY_SIZE(poison); i++) {
-                       data.poison = poison[i];
-
-                       err = __lrc_timestamp(&data, false);
-                       if (err)
-                               break;
-
-                       err = __lrc_timestamp(&data, true);
-                       if (err)
-                               break;
-               }
-
-err:
-               st_engine_heartbeat_enable(data.engine);
-               for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
-                       if (!data.ce[i])
-                               break;
-
-                       intel_context_unpin(data.ce[i]);
-                       intel_context_put(data.ce[i]);
-               }
-
-               if (igt_flush_test(gt->i915))
-                       err = -EIO;
-               if (err)
-                       return err;
-       }
-
-       return 0;
-}
-
-static struct i915_vma *
-create_user_vma(struct i915_address_space *vm, unsigned long size)
-{
-       struct drm_i915_gem_object *obj;
-       struct i915_vma *vma;
-       int err;
-
-       obj = i915_gem_object_create_internal(vm->i915, size);
-       if (IS_ERR(obj))
-               return ERR_CAST(obj);
-
-       vma = i915_vma_instance(obj, vm, NULL);
-       if (IS_ERR(vma)) {
-               i915_gem_object_put(obj);
-               return vma;
-       }
-
-       err = i915_vma_pin(vma, 0, 0, PIN_USER);
-       if (err) {
-               i915_gem_object_put(obj);
-               return ERR_PTR(err);
-       }
-
-       return vma;
-}
-
-static struct i915_vma *
-store_context(struct intel_context *ce, struct i915_vma *scratch)
-{
-       struct i915_vma *batch;
-       u32 dw, x, *cs, *hw;
-       u32 *defaults;
-
-       batch = create_user_vma(ce->vm, SZ_64K);
-       if (IS_ERR(batch))
-               return batch;
-
-       cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
-       if (IS_ERR(cs)) {
-               i915_vma_put(batch);
-               return ERR_CAST(cs);
-       }
-
-       defaults = shmem_pin_map(ce->engine->default_state);
-       if (!defaults) {
-               i915_gem_object_unpin_map(batch->obj);
-               i915_vma_put(batch);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       x = 0;
-       dw = 0;
-       hw = defaults;
-       hw += LRC_STATE_OFFSET / sizeof(*hw);
-       do {
-               u32 len = hw[dw] & 0x7f;
-
-               if (hw[dw] == 0) {
-                       dw++;
-                       continue;
-               }
-
-               if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
-                       dw += len + 2;
-                       continue;
-               }
-
-               dw++;
-               len = (len + 1) / 2;
-               while (len--) {
-                       *cs++ = MI_STORE_REGISTER_MEM_GEN8;
-                       *cs++ = hw[dw];
-                       *cs++ = lower_32_bits(scratch->node.start + x);
-                       *cs++ = upper_32_bits(scratch->node.start + x);
-
-                       dw += 2;
-                       x += 4;
-               }
-       } while (dw < PAGE_SIZE / sizeof(u32) &&
-                (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
-
-       *cs++ = MI_BATCH_BUFFER_END;
-
-       shmem_unpin_map(ce->engine->default_state, defaults);
-
-       i915_gem_object_flush_map(batch->obj);
-       i915_gem_object_unpin_map(batch->obj);
-
-       return batch;
-}
-
-static int move_to_active(struct i915_request *rq,
-                         struct i915_vma *vma,
-                         unsigned int flags)
-{
-       int err;
-
-       i915_vma_lock(vma);
-       err = i915_request_await_object(rq, vma->obj, flags);
-       if (!err)
-               err = i915_vma_move_to_active(vma, rq, flags);
-       i915_vma_unlock(vma);
-
-       return err;
-}
-
-static struct i915_request *
-record_registers(struct intel_context *ce,
-                struct i915_vma *before,
-                struct i915_vma *after,
-                u32 *sema)
-{
-       struct i915_vma *b_before, *b_after;
-       struct i915_request *rq;
-       u32 *cs;
-       int err;
-
-       b_before = store_context(ce, before);
-       if (IS_ERR(b_before))
-               return ERR_CAST(b_before);
-
-       b_after = store_context(ce, after);
-       if (IS_ERR(b_after)) {
-               rq = ERR_CAST(b_after);
-               goto err_before;
-       }
-
-       rq = intel_context_create_request(ce);
-       if (IS_ERR(rq))
-               goto err_after;
-
-       err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
-       if (err)
-               goto err_rq;
-
-       err = move_to_active(rq, b_before, 0);
-       if (err)
-               goto err_rq;
-
-       err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
-       if (err)
-               goto err_rq;
-
-       err = move_to_active(rq, b_after, 0);
-       if (err)
-               goto err_rq;
-
-       cs = intel_ring_begin(rq, 14);
-       if (IS_ERR(cs)) {
-               err = PTR_ERR(cs);
-               goto err_rq;
-       }
-
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-       *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
-       *cs++ = lower_32_bits(b_before->node.start);
-       *cs++ = upper_32_bits(b_before->node.start);
-
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-       *cs++ = MI_SEMAPHORE_WAIT |
-               MI_SEMAPHORE_GLOBAL_GTT |
-               MI_SEMAPHORE_POLL |
-               MI_SEMAPHORE_SAD_NEQ_SDD;
-       *cs++ = 0;
-       *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
-               offset_in_page(sema);
-       *cs++ = 0;
-       *cs++ = MI_NOOP;
-
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-       *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
-       *cs++ = lower_32_bits(b_after->node.start);
-       *cs++ = upper_32_bits(b_after->node.start);
-
-       intel_ring_advance(rq, cs);
-
-       WRITE_ONCE(*sema, 0);
-       i915_request_get(rq);
-       i915_request_add(rq);
-err_after:
-       i915_vma_put(b_after);
-err_before:
-       i915_vma_put(b_before);
-       return rq;
-
-err_rq:
-       i915_request_add(rq);
-       rq = ERR_PTR(err);
-       goto err_after;
-}
-
-static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
-{
-       struct i915_vma *batch;
-       u32 dw, *cs, *hw;
-       u32 *defaults;
-
-       batch = create_user_vma(ce->vm, SZ_64K);
-       if (IS_ERR(batch))
-               return batch;
-
-       cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
-       if (IS_ERR(cs)) {
-               i915_vma_put(batch);
-               return ERR_CAST(cs);
-       }
-
-       defaults = shmem_pin_map(ce->engine->default_state);
-       if (!defaults) {
-               i915_gem_object_unpin_map(batch->obj);
-               i915_vma_put(batch);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       dw = 0;
-       hw = defaults;
-       hw += LRC_STATE_OFFSET / sizeof(*hw);
-       do {
-               u32 len = hw[dw] & 0x7f;
-
-               if (hw[dw] == 0) {
-                       dw++;
-                       continue;
-               }
-
-               if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
-                       dw += len + 2;
-                       continue;
-               }
-
-               dw++;
-               len = (len + 1) / 2;
-               *cs++ = MI_LOAD_REGISTER_IMM(len);
-               while (len--) {
-                       *cs++ = hw[dw];
-                       *cs++ = poison;
-                       dw += 2;
-               }
-       } while (dw < PAGE_SIZE / sizeof(u32) &&
-                (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
-
-       *cs++ = MI_BATCH_BUFFER_END;
-
-       shmem_unpin_map(ce->engine->default_state, defaults);
-
-       i915_gem_object_flush_map(batch->obj);
-       i915_gem_object_unpin_map(batch->obj);
-
-       return batch;
-}
-
-static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
-{
-       struct i915_request *rq;
-       struct i915_vma *batch;
-       u32 *cs;
-       int err;
-
-       batch = load_context(ce, poison);
-       if (IS_ERR(batch))
-               return PTR_ERR(batch);
-
-       rq = intel_context_create_request(ce);
-       if (IS_ERR(rq)) {
-               err = PTR_ERR(rq);
-               goto err_batch;
-       }
-
-       err = move_to_active(rq, batch, 0);
-       if (err)
-               goto err_rq;
-
-       cs = intel_ring_begin(rq, 8);
-       if (IS_ERR(cs)) {
-               err = PTR_ERR(cs);
-               goto err_rq;
-       }
-
-       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-       *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
-       *cs++ = lower_32_bits(batch->node.start);
-       *cs++ = upper_32_bits(batch->node.start);
-
-       *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-       *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
-               offset_in_page(sema);
-       *cs++ = 0;
-       *cs++ = 1;
-
-       intel_ring_advance(rq, cs);
-
-       rq->sched.attr.priority = I915_PRIORITY_BARRIER;
-err_rq:
-       i915_request_add(rq);
-err_batch:
-       i915_vma_put(batch);
-       return err;
-}
-
-static bool is_moving(u32 a, u32 b)
-{
-       return a != b;
-}
-
-static int compare_isolation(struct intel_engine_cs *engine,
-                            struct i915_vma *ref[2],
-                            struct i915_vma *result[2],
-                            struct intel_context *ce,
-                            u32 poison)
-{
-       u32 x, dw, *hw, *lrc;
-       u32 *A[2], *B[2];
-       u32 *defaults;
-       int err = 0;
-
-       A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
-       if (IS_ERR(A[0]))
-               return PTR_ERR(A[0]);
-
-       A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
-       if (IS_ERR(A[1])) {
-               err = PTR_ERR(A[1]);
-               goto err_A0;
-       }
-
-       B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
-       if (IS_ERR(B[0])) {
-               err = PTR_ERR(B[0]);
-               goto err_A1;
-       }
-
-       B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
-       if (IS_ERR(B[1])) {
-               err = PTR_ERR(B[1]);
-               goto err_B0;
-       }
-
-       lrc = i915_gem_object_pin_map(ce->state->obj,
-                                     i915_coherent_map_type(engine->i915));
-       if (IS_ERR(lrc)) {
-               err = PTR_ERR(lrc);
-               goto err_B1;
-       }
-       lrc += LRC_STATE_OFFSET / sizeof(*hw);
-
-       defaults = shmem_pin_map(ce->engine->default_state);
-       if (!defaults) {
-               err = -ENOMEM;
-               goto err_lrc;
-       }
-
-       x = 0;
-       dw = 0;
-       hw = defaults;
-       hw += LRC_STATE_OFFSET / sizeof(*hw);
-       do {
-               u32 len = hw[dw] & 0x7f;
-
-               if (hw[dw] == 0) {
-                       dw++;
-                       continue;
-               }
-
-               if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
-                       dw += len + 2;
-                       continue;
-               }
-
-               dw++;
-               len = (len + 1) / 2;
-               while (len--) {
-                       if (!is_moving(A[0][x], A[1][x]) &&
-                           (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
-                               switch (hw[dw] & 4095) {
-                               case 0x30: /* RING_HEAD */
-                               case 0x34: /* RING_TAIL */
-                                       break;
-
-                               default:
-                                       pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
-                                              engine->name, dw,
-                                              hw[dw], hw[dw + 1],
-                                              A[0][x], B[0][x], B[1][x],
-                                              poison, lrc[dw + 1]);
-                                       err = -EINVAL;
-                               }
-                       }
-                       dw += 2;
-                       x++;
-               }
-       } while (dw < PAGE_SIZE / sizeof(u32) &&
-                (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
-
-       shmem_unpin_map(ce->engine->default_state, defaults);
-err_lrc:
-       i915_gem_object_unpin_map(ce->state->obj);
-err_B1:
-       i915_gem_object_unpin_map(result[1]->obj);
-err_B0:
-       i915_gem_object_unpin_map(result[0]->obj);
-err_A1:
-       i915_gem_object_unpin_map(ref[1]->obj);
-err_A0:
-       i915_gem_object_unpin_map(ref[0]->obj);
-       return err;
-}
-
-static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
-{
-       u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
-       struct i915_vma *ref[2], *result[2];
-       struct intel_context *A, *B;
-       struct i915_request *rq;
-       int err;
-
-       A = intel_context_create(engine);
-       if (IS_ERR(A))
-               return PTR_ERR(A);
-
-       B = intel_context_create(engine);
-       if (IS_ERR(B)) {
-               err = PTR_ERR(B);
-               goto err_A;
-       }
-
-       ref[0] = create_user_vma(A->vm, SZ_64K);
-       if (IS_ERR(ref[0])) {
-               err = PTR_ERR(ref[0]);
-               goto err_B;
-       }
-
-       ref[1] = create_user_vma(A->vm, SZ_64K);
-       if (IS_ERR(ref[1])) {
-               err = PTR_ERR(ref[1]);
-               goto err_ref0;
-       }
-
-       rq = record_registers(A, ref[0], ref[1], sema);
-       if (IS_ERR(rq)) {
-               err = PTR_ERR(rq);
-               goto err_ref1;
-       }
-
-       WRITE_ONCE(*sema, 1);
-       wmb();
-
-       if (i915_request_wait(rq, 0, HZ / 2) < 0) {
-               i915_request_put(rq);
-               err = -ETIME;
-               goto err_ref1;
-       }
-       i915_request_put(rq);
-
-       result[0] = create_user_vma(A->vm, SZ_64K);
-       if (IS_ERR(result[0])) {
-               err = PTR_ERR(result[0]);
-               goto err_ref1;
-       }
-
-       result[1] = create_user_vma(A->vm, SZ_64K);
-       if (IS_ERR(result[1])) {
-               err = PTR_ERR(result[1]);
-               goto err_result0;
-       }
-
-       rq = record_registers(A, result[0], result[1], sema);
-       if (IS_ERR(rq)) {
-               err = PTR_ERR(rq);
-               goto err_result1;
-       }
-
-       err = poison_registers(B, poison, sema);
-       if (err) {
-               WRITE_ONCE(*sema, -1);
-               i915_request_put(rq);
-               goto err_result1;
-       }
-
-       if (i915_request_wait(rq, 0, HZ / 2) < 0) {
-               i915_request_put(rq);
-               err = -ETIME;
-               goto err_result1;
-       }
-       i915_request_put(rq);
-
-       err = compare_isolation(engine, ref, result, A, poison);
-
-err_result1:
-       i915_vma_put(result[1]);
-err_result0:
-       i915_vma_put(result[0]);
-err_ref1:
-       i915_vma_put(ref[1]);
-err_ref0:
-       i915_vma_put(ref[0]);
-err_B:
-       intel_context_put(B);
-err_A:
-       intel_context_put(A);
-       return err;
-}
-
-static bool skip_isolation(const struct intel_engine_cs *engine)
-{
-       if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
-               return true;
-
-       if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
-               return true;
-
-       return false;
-}
-
-static int live_lrc_isolation(void *arg)
-{
-       struct intel_gt *gt = arg;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       const u32 poison[] = {
-               STACK_MAGIC,
-               0x3a3a3a3a,
-               0x5c5c5c5c,
-               0xffffffff,
-               0xffff0000,
-       };
-       int err = 0;
-
-       /*
-        * Our goal is try and verify that per-context state cannot be
-        * tampered with by another non-privileged client.
-        *
-        * We take the list of context registers from the LRI in the default
-        * context image and attempt to modify that list from a remote context.
-        */
-
-       for_each_engine(engine, gt, id) {
-               int i;
-
-               /* Just don't even ask */
-               if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
-                   skip_isolation(engine))
-                       continue;
-
-               intel_engine_pm_get(engine);
-               for (i = 0; i < ARRAY_SIZE(poison); i++) {
-                       int result;
-
-                       result = __lrc_isolation(engine, poison[i]);
-                       if (result && !err)
-                               err = result;
-
-                       result = __lrc_isolation(engine, ~poison[i]);
-                       if (result && !err)
-                               err = result;
-               }
-               intel_engine_pm_put(engine);
-               if (igt_flush_test(gt->i915)) {
-                       err = -EIO;
-                       break;
-               }
-       }
-
-       return err;
-}
-
-static int indirect_ctx_submit_req(struct intel_context *ce)
-{
-       struct i915_request *rq;
-       int err = 0;
-
-       rq = intel_context_create_request(ce);
-       if (IS_ERR(rq))
-               return PTR_ERR(rq);
-
-       i915_request_get(rq);
-       i915_request_add(rq);
-
-       if (i915_request_wait(rq, 0, HZ / 5) < 0)
-               err = -ETIME;
-
-       i915_request_put(rq);
-
-       return err;
-}
-
-#define CTX_BB_CANARY_OFFSET (3 * 1024)
-#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
-
-static u32 *
-emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
-{
-       *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
-               MI_SRM_LRM_GLOBAL_GTT |
-               MI_LRI_LRM_CS_MMIO;
-       *cs++ = i915_mmio_reg_offset(RING_START(0));
-       *cs++ = i915_ggtt_offset(ce->state) +
-               context_wa_bb_offset(ce) +
-               CTX_BB_CANARY_OFFSET;
-       *cs++ = 0;
-
-       return cs;
-}
-
-static void
-indirect_ctx_bb_setup(struct intel_context *ce)
-{
-       u32 *cs = context_indirect_bb(ce);
-
-       cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
-
-       setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
-}
-
-static bool check_ring_start(struct intel_context *ce)
-{
-       const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
-               LRC_STATE_OFFSET + context_wa_bb_offset(ce);
-
-       if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
-               return true;
-
-       pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
-              ctx_bb[CTX_BB_CANARY_INDEX],
-              ce->lrc_reg_state[CTX_RING_START]);
-
-       return false;
-}
-
-static int indirect_ctx_bb_check(struct intel_context *ce)
-{
-       int err;
-
-       err = indirect_ctx_submit_req(ce);
-       if (err)
-               return err;
-
-       if (!check_ring_start(ce))
-               return -EINVAL;
-
-       return 0;
-}
-
-static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
-{
-       struct intel_context *a, *b;
-       int err;
-
-       a = intel_context_create(engine);
-       if (IS_ERR(a))
-               return PTR_ERR(a);
-       err = intel_context_pin(a);
-       if (err)
-               goto put_a;
-
-       b = intel_context_create(engine);
-       if (IS_ERR(b)) {
-               err = PTR_ERR(b);
-               goto unpin_a;
-       }
-       err = intel_context_pin(b);
-       if (err)
-               goto put_b;
-
-       /* We use the already reserved extra page in context state */
-       if (!a->wa_bb_page) {
-               GEM_BUG_ON(b->wa_bb_page);
-               GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
-               goto unpin_b;
-       }
-
-       /*
-        * In order to test that our per context bb is truly per context,
-        * and executes at the intended spot on context restoring process,
-        * make the batch store the ring start value to memory.
-        * As ring start is restored apriori of starting the indirect ctx bb and
-        * as it will be different for each context, it fits to this purpose.
-        */
-       indirect_ctx_bb_setup(a);
-       indirect_ctx_bb_setup(b);
-
-       err = indirect_ctx_bb_check(a);
-       if (err)
-               goto unpin_b;
-
-       err = indirect_ctx_bb_check(b);
-
-unpin_b:
-       intel_context_unpin(b);
-put_b:
-       intel_context_put(b);
-unpin_a:
-       intel_context_unpin(a);
-put_a:
-       intel_context_put(a);
-
-       return err;
-}
-
-static int live_lrc_indirect_ctx_bb(void *arg)
-{
-       struct intel_gt *gt = arg;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       int err = 0;
-
-       for_each_engine(engine, gt, id) {
-               intel_engine_pm_get(engine);
-               err = __live_lrc_indirect_ctx_bb(engine);
-               intel_engine_pm_put(engine);
-
-               if (igt_flush_test(gt->i915))
-                       err = -EIO;
-
-               if (err)
-                       break;
-       }
-
-       return err;
-}
-
-static void garbage_reset(struct intel_engine_cs *engine,
-                         struct i915_request *rq)
-{
-       const unsigned int bit = I915_RESET_ENGINE + engine->id;
-       unsigned long *lock = &engine->gt->reset.flags;
-
-       if (test_and_set_bit(bit, lock))
-               return;
-
-       tasklet_disable(&engine->execlists.tasklet);
-
-       if (!rq->fence.error)
-               intel_engine_reset(engine, NULL);
-
-       tasklet_enable(&engine->execlists.tasklet);
-       clear_and_wake_up_bit(bit, lock);
-}
-
-static struct i915_request *garbage(struct intel_context *ce,
-                                   struct rnd_state *prng)
-{
-       struct i915_request *rq;
-       int err;
-
-       err = intel_context_pin(ce);
-       if (err)
-               return ERR_PTR(err);
-
-       prandom_bytes_state(prng,
-                           ce->lrc_reg_state,
-                           ce->engine->context_size -
-                           LRC_STATE_OFFSET);
-
-       rq = intel_context_create_request(ce);
-       if (IS_ERR(rq)) {
-               err = PTR_ERR(rq);
-               goto err_unpin;
-       }
-
-       i915_request_get(rq);
-       i915_request_add(rq);
-       return rq;
-
-err_unpin:
-       intel_context_unpin(ce);
-       return ERR_PTR(err);
-}
-
-static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
-{
-       struct intel_context *ce;
-       struct i915_request *hang;
-       int err = 0;
-
-       ce = intel_context_create(engine);
-       if (IS_ERR(ce))
-               return PTR_ERR(ce);
-
-       hang = garbage(ce, prng);
-       if (IS_ERR(hang)) {
-               err = PTR_ERR(hang);
-               goto err_ce;
-       }
-
-       if (wait_for_submit(engine, hang, HZ / 2)) {
-               i915_request_put(hang);
-               err = -ETIME;
-               goto err_ce;
-       }
-
-       intel_context_set_banned(ce);
-       garbage_reset(engine, hang);
-
-       intel_engine_flush_submission(engine);
-       if (!hang->fence.error) {
-               i915_request_put(hang);
-               pr_err("%s: corrupted context was not reset\n",
-                      engine->name);
-               err = -EINVAL;
-               goto err_ce;
-       }
-
-       if (i915_request_wait(hang, 0, HZ / 2) < 0) {
-               pr_err("%s: corrupted context did not recover\n",
-                      engine->name);
-               i915_request_put(hang);
-               err = -EIO;
-               goto err_ce;
-       }
-       i915_request_put(hang);
-
-err_ce:
-       intel_context_put(ce);
-       return err;
-}
-
-static int live_lrc_garbage(void *arg)
-{
-       struct intel_gt *gt = arg;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-
-       /*
-        * Verify that we can recover if one context state is completely
-        * corrupted.
-        */
-
-       if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
-               return 0;
-
-       for_each_engine(engine, gt, id) {
-               I915_RND_STATE(prng);
-               int err = 0, i;
-
-               if (!intel_has_reset_engine(engine->gt))
-                       continue;
-
-               intel_engine_pm_get(engine);
-               for (i = 0; i < 3; i++) {
-                       err = __lrc_garbage(engine, &prng);
-                       if (err)
-                               break;
-               }
-               intel_engine_pm_put(engine);
-
-               if (igt_flush_test(gt->i915))
-                       err = -EIO;
-               if (err)
-                       return err;
-       }
-
-       return 0;
-}
-
-static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
-{
-       struct intel_context *ce;
-       struct i915_request *rq;
-       IGT_TIMEOUT(end_time);
-       int err;
-
-       ce = intel_context_create(engine);
-       if (IS_ERR(ce))
-               return PTR_ERR(ce);
-
-       ce->runtime.num_underflow = 0;
-       ce->runtime.max_underflow = 0;
-
-       do {
-               unsigned int loop = 1024;
-
-               while (loop) {
-                       rq = intel_context_create_request(ce);
-                       if (IS_ERR(rq)) {
-                               err = PTR_ERR(rq);
-                               goto err_rq;
-                       }
-
-                       if (--loop == 0)
-                               i915_request_get(rq);
-
-                       i915_request_add(rq);
-               }
-
-               if (__igt_timeout(end_time, NULL))
-                       break;
-
-               i915_request_put(rq);
-       } while (1);
-
-       err = i915_request_wait(rq, 0, HZ / 5);
-       if (err < 0) {
-               pr_err("%s: request not completed!\n", engine->name);
-               goto err_wait;
-       }
-
-       igt_flush_test(engine->i915);
-
-       pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
-               engine->name,
-               intel_context_get_total_runtime_ns(ce),
-               intel_context_get_avg_runtime_ns(ce));
-
-       err = 0;
-       if (ce->runtime.num_underflow) {
-               pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
-                      engine->name,
-                      ce->runtime.num_underflow,
-                      ce->runtime.max_underflow);
-               GEM_TRACE_DUMP();
-               err = -EOVERFLOW;
-       }
-
-err_wait:
-       i915_request_put(rq);
-err_rq:
-       intel_context_put(ce);
-       return err;
-}
-
-static int live_pphwsp_runtime(void *arg)
-{
-       struct intel_gt *gt = arg;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       int err = 0;
-
-       /*
-        * Check that cumulative context runtime as stored in the pphwsp[16]
-        * is monotonic.
-        */
-
-       for_each_engine(engine, gt, id) {
-               err = __live_pphwsp_runtime(engine);
-               if (err)
-                       break;
-       }
-
-       if (igt_flush_test(gt->i915))
-               err = -EIO;
-
-       return err;
-}
-
-int intel_lrc_live_selftests(struct drm_i915_private *i915)
-{
-       static const struct i915_subtest tests[] = {
-               SUBTEST(live_lrc_layout),
-               SUBTEST(live_lrc_fixed),
-               SUBTEST(live_lrc_state),
-               SUBTEST(live_lrc_gpr),
-               SUBTEST(live_lrc_isolation),
-               SUBTEST(live_lrc_timestamp),
-               SUBTEST(live_lrc_garbage),
-               SUBTEST(live_pphwsp_runtime),
-               SUBTEST(live_lrc_indirect_ctx_bb),
-       };
-
-       if (!HAS_LOGICAL_RING_CONTEXTS(i915))
-               return 0;
-
-       return intel_gt_live_subtests(tests, &i915->gt);
-}
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
new file mode 100644 (file)
index 0000000..b761773
--- /dev/null
@@ -0,0 +1,1861 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "i915_selftest.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_reset.h"
+#include "intel_ring.h"
+#include "selftest_engine_heartbeat.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_live_test.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/lib_sw_fence.h"
+#include "shmem_utils.h"
+
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
+
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+#define NUM_GPR 16
+#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
+
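+/* A scratch page in the global GTT for the CS to spill register values into */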
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       int err;
+
+       obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
+
+       i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+       vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+       if (IS_ERR(vma)) {
+               i915_gem_object_put(obj);
+               return vma;
+       }
+
+       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+       if (err) {
+               i915_gem_object_put(obj);
+               return ERR_PTR(err);
+       }
+
+       return vma;
+}
+
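+/* Treat a request as in flight once it is active, on hold, or has started */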
+static bool is_active(struct i915_request *rq)
+{
+       if (i915_request_is_active(rq))
+               return true;
+
+       if (i915_request_on_hold(rq))
+               return true;
+
+       if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
+               return true;
+
+       return false;
+}
+
+static int wait_for_submit(struct intel_engine_cs *engine,
+                          struct i915_request *rq,
+                          unsigned long timeout)
+{
+       timeout += jiffies;
+       do {
+               bool done = time_after(jiffies, timeout);
+
+               if (i915_request_completed(rq)) /* that was quick! */
+                       return 0;
+
+               /* Wait until the HW has acknowledged the submission (or err) */
+               intel_engine_flush_submission(engine);
+               if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
+                       return 0;
+
+               if (done)
+                       return -ETIME;
+
+               cond_resched();
+       } while (1);
+}
+
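+/*
+ * Submit a barrier-priority request that writes 1 into the given status-page
+ * slot, releasing any MI_SEMAPHORE_WAIT polling on it.
+ */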
+static int emit_semaphore_signal(struct intel_context *ce, void *slot)
+{
+       const u32 offset =
+               i915_ggtt_offset(ce->engine->status_page.vma) +
+               offset_in_page(slot);
+       struct i915_request *rq;
+       u32 *cs;
+
+       rq = intel_context_create_request(ce);
+       if (IS_ERR(rq))
+               return PTR_ERR(rq);
+
+       cs = intel_ring_begin(rq, 4);
+       if (IS_ERR(cs)) {
+               i915_request_add(rq);
+               return PTR_ERR(cs);
+       }
+
+       *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+       *cs++ = offset;
+       *cs++ = 0;
+       *cs++ = 1;
+
+       intel_ring_advance(rq, cs);
+
+       rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+       i915_request_add(rq);
+       return 0;
+}
+
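+/*
+ * Order a kernel-context request after the context's last request and wait
+ * for it, forcing a switch away from the context so its state is saved.
+ */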
+static int context_flush(struct intel_context *ce, long timeout)
+{
+       struct i915_request *rq;
+       struct dma_fence *fence;
+       int err = 0;
+
+       rq = intel_engine_create_kernel_request(ce->engine);
+       if (IS_ERR(rq))
+               return PTR_ERR(rq);
+
+       fence = i915_active_fence_get(&ce->timeline->last_request);
+       if (fence) {
+               i915_request_await_dma_fence(rq, fence);
+               dma_fence_put(fence);
+       }
+
+       rq = i915_request_get(rq);
+       i915_request_add(rq);
+       if (i915_request_wait(rq, 0, timeout) < 0)
+               err = -ETIME;
+       i915_request_put(rq);
+
+       rmb(); /* We know the request is written, make sure all state is too! */
+       return err;
+}
+
+static int live_lrc_layout(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       u32 *lrc;
+       int err;
+
+       /*
+        * Check that the register offsets we use to create the initial reg state
+        * match the layout saved by HW.
+        */
+
+       lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!lrc)
+               return -ENOMEM;
+
+       err = 0;
+       for_each_engine(engine, gt, id) {
+               u32 *hw;
+               int dw;
+
+               if (!engine->default_state)
+                       continue;
+
+               hw = shmem_pin_map(engine->default_state);
+               if (IS_ERR(hw)) {
+                       err = PTR_ERR(hw);
+                       break;
+               }
+               hw += LRC_STATE_OFFSET / sizeof(*hw);
+
+               __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
+                               engine->kernel_context, engine, true);
+
+               dw = 0;
+               do {
+                       u32 lri = hw[dw];
+
+                       if (lri == 0) {
+                               dw++;
+                               continue;
+                       }
+
+                       if (lrc[dw] == 0) {
+                               pr_debug("%s: skipped instruction %x at dword %d\n",
+                                        engine->name, lri, dw);
+                               dw++;
+                               continue;
+                       }
+
+                       if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+                               pr_err("%s: Expected LRI command at dword %d, found %08x\n",
+                                      engine->name, dw, lri);
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       if (lrc[dw] != lri) {
+                               pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
+                                      engine->name, dw, lri, lrc[dw]);
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       lri &= 0x7f;
+                       lri++;
+                       dw++;
+
+                       while (lri) {
+                               if (hw[dw] != lrc[dw]) {
+                                       pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
+                                              engine->name, dw, hw[dw], lrc[dw]);
+                                       err = -EINVAL;
+                                       break;
+                               }
+
+                               /*
+                                * Skip over the actual register value as we
+                                * expect that to differ.
+                                */
+                               dw += 2;
+                               lri -= 2;
+                       }
+               } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+               if (err) {
+                       pr_info("%s: HW register image:\n", engine->name);
+                       igt_hexdump(hw, PAGE_SIZE);
+
+                       pr_info("%s: SW register image:\n", engine->name);
+                       igt_hexdump(lrc, PAGE_SIZE);
+               }
+
+               shmem_unpin_map(engine->default_state, hw);
+               if (err)
+                       break;
+       }
+
+       kfree(lrc);
+       return err;
+}
+
+static int find_offset(const u32 *lri, u32 offset)
+{
+       int i;
+
+       for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+               if (lri[i] == offset)
+                       return i;
+
+       return -1;
+}
+
+static int live_lrc_fixed(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       int err = 0;
+
+       /*
+        * Check that the assumed register offsets match the actual locations
+        * in the context image.
+        */
+
+       for_each_engine(engine, gt, id) {
+               const struct {
+                       u32 reg;
+                       u32 offset;
+                       const char *name;
+               } tbl[] = {
+                       {
+                               i915_mmio_reg_offset(RING_START(engine->mmio_base)),
+                               CTX_RING_START - 1,
+                               "RING_START"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
+                               CTX_RING_CTL - 1,
+                               "RING_CTL"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
+                               CTX_RING_HEAD - 1,
+                               "RING_HEAD"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
+                               CTX_RING_TAIL - 1,
+                               "RING_TAIL"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
+                               lrc_ring_mi_mode(engine),
+                               "RING_MI_MODE"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
+                               CTX_BB_STATE - 1,
+                               "BB_STATE"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
+                               lrc_ring_wa_bb_per_ctx(engine),
+                               "RING_BB_PER_CTX_PTR"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
+                               lrc_ring_indirect_ptr(engine),
+                               "RING_INDIRECT_CTX_PTR"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
+                               lrc_ring_indirect_offset(engine),
+                               "RING_INDIRECT_CTX_OFFSET"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
+                               CTX_TIMESTAMP - 1,
+                               "RING_CTX_TIMESTAMP"
+                       },
+                       {
+                               i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
+                               lrc_ring_gpr0(engine),
+                               "RING_CS_GPR0"
+                       },
+                       {
+                               i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
+                               lrc_ring_cmd_buf_cctl(engine),
+                               "RING_CMD_BUF_CCTL"
+                       },
+                       { },
+               }, *t;
+               u32 *hw;
+
+               if (!engine->default_state)
+                       continue;
+
+               hw = shmem_pin_map(engine->default_state);
+               if (IS_ERR(hw)) {
+                       err = PTR_ERR(hw);
+                       break;
+               }
+               hw += LRC_STATE_OFFSET / sizeof(*hw);
+
+               for (t = tbl; t->name; t++) {
+                       int dw = find_offset(hw, t->reg);
+
+                       if (dw != t->offset) {
+                               pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
+                                      engine->name,
+                                      t->name,
+                                      t->reg,
+                                      dw,
+                                      t->offset);
+                               err = -EINVAL;
+                       }
+               }
+
+               shmem_unpin_map(engine->default_state, hw);
+       }
+
+       return err;
+}
+
+static int __live_lrc_state(struct intel_engine_cs *engine,
+                           struct i915_vma *scratch)
+{
+       struct intel_context *ce;
+       struct i915_request *rq;
+       struct i915_gem_ww_ctx ww;
+       enum {
+               RING_START_IDX = 0,
+               RING_TAIL_IDX,
+               MAX_IDX
+       };
+       u32 expected[MAX_IDX];
+       u32 *cs;
+       int err;
+       int n;
+
+       ce = intel_context_create(engine);
+       if (IS_ERR(ce))
+               return PTR_ERR(ce);
+
+       i915_gem_ww_ctx_init(&ww, false);
+retry:
+       err = i915_gem_object_lock(scratch->obj, &ww);
+       if (!err)
+               err = intel_context_pin_ww(ce, &ww);
+       if (err)
+               goto err_put;
+
+       rq = i915_request_create(ce);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto err_unpin;
+       }
+
+       cs = intel_ring_begin(rq, 4 * MAX_IDX);
+       if (IS_ERR(cs)) {
+               err = PTR_ERR(cs);
+               i915_request_add(rq);
+               goto err_unpin;
+       }
+
+       *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+       *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+       *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+       *cs++ = 0;
+
+       expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+       *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+       *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+       *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+       *cs++ = 0;
+
+       err = i915_request_await_object(rq, scratch->obj, true);
+       if (!err)
+               err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
+
+       i915_request_get(rq);
+       i915_request_add(rq);
+       if (err)
+               goto err_rq;
+
+       intel_engine_flush_submission(engine);
+       expected[RING_TAIL_IDX] = ce->ring->tail;
+
+       if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+               err = -ETIME;
+               goto err_rq;
+       }
+
+       cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+       if (IS_ERR(cs)) {
+               err = PTR_ERR(cs);
+               goto err_rq;
+       }
+
+       for (n = 0; n < MAX_IDX; n++) {
+               if (cs[n] != expected[n]) {
+                       pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
+                              engine->name, n, cs[n], expected[n]);
+                       err = -EINVAL;
+                       break;
+               }
+       }
+
+       i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+       i915_request_put(rq);
+err_unpin:
+       intel_context_unpin(ce);
+err_put:
+       if (err == -EDEADLK) {
+               err = i915_gem_ww_ctx_backoff(&ww);
+               if (!err)
+                       goto retry;
+       }
+       i915_gem_ww_ctx_fini(&ww);
+       intel_context_put(ce);
+       return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       struct i915_vma *scratch;
+       enum intel_engine_id id;
+       int err = 0;
+
+       /*
+        * Check that the live register state matches what we expect for this
+        * intel_context.
+        */
+
+       scratch = create_scratch(gt);
+       if (IS_ERR(scratch))
+               return PTR_ERR(scratch);
+
+       for_each_engine(engine, gt, id) {
+               err = __live_lrc_state(engine, scratch);
+               if (err)
+                       break;
+       }
+
+       if (igt_flush_test(gt->i915))
+               err = -EIO;
+
+       i915_vma_unpin_and_release(&scratch, 0);
+       return err;
+}
+
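+/* Load STACK_MAGIC into every CS_GPR of ce's engine, at barrier priority. */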
+static int gpr_make_dirty(struct intel_context *ce)
+{
+       struct i915_request *rq;
+       u32 *cs;
+       int n;
+
+       rq = intel_context_create_request(ce);
+       if (IS_ERR(rq))
+               return PTR_ERR(rq);
+
+       cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
+       if (IS_ERR(cs)) {
+               i915_request_add(rq);
+               return PTR_ERR(cs);
+       }
+
+       *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
+       for (n = 0; n < NUM_GPR_DW; n++) {
+               *cs++ = CS_GPR(ce->engine, n);
+               *cs++ = STACK_MAGIC;
+       }
+       *cs++ = MI_NOOP;
+
+       intel_ring_advance(rq, cs);
+
+       rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+       i915_request_add(rq);
+
+       return 0;
+}
+
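+/*
+ * Build a request on ce that blocks on a status page semaphore and, once
+ * released, stores each CS_GPR register out to the scratch buffer. The
+ * semaphore lets the caller choose when the GPR are sampled, and whether
+ * a preemption event is injected first.
+ */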
+static struct i915_request *
+__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
+{
+       const u32 offset =
+               i915_ggtt_offset(ce->engine->status_page.vma) +
+               offset_in_page(slot);
+       struct i915_request *rq;
+       u32 *cs;
+       int err;
+       int n;
+
+       rq = intel_context_create_request(ce);
+       if (IS_ERR(rq))
+               return rq;
+
+       cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
+       if (IS_ERR(cs)) {
+               i915_request_add(rq);
+               return ERR_CAST(cs);
+       }
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+       *cs++ = MI_NOOP;
+
+       *cs++ = MI_SEMAPHORE_WAIT |
+               MI_SEMAPHORE_GLOBAL_GTT |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_SAD_NEQ_SDD;
+       *cs++ = 0;
+       *cs++ = offset;
+       *cs++ = 0;
+
+       for (n = 0; n < NUM_GPR_DW; n++) {
+               *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+               *cs++ = CS_GPR(ce->engine, n);
+               *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+               *cs++ = 0;
+       }
+
+       i915_vma_lock(scratch);
+       err = i915_request_await_object(rq, scratch->obj, true);
+       if (!err)
+               err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
+       i915_vma_unlock(scratch);
+
+       i915_request_get(rq);
+       i915_request_add(rq);
+       if (err) {
+               i915_request_put(rq);
+               rq = ERR_PTR(err);
+       }
+
+       return rq;
+}
+
+static int __live_lrc_gpr(struct intel_engine_cs *engine,
+                         struct i915_vma *scratch,
+                         bool preempt)
+{
+       u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
+       struct intel_context *ce;
+       struct i915_request *rq;
+       u32 *cs;
+       int err;
+       int n;
+
+       if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
+               return 0; /* GPR only on rcs0 for gen8 */
+
+       err = gpr_make_dirty(engine->kernel_context);
+       if (err)
+               return err;
+
+       ce = intel_context_create(engine);
+       if (IS_ERR(ce))
+               return PTR_ERR(ce);
+
+       rq = __gpr_read(ce, scratch, slot);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto err_put;
+       }
+
+       err = wait_for_submit(engine, rq, HZ / 2);
+       if (err)
+               goto err_rq;
+
+       if (preempt) {
+               err = gpr_make_dirty(engine->kernel_context);
+               if (err)
+                       goto err_rq;
+
+               err = emit_semaphore_signal(engine->kernel_context, slot);
+               if (err)
+                       goto err_rq;
+       } else {
+               slot[0] = 1;
+               wmb();
+       }
+
+       if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+               err = -ETIME;
+               goto err_rq;
+       }
+
+       cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+       if (IS_ERR(cs)) {
+               err = PTR_ERR(cs);
+               goto err_rq;
+       }
+
+       for (n = 0; n < NUM_GPR_DW; n++) {
+               if (cs[n]) {
+                       pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
+                              engine->name,
+                              n / 2, n & 1 ? "udw" : "ldw",
+                              cs[n]);
+                       err = -EINVAL;
+                       break;
+               }
+       }
+
+       i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+       memset32(&slot[0], -1, 4);
+       wmb();
+       i915_request_put(rq);
+err_put:
+       intel_context_put(ce);
+       return err;
+}
+
+static int live_lrc_gpr(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       struct i915_vma *scratch;
+       enum intel_engine_id id;
+       int err = 0;
+
+       /*
+        * Check that the GPR registers are cleared in new contexts, as we
+        * need to avoid leaking any information from previous contexts.
+        */
+
+       scratch = create_scratch(gt);
+       if (IS_ERR(scratch))
+               return PTR_ERR(scratch);
+
+       for_each_engine(engine, gt, id) {
+               st_engine_heartbeat_disable(engine);
+
+               err = __live_lrc_gpr(engine, scratch, false);
+               if (err)
+                       goto err;
+
+               err = __live_lrc_gpr(engine, scratch, true);
+               if (err)
+                       goto err;
+
+err:
+               st_engine_heartbeat_enable(engine);
+               if (igt_flush_test(gt->i915))
+                       err = -EIO;
+               if (err)
+                       break;
+       }
+
+       i915_vma_unpin_and_release(&scratch, 0);
+       return err;
+}
+
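+/*
+ * Build a request on ce that blocks on a status page semaphore and, once
+ * released, stores RING_CTX_TIMESTAMP into slot[idx].
+ */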
+static struct i915_request *
+create_timestamp(struct intel_context *ce, void *slot, int idx)
+{
+       const u32 offset =
+               i915_ggtt_offset(ce->engine->status_page.vma) +
+               offset_in_page(slot);
+       struct i915_request *rq;
+       u32 *cs;
+       int err;
+
+       rq = intel_context_create_request(ce);
+       if (IS_ERR(rq))
+               return rq;
+
+       cs = intel_ring_begin(rq, 10);
+       if (IS_ERR(cs)) {
+               err = PTR_ERR(cs);
+               goto err;
+       }
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+       *cs++ = MI_NOOP;
+
+       *cs++ = MI_SEMAPHORE_WAIT |
+               MI_SEMAPHORE_GLOBAL_GTT |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_SAD_NEQ_SDD;
+       *cs++ = 0;
+       *cs++ = offset;
+       *cs++ = 0;
+
+       *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+       *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
+       *cs++ = offset + idx * sizeof(u32);
+       *cs++ = 0;
+
+       intel_ring_advance(rq, cs);
+
+       rq->sched.attr.priority = I915_PRIORITY_MASK;
+       err = 0;
+err:
+       i915_request_get(rq);
+       i915_request_add(rq);
+       if (err) {
+               i915_request_put(rq);
+               return ERR_PTR(err);
+       }
+
+       return rq;
+}
+
+struct lrc_timestamp {
+       struct intel_engine_cs *engine;
+       struct intel_context *ce[2];
+       u32 poison;
+};
+
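+/* The timestamp is a free-running u32, so compare via signed difference. */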
+static bool timestamp_advanced(u32 start, u32 end)
+{
+       return (s32)(end - start) > 0;
+}
+
+static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
+{
+       u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
+       struct i915_request *rq;
+       u32 timestamp;
+       int err = 0;
+
+       arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
+       rq = create_timestamp(arg->ce[0], slot, 1);
+       if (IS_ERR(rq))
+               return PTR_ERR(rq);
+
+       err = wait_for_submit(rq->engine, rq, HZ / 2);
+       if (err)
+               goto err;
+
+       if (preempt) {
+               arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
+               err = emit_semaphore_signal(arg->ce[1], slot);
+               if (err)
+                       goto err;
+       } else {
+               slot[0] = 1;
+               wmb();
+       }
+
+       /* And wait for switch to kernel (to save our context to memory) */
+       err = context_flush(arg->ce[0], HZ / 2);
+       if (err)
+               goto err;
+
+       if (!timestamp_advanced(arg->poison, slot[1])) {
+               pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
+                      arg->engine->name, preempt ? "preempt" : "simple",
+                      arg->poison, slot[1]);
+               err = -EINVAL;
+       }
+
+       timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
+       if (!timestamp_advanced(slot[1], timestamp)) {
+               pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
+                      arg->engine->name, preempt ? "preempt" : "simple",
+                      slot[1], timestamp);
+               err = -EINVAL;
+       }
+
+err:
+       memset32(slot, -1, 4);
+       i915_request_put(rq);
+       return err;
+}
+
+static int live_lrc_timestamp(void *arg)
+{
+       struct lrc_timestamp data = {};
+       struct intel_gt *gt = arg;
+       enum intel_engine_id id;
+       const u32 poison[] = {
+               0,
+               S32_MAX,
+               (u32)S32_MAX + 1,
+               U32_MAX,
+       };
+
+       /*
+        * We want to verify that the timestamp is saved and restored across
+        * context switches and is monotonic.
+        *
+        * So we do this with a little bit of LRC poisoning to check various
+        * boundary conditions, and see what happens if we preempt the context
+        * with a second request (carrying more poison into the timestamp).
+        */
+
+       for_each_engine(data.engine, gt, id) {
+               int i, err = 0;
+
+               st_engine_heartbeat_disable(data.engine);
+
+               for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
+                       struct intel_context *tmp;
+
+                       tmp = intel_context_create(data.engine);
+                       if (IS_ERR(tmp)) {
+                               err = PTR_ERR(tmp);
+                               goto err;
+                       }
+
+                       err = intel_context_pin(tmp);
+                       if (err) {
+                               intel_context_put(tmp);
+                               goto err;
+                       }
+
+                       data.ce[i] = tmp;
+               }
+
+               for (i = 0; i < ARRAY_SIZE(poison); i++) {
+                       data.poison = poison[i];
+
+                       err = __lrc_timestamp(&data, false);
+                       if (err)
+                               break;
+
+                       err = __lrc_timestamp(&data, true);
+                       if (err)
+                               break;
+               }
+
+err:
+               st_engine_heartbeat_enable(data.engine);
+               for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
+                       if (!data.ce[i])
+                               break;
+
+                       intel_context_unpin(data.ce[i]);
+                       intel_context_put(data.ce[i]);
+               }
+
+               if (igt_flush_test(gt->i915))
+                       err = -EIO;
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static struct i915_vma *
+create_user_vma(struct i915_address_space *vm, unsigned long size)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       int err;
+
+       obj = i915_gem_object_create_internal(vm->i915, size);
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
+
+       vma = i915_vma_instance(obj, vm, NULL);
+       if (IS_ERR(vma)) {
+               i915_gem_object_put(obj);
+               return vma;
+       }
+
+       err = i915_vma_pin(vma, 0, 0, PIN_USER);
+       if (err) {
+               i915_gem_object_put(obj);
+               return ERR_PTR(err);
+       }
+
+       return vma;
+}
+
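+/*
+ * Build a batch that walks the LRI list in the engine's default context
+ * image and emits an SRM for every register listed, capturing the live
+ * register values into the scratch buffer.
+ */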
+static struct i915_vma *
+store_context(struct intel_context *ce, struct i915_vma *scratch)
+{
+       struct i915_vma *batch;
+       u32 dw, x, *cs, *hw;
+       u32 *defaults;
+
+       batch = create_user_vma(ce->vm, SZ_64K);
+       if (IS_ERR(batch))
+               return batch;
+
+       cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+       if (IS_ERR(cs)) {
+               i915_vma_put(batch);
+               return ERR_CAST(cs);
+       }
+
+       defaults = shmem_pin_map(ce->engine->default_state);
+       if (!defaults) {
+               i915_gem_object_unpin_map(batch->obj);
+               i915_vma_put(batch);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       x = 0;
+       dw = 0;
+       hw = defaults;
+       hw += LRC_STATE_OFFSET / sizeof(*hw);
+       do {
+               u32 len = hw[dw] & 0x7f;
+
+               if (hw[dw] == 0) {
+                       dw++;
+                       continue;
+               }
+
+               if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+                       dw += len + 2;
+                       continue;
+               }
+
+               dw++;
+               len = (len + 1) / 2;
+               while (len--) {
+                       *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+                       *cs++ = hw[dw];
+                       *cs++ = lower_32_bits(scratch->node.start + x);
+                       *cs++ = upper_32_bits(scratch->node.start + x);
+
+                       dw += 2;
+                       x += 4;
+               }
+       } while (dw < PAGE_SIZE / sizeof(u32) &&
+                (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+       *cs++ = MI_BATCH_BUFFER_END;
+
+       shmem_unpin_map(ce->engine->default_state, defaults);
+
+       i915_gem_object_flush_map(batch->obj);
+       i915_gem_object_unpin_map(batch->obj);
+
+       return batch;
+}
+
+static int move_to_active(struct i915_request *rq,
+                         struct i915_vma *vma,
+                         unsigned int flags)
+{
+       int err;
+
+       i915_vma_lock(vma);
+       err = i915_request_await_object(rq, vma->obj, flags);
+       if (!err)
+               err = i915_vma_move_to_active(vma, rq, flags);
+       i915_vma_unlock(vma);
+
+       return err;
+}
+
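+/*
+ * Build a request on ce that captures its context registers into
+ * 'before', then blocks on the semaphore with arbitration enabled (the
+ * window in which it may be preempted), and finally captures the
+ * registers again into 'after'.
+ */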
+static struct i915_request *
+record_registers(struct intel_context *ce,
+                struct i915_vma *before,
+                struct i915_vma *after,
+                u32 *sema)
+{
+       struct i915_vma *b_before, *b_after;
+       struct i915_request *rq;
+       u32 *cs;
+       int err;
+
+       b_before = store_context(ce, before);
+       if (IS_ERR(b_before))
+               return ERR_CAST(b_before);
+
+       b_after = store_context(ce, after);
+       if (IS_ERR(b_after)) {
+               rq = ERR_CAST(b_after);
+               goto err_before;
+       }
+
+       rq = intel_context_create_request(ce);
+       if (IS_ERR(rq))
+               goto err_after;
+
+       err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
+       if (err)
+               goto err_rq;
+
+       err = move_to_active(rq, b_before, 0);
+       if (err)
+               goto err_rq;
+
+       err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
+       if (err)
+               goto err_rq;
+
+       err = move_to_active(rq, b_after, 0);
+       if (err)
+               goto err_rq;
+
+       cs = intel_ring_begin(rq, 14);
+       if (IS_ERR(cs)) {
+               err = PTR_ERR(cs);
+               goto err_rq;
+       }
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+       *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+       *cs++ = lower_32_bits(b_before->node.start);
+       *cs++ = upper_32_bits(b_before->node.start);
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+       *cs++ = MI_SEMAPHORE_WAIT |
+               MI_SEMAPHORE_GLOBAL_GTT |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_SAD_NEQ_SDD;
+       *cs++ = 0;
+       *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+               offset_in_page(sema);
+       *cs++ = 0;
+       *cs++ = MI_NOOP;
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+       *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+       *cs++ = lower_32_bits(b_after->node.start);
+       *cs++ = upper_32_bits(b_after->node.start);
+
+       intel_ring_advance(rq, cs);
+
+       WRITE_ONCE(*sema, 0);
+       i915_request_get(rq);
+       i915_request_add(rq);
+err_after:
+       i915_vma_put(b_after);
+err_before:
+       i915_vma_put(b_before);
+       return rq;
+
+err_rq:
+       i915_request_add(rq);
+       rq = ERR_PTR(err);
+       goto err_after;
+}
+
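+/*
+ * Build a batch that walks the LRI list in the engine's default context
+ * image and rewrites every register listed with the poison value.
+ */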
+static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
+{
+       struct i915_vma *batch;
+       u32 dw, *cs, *hw;
+       u32 *defaults;
+
+       batch = create_user_vma(ce->vm, SZ_64K);
+       if (IS_ERR(batch))
+               return batch;
+
+       cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+       if (IS_ERR(cs)) {
+               i915_vma_put(batch);
+               return ERR_CAST(cs);
+       }
+
+       defaults = shmem_pin_map(ce->engine->default_state);
+       if (!defaults) {
+               i915_gem_object_unpin_map(batch->obj);
+               i915_vma_put(batch);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       dw = 0;
+       hw = defaults;
+       hw += LRC_STATE_OFFSET / sizeof(*hw);
+       do {
+               u32 len = hw[dw] & 0x7f;
+
+               if (hw[dw] == 0) {
+                       dw++;
+                       continue;
+               }
+
+               if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+                       dw += len + 2;
+                       continue;
+               }
+
+               dw++;
+               len = (len + 1) / 2;
+               *cs++ = MI_LOAD_REGISTER_IMM(len);
+               while (len--) {
+                       *cs++ = hw[dw];
+                       *cs++ = poison;
+                       dw += 2;
+               }
+       } while (dw < PAGE_SIZE / sizeof(u32) &&
+                (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+       *cs++ = MI_BATCH_BUFFER_END;
+
+       shmem_unpin_map(ce->engine->default_state, defaults);
+
+       i915_gem_object_flush_map(batch->obj);
+       i915_gem_object_unpin_map(batch->obj);
+
+       return batch;
+}
+
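+/*
+ * Run the poison batch from ce with arbitration disabled, so the writes
+ * cannot be interrupted, then raise the semaphore to release the victim.
+ */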
+static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
+{
+       struct i915_request *rq;
+       struct i915_vma *batch;
+       u32 *cs;
+       int err;
+
+       batch = load_context(ce, poison);
+       if (IS_ERR(batch))
+               return PTR_ERR(batch);
+
+       rq = intel_context_create_request(ce);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto err_batch;
+       }
+
+       err = move_to_active(rq, batch, 0);
+       if (err)
+               goto err_rq;
+
+       cs = intel_ring_begin(rq, 8);
+       if (IS_ERR(cs)) {
+               err = PTR_ERR(cs);
+               goto err_rq;
+       }
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+       *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+       *cs++ = lower_32_bits(batch->node.start);
+       *cs++ = upper_32_bits(batch->node.start);
+
+       *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+       *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+               offset_in_page(sema);
+       *cs++ = 0;
+       *cs++ = 1;
+
+       intel_ring_advance(rq, cs);
+
+       rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+err_rq:
+       i915_request_add(rq);
+err_batch:
+       i915_vma_put(batch);
+       return err;
+}
+
+static bool is_moving(u32 a, u32 b)
+{
+       return a != b;
+}
+
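+/*
+ * Every register that is stable in the reference pass (A[0] == A[1])
+ * must read back its reference value in the result pass; a mismatch
+ * means the poison leaked into our context. RING_HEAD and RING_TAIL
+ * are exempt as they are expected to move.
+ */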
+static int compare_isolation(struct intel_engine_cs *engine,
+                            struct i915_vma *ref[2],
+                            struct i915_vma *result[2],
+                            struct intel_context *ce,
+                            u32 poison)
+{
+       u32 x, dw, *hw, *lrc;
+       u32 *A[2], *B[2];
+       u32 *defaults;
+       int err = 0;
+
+       A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
+       if (IS_ERR(A[0]))
+               return PTR_ERR(A[0]);
+
+       A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
+       if (IS_ERR(A[1])) {
+               err = PTR_ERR(A[1]);
+               goto err_A0;
+       }
+
+       B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
+       if (IS_ERR(B[0])) {
+               err = PTR_ERR(B[0]);
+               goto err_A1;
+       }
+
+       B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
+       if (IS_ERR(B[1])) {
+               err = PTR_ERR(B[1]);
+               goto err_B0;
+       }
+
+       lrc = i915_gem_object_pin_map(ce->state->obj,
+                                     i915_coherent_map_type(engine->i915));
+       if (IS_ERR(lrc)) {
+               err = PTR_ERR(lrc);
+               goto err_B1;
+       }
+       lrc += LRC_STATE_OFFSET / sizeof(*hw);
+
+       defaults = shmem_pin_map(ce->engine->default_state);
+       if (!defaults) {
+               err = -ENOMEM;
+               goto err_lrc;
+       }
+
+       x = 0;
+       dw = 0;
+       hw = defaults;
+       hw += LRC_STATE_OFFSET / sizeof(*hw);
+       do {
+               u32 len = hw[dw] & 0x7f;
+
+               if (hw[dw] == 0) {
+                       dw++;
+                       continue;
+               }
+
+               if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+                       dw += len + 2;
+                       continue;
+               }
+
+               dw++;
+               len = (len + 1) / 2;
+               while (len--) {
+                       if (!is_moving(A[0][x], A[1][x]) &&
+                           (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
+                               switch (hw[dw] & 4095) {
+                               case 0x30: /* RING_HEAD */
+                               case 0x34: /* RING_TAIL */
+                                       break;
+
+                               default:
+                                       pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
+                                              engine->name, dw,
+                                              hw[dw], hw[dw + 1],
+                                              A[0][x], B[0][x], B[1][x],
+                                              poison, lrc[dw + 1]);
+                                       err = -EINVAL;
+                               }
+                       }
+                       dw += 2;
+                       x++;
+               }
+       } while (dw < PAGE_SIZE / sizeof(u32) &&
+                (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+       shmem_unpin_map(ce->engine->default_state, defaults);
+err_lrc:
+       i915_gem_object_unpin_map(ce->state->obj);
+err_B1:
+       i915_gem_object_unpin_map(result[1]->obj);
+err_B0:
+       i915_gem_object_unpin_map(result[0]->obj);
+err_A1:
+       i915_gem_object_unpin_map(ref[1]->obj);
+err_A0:
+       i915_gem_object_unpin_map(ref[0]->obj);
+       return err;
+}
+
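+/*
+ * Record context A's registers twice to establish a reference, then
+ * record them again while context B poisons every register it can
+ * reach; if the second capture differs, B broke the isolation of A.
+ */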
+static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
+{
+       u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
+       struct i915_vma *ref[2], *result[2];
+       struct intel_context *A, *B;
+       struct i915_request *rq;
+       int err;
+
+       A = intel_context_create(engine);
+       if (IS_ERR(A))
+               return PTR_ERR(A);
+
+       B = intel_context_create(engine);
+       if (IS_ERR(B)) {
+               err = PTR_ERR(B);
+               goto err_A;
+       }
+
+       ref[0] = create_user_vma(A->vm, SZ_64K);
+       if (IS_ERR(ref[0])) {
+               err = PTR_ERR(ref[0]);
+               goto err_B;
+       }
+
+       ref[1] = create_user_vma(A->vm, SZ_64K);
+       if (IS_ERR(ref[1])) {
+               err = PTR_ERR(ref[1]);
+               goto err_ref0;
+       }
+
+       rq = record_registers(A, ref[0], ref[1], sema);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto err_ref1;
+       }
+
+       WRITE_ONCE(*sema, 1);
+       wmb();
+
+       if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+               i915_request_put(rq);
+               err = -ETIME;
+               goto err_ref1;
+       }
+       i915_request_put(rq);
+
+       result[0] = create_user_vma(A->vm, SZ_64K);
+       if (IS_ERR(result[0])) {
+               err = PTR_ERR(result[0]);
+               goto err_ref1;
+       }
+
+       result[1] = create_user_vma(A->vm, SZ_64K);
+       if (IS_ERR(result[1])) {
+               err = PTR_ERR(result[1]);
+               goto err_result0;
+       }
+
+       rq = record_registers(A, result[0], result[1], sema);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto err_result1;
+       }
+
+       err = poison_registers(B, poison, sema);
+       if (err) {
+               WRITE_ONCE(*sema, -1);
+               i915_request_put(rq);
+               goto err_result1;
+       }
+
+       if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+               i915_request_put(rq);
+               err = -ETIME;
+               goto err_result1;
+       }
+       i915_request_put(rq);
+
+       err = compare_isolation(engine, ref, result, A, poison);
+
+err_result1:
+       i915_vma_put(result[1]);
+err_result0:
+       i915_vma_put(result[0]);
+err_ref1:
+       i915_vma_put(ref[1]);
+err_ref0:
+       i915_vma_put(ref[0]);
+err_B:
+       intel_context_put(B);
+err_A:
+       intel_context_put(A);
+       return err;
+}
+
+static bool skip_isolation(const struct intel_engine_cs *engine)
+{
+       if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
+               return true;
+
+       if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
+               return true;
+
+       return false;
+}
+
+static int live_lrc_isolation(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       const u32 poison[] = {
+               STACK_MAGIC,
+               0x3a3a3a3a,
+               0x5c5c5c5c,
+               0xffffffff,
+               0xffff0000,
+       };
+       int err = 0;
+
+       /*
+        * Our goal is to try and verify that per-context state cannot be
+        * tampered with by another non-privileged client.
+        *
+        * We take the list of context registers from the LRI in the default
+        * context image and attempt to modify that list from a remote context.
+        */
+
+       for_each_engine(engine, gt, id) {
+               int i;
+
+               /* Just don't even ask */
+               if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
+                   skip_isolation(engine))
+                       continue;
+
+               intel_engine_pm_get(engine);
+               for (i = 0; i < ARRAY_SIZE(poison); i++) {
+                       int result;
+
+                       result = __lrc_isolation(engine, poison[i]);
+                       if (result && !err)
+                               err = result;
+
+                       result = __lrc_isolation(engine, ~poison[i]);
+                       if (result && !err)
+                               err = result;
+               }
+               intel_engine_pm_put(engine);
+               if (igt_flush_test(gt->i915)) {
+                       err = -EIO;
+                       break;
+               }
+       }
+
+       return err;
+}
+
+static int indirect_ctx_submit_req(struct intel_context *ce)
+{
+       struct i915_request *rq;
+       int err = 0;
+
+       rq = intel_context_create_request(ce);
+       if (IS_ERR(rq))
+               return PTR_ERR(rq);
+
+       i915_request_get(rq);
+       i915_request_add(rq);
+
+       if (i915_request_wait(rq, 0, HZ / 5) < 0)
+               err = -ETIME;
+
+       i915_request_put(rq);
+
+       return err;
+}
+
+#define CTX_BB_CANARY_OFFSET (3 * 1024)
+#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
+
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+       *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
+               MI_SRM_LRM_GLOBAL_GTT |
+               MI_LRI_LRM_CS_MMIO;
+       *cs++ = i915_mmio_reg_offset(RING_START(0));
+       *cs++ = i915_ggtt_offset(ce->state) +
+               context_wa_bb_offset(ce) +
+               CTX_BB_CANARY_OFFSET;
+       *cs++ = 0;
+
+       return cs;
+}
+
+static void
+indirect_ctx_bb_setup(struct intel_context *ce)
+{
+       u32 *cs = context_indirect_bb(ce);
+
+       cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
+
+       setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+}
+
+static bool check_ring_start(struct intel_context *ce)
+{
+       const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
+               LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+
+       if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
+               return true;
+
+       pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
+              ctx_bb[CTX_BB_CANARY_INDEX],
+              ce->lrc_reg_state[CTX_RING_START]);
+
+       return false;
+}
+
+static int indirect_ctx_bb_check(struct intel_context *ce)
+{
+       int err;
+
+       err = indirect_ctx_submit_req(ce);
+       if (err)
+               return err;
+
+       if (!check_ring_start(ce))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+{
+       struct intel_context *a, *b;
+       int err;
+
+       a = intel_context_create(engine);
+       if (IS_ERR(a))
+               return PTR_ERR(a);
+       err = intel_context_pin(a);
+       if (err)
+               goto put_a;
+
+       b = intel_context_create(engine);
+       if (IS_ERR(b)) {
+               err = PTR_ERR(b);
+               goto unpin_a;
+       }
+       err = intel_context_pin(b);
+       if (err)
+               goto put_b;
+
+       /* We use the already reserved extra page in the context state */
+       if (!a->wa_bb_page) {
+               GEM_BUG_ON(b->wa_bb_page);
+               GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
+               goto unpin_b;
+       }
+
+       /*
+        * In order to test that our per-context bb is truly per context, and
+        * that it executes at the intended spot in the context restore
+        * process, make the batch store the ring start value to memory.
+        * As the ring start is restored before the indirect ctx bb runs, and
+        * as it differs for each context, it fits this purpose well.
+        */
+       indirect_ctx_bb_setup(a);
+       indirect_ctx_bb_setup(b);
+
+       err = indirect_ctx_bb_check(a);
+       if (err)
+               goto unpin_b;
+
+       err = indirect_ctx_bb_check(b);
+
+unpin_b:
+       intel_context_unpin(b);
+put_b:
+       intel_context_put(b);
+unpin_a:
+       intel_context_unpin(a);
+put_a:
+       intel_context_put(a);
+
+       return err;
+}
+
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       int err = 0;
+
+       for_each_engine(engine, gt, id) {
+               intel_engine_pm_get(engine);
+               err = __live_lrc_indirect_ctx_bb(engine);
+               intel_engine_pm_put(engine);
+
+               if (igt_flush_test(gt->i915))
+                       err = -EIO;
+
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
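+/*
+ * Reset the engine while it is executing the corrupted context, taking
+ * the per-engine reset bit and disabling the submission tasklet for the
+ * duration of the reset.
+ */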
+static void garbage_reset(struct intel_engine_cs *engine,
+                         struct i915_request *rq)
+{
+       const unsigned int bit = I915_RESET_ENGINE + engine->id;
+       unsigned long *lock = &engine->gt->reset.flags;
+
+       if (test_and_set_bit(bit, lock))
+               return;
+
+       tasklet_disable(&engine->execlists.tasklet);
+
+       if (!rq->fence.error)
+               intel_engine_reset(engine, NULL);
+
+       tasklet_enable(&engine->execlists.tasklet);
+       clear_and_wake_up_bit(bit, lock);
+}
+
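+/*
+ * Scribble random bytes over the whole register state of ce (everything
+ * beyond the ppHWSP) and submit a request to execute the garbage.
+ */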
+static struct i915_request *garbage(struct intel_context *ce,
+                                   struct rnd_state *prng)
+{
+       struct i915_request *rq;
+       int err;
+
+       err = intel_context_pin(ce);
+       if (err)
+               return ERR_PTR(err);
+
+       prandom_bytes_state(prng,
+                           ce->lrc_reg_state,
+                           ce->engine->context_size -
+                           LRC_STATE_OFFSET);
+
+       rq = intel_context_create_request(ce);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto err_unpin;
+       }
+
+       i915_request_get(rq);
+       i915_request_add(rq);
+       return rq;
+
+err_unpin:
+       intel_context_unpin(ce);
+       return ERR_PTR(err);
+}
+
+static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
+{
+       struct intel_context *ce;
+       struct i915_request *hang;
+       int err = 0;
+
+       ce = intel_context_create(engine);
+       if (IS_ERR(ce))
+               return PTR_ERR(ce);
+
+       hang = garbage(ce, prng);
+       if (IS_ERR(hang)) {
+               err = PTR_ERR(hang);
+               goto err_ce;
+       }
+
+       if (wait_for_submit(engine, hang, HZ / 2)) {
+               i915_request_put(hang);
+               err = -ETIME;
+               goto err_ce;
+       }
+
+       intel_context_set_banned(ce);
+       garbage_reset(engine, hang);
+
+       intel_engine_flush_submission(engine);
+       if (!hang->fence.error) {
+               i915_request_put(hang);
+               pr_err("%s: corrupted context was not reset\n",
+                      engine->name);
+               err = -EINVAL;
+               goto err_ce;
+       }
+
+       if (i915_request_wait(hang, 0, HZ / 2) < 0) {
+               pr_err("%s: corrupted context did not recover\n",
+                      engine->name);
+               i915_request_put(hang);
+               err = -EIO;
+               goto err_ce;
+       }
+       i915_request_put(hang);
+
+err_ce:
+       intel_context_put(ce);
+       return err;
+}
+
+static int live_lrc_garbage(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       /*
+        * Verify that we can recover if one context state is completely
+        * corrupted.
+        */
+
+       if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
+               return 0;
+
+       for_each_engine(engine, gt, id) {
+               I915_RND_STATE(prng);
+               int err = 0, i;
+
+               if (!intel_has_reset_engine(engine->gt))
+                       continue;
+
+               intel_engine_pm_get(engine);
+               for (i = 0; i < 3; i++) {
+                       err = __lrc_garbage(engine, &prng);
+                       if (err)
+                               break;
+               }
+               intel_engine_pm_put(engine);
+
+               if (igt_flush_test(gt->i915))
+                       err = -EIO;
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
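+/*
+ * Flood the engine with batches of empty requests until the timeout,
+ * then check that the accumulated pphwsp runtime never underflowed.
+ */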
+static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
+{
+       struct intel_context *ce;
+       struct i915_request *rq;
+       IGT_TIMEOUT(end_time);
+       int err;
+
+       ce = intel_context_create(engine);
+       if (IS_ERR(ce))
+               return PTR_ERR(ce);
+
+       ce->runtime.num_underflow = 0;
+       ce->runtime.max_underflow = 0;
+
+       do {
+               unsigned int loop = 1024;
+
+               while (loop) {
+                       rq = intel_context_create_request(ce);
+                       if (IS_ERR(rq)) {
+                               err = PTR_ERR(rq);
+                               goto err_rq;
+                       }
+
+                       if (--loop == 0)
+                               i915_request_get(rq);
+
+                       i915_request_add(rq);
+               }
+
+               if (__igt_timeout(end_time, NULL))
+                       break;
+
+               i915_request_put(rq);
+       } while (1);
+
+       err = i915_request_wait(rq, 0, HZ / 5);
+       if (err < 0) {
+               pr_err("%s: request not completed!\n", engine->name);
+               goto err_wait;
+       }
+
+       igt_flush_test(engine->i915);
+
+       pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
+               engine->name,
+               intel_context_get_total_runtime_ns(ce),
+               intel_context_get_avg_runtime_ns(ce));
+
+       err = 0;
+       if (ce->runtime.num_underflow) {
+               pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
+                      engine->name,
+                      ce->runtime.num_underflow,
+                      ce->runtime.max_underflow);
+               GEM_TRACE_DUMP();
+               err = -EOVERFLOW;
+       }
+
+err_wait:
+       i915_request_put(rq);
+err_rq:
+       intel_context_put(ce);
+       return err;
+}
+
+static int live_pphwsp_runtime(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       int err = 0;
+
+       /*
+        * Check that the cumulative context runtime, as stored in the
+        * pphwsp[16], is monotonic.
+        */
+
+       for_each_engine(engine, gt, id) {
+               err = __live_pphwsp_runtime(engine);
+               if (err)
+                       break;
+       }
+
+       if (igt_flush_test(gt->i915))
+               err = -EIO;
+
+       return err;
+}
+
+int intel_lrc_live_selftests(struct drm_i915_private *i915)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(live_lrc_layout),
+               SUBTEST(live_lrc_fixed),
+               SUBTEST(live_lrc_state),
+               SUBTEST(live_lrc_gpr),
+               SUBTEST(live_lrc_isolation),
+               SUBTEST(live_lrc_timestamp),
+               SUBTEST(live_lrc_garbage),
+               SUBTEST(live_pphwsp_runtime),
+               SUBTEST(live_lrc_indirect_ctx_bb),
+       };
+
+       if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+               return 0;
+
+       return intel_gt_live_subtests(tests, &i915->gt);
+}
index 1a2e4f6..1752671 100644 (file)
@@ -3,8 +3,8 @@
  * Copyright © 2014-2019 Intel Corporation
  */
 
-#include "gt/intel_execlists_submission.h" /* lrc layout */
 #include "gt/intel_gt.h"
+#include "gt/intel_lrc.h"
 #include "intel_guc_ads.h"
 #include "intel_uc.h"
 #include "i915_drv.h"
index 8528ab5..694ee42 100644 (file)
@@ -11,7 +11,7 @@
 #include "gt/intel_execlists_submission.h" /* XXX */
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
-#include "gt/intel_lrc_reg.h"
+#include "gt/intel_lrc.h"
 #include "gt/intel_ring.h"
 
 #include "intel_guc_submission.h"
@@ -402,6 +402,28 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
                memset(execlists->inflight, 0, sizeof(execlists->inflight));
 }
 
+static void guc_reset_state(struct intel_context *ce,
+                           struct intel_engine_cs *engine,
+                           u32 head,
+                           bool scrub)
+{
+       GEM_BUG_ON(!intel_context_is_pinned(ce));
+
+       /*
+        * We want a simple context + ring to execute the breadcrumb update.
+        * We cannot rely on the context being intact across the GPU hang,
+        * so clear it and rebuild just what we need for the breadcrumb.
+        * All pending requests for this context will be zapped, and any
+        * future request will be after userspace has had the opportunity
+        * to recreate its own state.
+        */
+       if (scrub)
+               lrc_init_regs(ce, engine, true);
+
+       /* Rerun the request; its payload has been neutered (if guilty). */
+       lrc_update_regs(ce, engine, head);
+}
+
 static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -421,7 +443,7 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
                stalled = false;
 
        __i915_request_reset(rq, stalled);
-       intel_lr_context_reset(engine, rq->context, rq->head, stalled);
+       guc_reset_state(rq->context, engine, rq->head, stalled);
 
 out_unlock:
        spin_unlock_irqrestore(&engine->active.lock, flags);
index ed30fdd..6af5c06 100644 (file)
@@ -38,6 +38,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_context.h"
 #include "gt/intel_execlists_submission.h"
+#include "gt/intel_lrc.h"
 #include "gt/intel_ring.h"
 
 #include "i915_drv.h"
index ff44346..d691ce1 100644 (file)
 #include "gt/intel_execlists_submission.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
-#include "gt/intel_lrc_reg.h"
+#include "gt/intel_lrc.h"
 #include "gt/intel_ring.h"
 
 #include "i915_drv.h"