drm/i915/gt: Clear LRC image inline
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 2 Jan 2020 13:17:04 +0000 (13:17 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 3 Jan 2020 11:26:01 +0000 (11:26 +0000)
When creating the initial LRC image, we also want to clear the MI_NOOPs
and register values. Rather than use a blanket memset beforehand, apply
the clears inline, close the context image and force inhibition of the
uninitialised remainder.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200102131707.1463945-2-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/selftest_lrc.c

index 029496d2dfb5f34c0f6be490aa2cfbb81e49a70e..b36fd108f0c6ddcd593d6f93b39016eb419089f4 100644 (file)
@@ -488,9 +488,15 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
        return desc;
 }
 
-static u32 *set_offsets(u32 *regs,
+static inline unsigned int dword_in_page(void *addr)
+{
+       return offset_in_page(addr) / sizeof(u32);
+}
+
+static void set_offsets(u32 *regs,
                        const u8 *data,
-                       const struct intel_engine_cs *engine)
+                       const struct intel_engine_cs *engine,
+                       bool clear)
 #define NOP(x) (BIT(7) | (x))
 #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
 #define POSTED BIT(0)
@@ -498,7 +504,7 @@ static u32 *set_offsets(u32 *regs,
 #define REG16(x) \
        (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
        (((x) >> 2) & 0x7f)
-#define END() 0
+#define END(x) 0, (x)
 {
        const u32 base = engine->mmio_base;
 
@@ -506,7 +512,10 @@ static u32 *set_offsets(u32 *regs,
                u8 count, flags;
 
                if (*data & BIT(7)) { /* skip */
-                       regs += *data++ & ~BIT(7);
+                       count = *data++ & ~BIT(7);
+                       if (clear)
+                               memset32(regs, MI_NOOP, count);
+                       regs += count;
                        continue;
                }
 
@@ -532,12 +541,25 @@ static u32 *set_offsets(u32 *regs,
                                offset |= v & ~BIT(7);
                        } while (v & BIT(7));
 
-                       *regs = base + (offset << 2);
+                       regs[0] = base + (offset << 2);
+                       if (clear)
+                               regs[1] = 0;
                        regs += 2;
                } while (--count);
        }
 
-       return regs;
+       if (clear) {
+               u8 count = *++data;
+
+               /* Clear past the tail for HW access */
+               GEM_BUG_ON(dword_in_page(regs) > count);
+               memset32(regs, MI_NOOP, count - dword_in_page(regs));
+
+               /* Close the batch; used mainly by live_lrc_layout() */
+               *regs = MI_BATCH_BUFFER_END;
+               if (INTEL_GEN(engine->i915) >= 10)
+                       *regs |= BIT(0);
+       }
 }
 
 static const u8 gen8_xcs_offsets[] = {
@@ -572,7 +594,7 @@ static const u8 gen8_xcs_offsets[] = {
        REG16(0x200),
        REG(0x028),
 
-       END(),
+       END(80)
 };
 
 static const u8 gen9_xcs_offsets[] = {
@@ -656,7 +678,7 @@ static const u8 gen9_xcs_offsets[] = {
        REG16(0x67c),
        REG(0x068),
 
-       END(),
+       END(176)
 };
 
 static const u8 gen12_xcs_offsets[] = {
@@ -688,7 +710,7 @@ static const u8 gen12_xcs_offsets[] = {
        REG16(0x274),
        REG16(0x270),
 
-       END(),
+       END(80)
 };
 
 static const u8 gen8_rcs_offsets[] = {
@@ -725,7 +747,7 @@ static const u8 gen8_rcs_offsets[] = {
        LRI(1, 0),
        REG(0x0c8),
 
-       END(),
+       END(80)
 };
 
 static const u8 gen9_rcs_offsets[] = {
@@ -809,7 +831,7 @@ static const u8 gen9_rcs_offsets[] = {
        REG16(0x67c),
        REG(0x68),
 
-       END()
+       END(176)
 };
 
 static const u8 gen11_rcs_offsets[] = {
@@ -850,7 +872,7 @@ static const u8 gen11_rcs_offsets[] = {
        LRI(1, 0),
        REG(0x0c8),
 
-       END(),
+       END(80)
 };
 
 static const u8 gen12_rcs_offsets[] = {
@@ -891,7 +913,7 @@ static const u8 gen12_rcs_offsets[] = {
        LRI(1, 0),
        REG(0x0c8),
 
-       END(),
+       END(80)
 };
 
 #undef END
@@ -1529,7 +1551,7 @@ static bool can_merge_rq(const struct i915_request *prev,
 static void virtual_update_register_offsets(u32 *regs,
                                            struct intel_engine_cs *engine)
 {
-       set_offsets(regs, reg_offsets(engine), engine);
+       set_offsets(regs, reg_offsets(engine), engine, false);
 }
 
 static bool virtual_matches(const struct virtual_engine *ve,
@@ -4043,15 +4065,19 @@ static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
 
 static void init_common_reg_state(u32 * const regs,
                                  const struct intel_engine_cs *engine,
-                                 const struct intel_ring *ring)
+                                 const struct intel_ring *ring,
+                                 bool inhibit)
 {
-       regs[CTX_CONTEXT_CONTROL] =
-               _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
-               _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+       u32 ctl;
+
+       ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+       ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+       if (inhibit)
+               ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
        if (INTEL_GEN(engine->i915) < 11)
-               regs[CTX_CONTEXT_CONTROL] |=
-                       _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
-                                           CTX_CTRL_RS_CTX_ENABLE);
+               ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+                                          CTX_CTRL_RS_CTX_ENABLE);
+       regs[CTX_CONTEXT_CONTROL] = ctl;
 
        regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
 }
@@ -4109,7 +4135,7 @@ static void execlists_init_reg_state(u32 *regs,
                                     const struct intel_context *ce,
                                     const struct intel_engine_cs *engine,
                                     const struct intel_ring *ring,
-                                    bool close)
+                                    bool inhibit)
 {
        /*
         * A context is actually a big batch buffer with several
@@ -4121,15 +4147,9 @@ static void execlists_init_reg_state(u32 *regs,
         *
         * Must keep consistent with virtual_update_register_offsets().
         */
-       u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
+       set_offsets(regs, reg_offsets(engine), engine, inhibit);
 
-       if (close) { /* Close the batch; used mainly by live_lrc_layout() */
-               *bbe = MI_BATCH_BUFFER_END;
-               if (INTEL_GEN(engine->i915) >= 10)
-                       *bbe |= BIT(0);
-       }
-
-       init_common_reg_state(regs, engine, ring);
+       init_common_reg_state(regs, engine, ring, inhibit);
        init_ppgtt_reg_state(regs, vm_alias(ce->vm));
 
        init_wa_bb_reg_state(regs, engine,
@@ -4148,7 +4168,6 @@ populate_lr_context(struct intel_context *ce,
 {
        bool inhibit = true;
        void *vaddr;
-       u32 *regs;
        int ret;
 
        vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
@@ -4178,11 +4197,8 @@ populate_lr_context(struct intel_context *ce,
 
        /* The second page of the context object contains some fields which must
         * be set up prior to the first execution. */
-       regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
-       execlists_init_reg_state(regs, ce, engine, ring, inhibit);
-       if (inhibit)
-               regs[CTX_CONTEXT_CONTROL] |=
-                       _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+       execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+                                ce, engine, ring, inhibit);
 
        ret = 0;
 err_unpin_ctx:
index 943b623f00e9a5d577c638ce6dd775588677a56b..627613d85db88f084d6f12bbe831a34bca9bf3b3 100644 (file)
@@ -3362,7 +3362,7 @@ static int live_lrc_layout(void *arg)
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
-       u32 *mem;
+       u32 *lrc;
        int err;
 
        /*
@@ -3370,13 +3370,13 @@ static int live_lrc_layout(void *arg)
         * match the layout saved by HW.
         */
 
-       mem = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!mem)
+       lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!lrc)
                return -ENOMEM;
 
        err = 0;
        for_each_engine(engine, gt, id) {
-               u32 *hw, *lrc;
+               u32 *hw;
                int dw;
 
                if (!engine->default_state)
@@ -3390,8 +3390,7 @@ static int live_lrc_layout(void *arg)
                }
                hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
 
-               lrc = memset(mem, 0, PAGE_SIZE);
-               execlists_init_reg_state(lrc,
+               execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
                                         engine->kernel_context,
                                         engine,
                                         engine->kernel_context->ring,
@@ -3461,7 +3460,7 @@ static int live_lrc_layout(void *arg)
                        break;
        }
 
-       kfree(mem);
+       kfree(lrc);
        return err;
 }