drm/i915/gt: Rearrange hsw workarounds
author Chris Wilson <chris@chris-wilson.co.uk>
Mon, 4 Jan 2021 11:49:14 +0000 (11:49 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Mon, 4 Jan 2021 12:36:31 +0000 (12:36 +0000)
Some rcs0 workarounds were being incorrectly applied to the GT, and so
we failed to restore the expected register settings after a reset.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210104114914.30165-2-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_workarounds.c

index 741ed6e..c21a972 100644 (file)
@@ -956,31 +956,6 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 
        /* WaVSRefCountFullforceMissDisable:hsw */
        wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
-
-       wa_masked_dis(wal,
-                     CACHE_MODE_0_GEN7,
-                     /* WaDisable_RenderCache_OperationalFlush:hsw */
-                     RC_OP_FLUSH_ENABLE |
-                     /* enable HiZ Raw Stall Optimization */
-                     HIZ_RAW_STALL_OPT_DISABLE);
-
-       /* WaDisable4x2SubspanOptimization:hsw */
-       wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
-
-       /*
-        * BSpec recommends 8x4 when MSAA is used,
-        * however in practice 16x4 seems fastest.
-        *
-        * Note that PS/WM thread counts depend on the WIZ hashing
-        * disable bit, which we don't touch here, but it's good
-        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-        */
-       wa_add(wal, GEN7_GT_MODE, 0,
-              _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
-              GEN6_WIZ_HASHING_16x4);
-
-       /* WaSampleCChickenBitEnable:hsw */
-       wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
 }
 
 static void
@@ -1948,6 +1923,35 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
                            GEN8_LQSC_FLUSH_COHERENT_LINES);
        }
 
+       if (IS_HASWELL(i915)) {
+               /* WaSampleCChickenBitEnable:hsw */
+               wa_masked_en(wal,
+                            HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
+
+               wa_masked_dis(wal,
+                             CACHE_MODE_0_GEN7,
+                             /* WaDisable_RenderCache_OperationalFlush:hsw */
+                             RC_OP_FLUSH_ENABLE |
+                             /* enable HiZ Raw Stall Optimization */
+                             HIZ_RAW_STALL_OPT_DISABLE);
+
+               /* WaDisable4x2SubspanOptimization:hsw */
+               wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+               /*
+                * BSpec recommends 8x4 when MSAA is used,
+                * however in practice 16x4 seems fastest.
+                *
+                * Note that PS/WM thread counts depend on the WIZ hashing
+                * disable bit, which we don't touch here, but it's good
+                * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+                */
+               wa_add(wal, GEN7_GT_MODE, 0,
+                      _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
+                                    GEN6_WIZ_HASHING_16x4),
+                      GEN6_WIZ_HASHING_16x4);
+       }
+
        if (IS_GEN(i915, 7))
                /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
                wa_masked_en(wal,