+vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /*
+ * Add the VLV GT workarounds to @wal, one wa_*() call per register.
+ * @i915 is not referenced anywhere in this function body.
+ *
+ * NOTE(review): the wa_*() helpers are defined outside this hunk;
+ * presumably they only record entries in @wal - confirm.
+ *
+ * WaDisableEarlyCull:vlv
+ */
+ wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
+
+ /* WaPsdDispatchEnable:vlv */
+ /*
+ * WaDisablePSDDualDispatchEnable:vlv
+ *
+ * Both workaround names are covered by the one call below, which
+ * passes two GEN7_HALF_SLICE_CHICKEN1 bits OR'ed together.
+ */
+ wa_masked_en(wal,
+ GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_MAX_PS_THREAD_DEP |
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+ /*
+ * WaDisable_RenderCache_OperationalFlush:vlv
+ *
+ * Uses the _dis helper rather than _en, so RC_OP_FLUSH_ENABLE is
+ * presumably cleared here - confirm against wa_masked_dis().
+ */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
+
+ /* WaForceL3Serialization:vlv */
+ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+ /*
+ * WaVSThreadDispatchOverride:ivb,vlv
+ *
+ * This actually overrides the dispatch
+ * mode for all thread types.
+ *
+ * GEN7_FF_SCHED_MASK is passed ahead of the TS/VS/DS *_SCHED_HW
+ * values; presumably it selects the field that those values
+ * replace - confirm against wa_write_masked_or().
+ */
+ wa_write_masked_or(wal,
+ GEN7_FF_THREAD_MODE,
+ GEN7_FF_SCHED_MASK,
+ GEN7_FF_TS_SCHED_HW |
+ GEN7_FF_VS_SCHED_HW |
+ GEN7_FF_DS_SCHED_HW);
+
+ /*
+ * BSpec says this must be set, even though
+ * WaDisable4x2SubspanOptimization isn't listed for VLV.
+ */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ *
+ * NOTE(review): the final argument repeats GEN6_WIZ_HASHING_16x4
+ * instead of GEN6_WIZ_HASHING_MASK; presumably it is the mask used
+ * when the value is read back - confirm against wa_add().
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ /*
+ * WaIncreaseL3CreditsForVLVB0:vlv
+ * This is the hardware default actually.
+ */
+ wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
+}
+
+static void