/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please note that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */
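
/*
 * Editor's overview (a sketch based on the code below, not part of the
 * original file): the context-type lists built by __intel_engine_init_ctx_wa()
 * are emitted as an MI_LOAD_REGISTER_IMM stream in intel_engine_emit_ctx_wa();
 * the GT lists built by intel_gt_init_workarounds() are written to the HW by
 * wa_list_apply(); whitelist entries are programmed into the per-engine
 * RING_FORCE_TO_NONPRIV slots via whitelist_reg_ext().
 */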

/*
 * KBL revision ID ordering is bizarre; higher revision IDs map to lower
 * steppings in some cases.  So rather than test against the revision ID
 * directly, let's map that into our own range of increasing IDs that we
 * can test against in a regular manner.
 */

const struct i915_rev_steppings kbl_revids[] = {
        [0] = { .gt_stepping = KBL_REVID_A0, .disp_stepping = KBL_REVID_A0 },
        [1] = { .gt_stepping = KBL_REVID_B0, .disp_stepping = KBL_REVID_B0 },
        [2] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B0 },
        [3] = { .gt_stepping = KBL_REVID_D0, .disp_stepping = KBL_REVID_B0 },
        [4] = { .gt_stepping = KBL_REVID_F0, .disp_stepping = KBL_REVID_C0 },
        [5] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B1 },
        [6] = { .gt_stepping = KBL_REVID_D1, .disp_stepping = KBL_REVID_B1 },
        [7] = { .gt_stepping = KBL_REVID_G0, .disp_stepping = KBL_REVID_C0 },
};
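
/*
 * Example (editor's illustration): PCI revision 4 is GT stepping F0 while
 * revision 5 drops back to GT stepping C0, so a range check such as
 * IS_KBL_GT_REVID(i915, KBL_REVID_C0, KBL_REVID_C0) matches both revid 2
 * and revid 5 once mapped through the table above.
 */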

const struct i915_rev_steppings tgl_uy_revid_step_tbl[] = {
        [0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A0 },
        [1] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_C0 },
        [2] = { .gt_stepping = STEP_B1, .disp_stepping = STEP_C0 },
        [3] = { .gt_stepping = STEP_C0, .disp_stepping = STEP_D0 },
};

/* The same GT stepping in tgl_uy_revid_step_tbl and tgl_revid_step_tbl does not mean the same HW */
const struct i915_rev_steppings tgl_revid_step_tbl[] = {
        [0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_B0 },
        [1] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_D0 },
};

const struct i915_rev_steppings adls_revid_step_tbl[] = {
        [0x0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A0 },
        [0x1] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A2 },
        [0x4] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_B0 },
        [0x8] = { .gt_stepping = STEP_C0, .disp_stepping = STEP_B0 },
        [0xC] = { .gt_stepping = STEP_D0, .disp_stepping = STEP_C0 },
};

static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
{
        wal->name = name;
        wal->engine_name = engine_name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
        /* Trim unused entries. */
        if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
                struct i915_wa *list = kmemdup(wal->list,
                                               wal->count * sizeof(*list),
                                               GFP_KERNEL);

                if (list) {
                        kfree(wal->list);
                        wal->list = list;
                }
        }

        if (!wal->count)
                return;

        DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
                         wal->wa_count, wal->name, wal->engine_name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
        unsigned int addr = i915_mmio_reg_offset(wa->reg);
        unsigned int start = 0, end = wal->count;
        const unsigned int grow = WA_LIST_CHUNK;
        struct i915_wa *wa_;

        GEM_BUG_ON(!is_power_of_2(grow));

        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
                struct i915_wa *list;

                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
                                     GFP_KERNEL);
                if (!list) {
                        DRM_ERROR("No space for workaround init!\n");
                        return;
                }

                if (wal->list) {
                        memcpy(list, wal->list, sizeof(*wa) * wal->count);
                        kfree(wal->list);
                }

                wal->list = list;
        }

        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
                        start = mid + 1;
                } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
                        end = mid;
                } else {
                        wa_ = &wal->list[mid];

                        if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
                                DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
                                          i915_mmio_reg_offset(wa_->reg),
                                          wa_->clr, wa_->set);

                                wa_->set &= ~wa->clr;
                        }

                        wal->wa_count++;
                        wa_->set |= wa->set;
                        wa_->clr |= wa->clr;
                        wa_->read |= wa->read;
                        return;
                }
        }

        wal->wa_count++;
        wa_ = &wal->list[wal->count++];
        *wa_ = *wa;

        while (wa_-- > wal->list) {
                GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
                           i915_mmio_reg_offset(wa_[1].reg));
                if (i915_mmio_reg_offset(wa_[1].reg) >
                    i915_mmio_reg_offset(wa_[0].reg))
                        break;

                swap(wa_[1], wa_[0]);
        }
}
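
/*
 * Editor's note on _wa_add() above: the list is grown in WA_LIST_CHUNK (16)
 * entry increments and kept sorted by mmio offset. Adding a second workaround
 * for a register that is already listed merges the two entries by OR-ing
 * their set/clr/read masks, so one register write applies both; a new clear
 * mask that would wipe out previously set bits is reported via DRM_ERROR.
 */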

static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
                   u32 clear, u32 set, u32 read_mask)
{
        struct i915_wa wa = {
                .reg  = reg,
                .clr  = clear,
                .set  = set,
                .read = read_mask,
        };

        _wa_add(wal, &wa);
}

static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
        wa_add(wal, reg, clear, set, clear);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
        wa_write_clr_set(wal, reg, ~0, set);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
        wa_write_clr_set(wal, reg, set, set);
}

static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
        wa_write_clr_set(wal, reg, clr, 0);
}
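
/*
 * Semantics at a glance (editor's note): wa_write() replaces the whole
 * register (clear mask ~0), wa_write_or() read-modify-writes the given bits
 * in, and wa_write_clr() read-modify-writes them out. In all three cases the
 * clear mask doubles as the read-back verification mask passed to wa_add().
 */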

/*
 * WA operations on "masked register". A masked register has the upper 16 bits
 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
 * portion of the register without an rmw: you simply write in the upper 16 bits
 * the mask of bits you are going to modify.
 *
 * The wa_masked_* family of functions already does the necessary operations to
 * calculate the mask based on the parameters passed, so the user only has to
 * provide the lower 16 bits of that register.
 */
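
/*
 * Worked example (editor's illustration): wa_masked_en(wal, reg, BIT(3))
 * records set = _MASKED_BIT_ENABLE(BIT(3)) = 0x00080008 -- the high half
 * tells the HW which bits the write affects, the low half carries the new
 * values -- while the read-back verification mask stays BIT(3). Likewise,
 * _MASKED_FIELD(mask, value) expands to (mask << 16) | value.
 */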

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
}

static void
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
}

static void
wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
                    u32 mask, u32 val)
{
        wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask);
}

static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        wa_masked_en(wal, GEN8_ROW_CHICKEN,
                     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        wa_masked_en(wal, HDC_CHICKEN0,
                     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                     HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         *  polygons in the same 8x4 pixel/sample area to be processed without
         *  stalling waiting for the earlier ones to write to Hierarchical Z
         *  buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        wa_masked_field_set(wal, GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in bdw_init_clock_gating()
         * to disable EUTC clock gating.
         */
        wa_masked_en(wal, GEN7_ROW_CHICKEN2,
                     DOP_CLOCK_GATING_DISABLE);

        wa_masked_en(wal, HALF_SLICE_CHICKEN3,
                     GEN8_SAMPLER_POWER_BYPASS_DIS);

        wa_masked_en(wal, HDC_CHICKEN0,
                     /* WaForceContextSaveRestoreNonCoherent:bdw */
                     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                     (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:chv */
        wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                             GEN9_PBE_COMPRESSED_HASH_SELECTION);
                wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
                             GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        wa_masked_en(wal, GEN8_ROW_CHICKEN,
                     FLOW_CONTROL_ENABLE |
                     PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
                     GEN9_ENABLE_YV12_BUGFIX |
                     GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        wa_masked_en(wal, CACHE_MODE_1,
                     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
                      GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        wa_masked_en(wal, HDC_CHICKEN0,
                     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on kbl/bxt.
         * This area has been a source of system hangs so we play it safe
         * and mimic the skl regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */

        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        wa_masked_en(wal, HDC_CHICKEN0,
                     HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(i915) ||
            IS_KABYLAKE(i915) ||
            IS_COFFEELAKE(i915) ||
            IS_COMETLAKE(i915))
                wa_masked_en(wal, HALF_SLICE_CHICKEN3,
                             GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */

        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(i915))
                wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

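/*
 * Worked example for the function below (editor's illustration, values
 * assumed): if slice 0 has subslice_7eu[0] == BIT(2) -- exactly one subslice
 * with 7 EUs -- then ss = ffs(BIT(2)) - 1 = 2 and vals[0] = 3 - 2 = 1, which
 * is programmed into the GEN9_IZ_HASHING field for slice 0.
 */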
static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
                                struct i915_wa_list *wal)
{
        struct intel_gt *gt = engine->gt;
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * ->    0 <= ss <= 3;
                 */
                ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        wa_masked_field_set(wal, GEN7_GT_MODE,
                            GEN9_IZ_HASHING_MASK(2) |
                            GEN9_IZ_HASHING_MASK(1) |
                            GEN9_IZ_HASHING_MASK(0),
                            GEN9_IZ_HASHING(2, vals[2]) |
                            GEN9_IZ_HASHING(1, vals[1]) |
                            GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);
        skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bxt */
        wa_masked_en(wal, GEN8_ROW_CHICKEN,
                     STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
                wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                             GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
                     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:glk */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
                     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        /* WaForceContextSaveRestoreNonCoherent:cnl */
        wa_masked_en(wal, CNL_HDC_CHICKEN0,
                     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
                     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaDisableBankHangMode:icl */
        wa_write(wal,
                 GEN8_L3CNTLREG,
                 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
                 GEN8_ERRDETBCTRL);

        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_masked_en(wal, GEN7_ROW_CHICKEN2,
                             PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        /* Wa_2006611047:icl (pre-prod)
         * Formerly known as WaDisableImprovedTdlClkGating
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_masked_en(wal, GEN7_ROW_CHICKEN2,
                             GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

        /* Wa_2006665173:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
                             GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

        /* WaEnableFloatBlendOptimization:icl */
        wa_write_clr_set(wal,
                         GEN10_CACHE_MODE_SS,
                         0, /* write-only, so skip validation */
                         _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

        /* WaDisableGPGPUMidThreadPreemption:icl */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

        /* allow headerless messages for preemptible GPGPU context */
        wa_masked_en(wal, GEN10_SAMPLER_MODE,
                     GEN11_SAMPLER_ENABLE_HEADLESS_MSG);

        /* Wa_1604278689:icl,ehl */
        wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
        wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
                         0, /* write-only register; skip validation */
                         0xFFFFFFFF);

        /* Wa_1406306137:icl,ehl */
        wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}

static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
                                       struct i915_wa_list *wal)
{
        /*
         * Wa_1409142259:tgl
         * Wa_1409347922:tgl
         * Wa_1409252684:tgl
         * Wa_1409217633:tgl
         * Wa_1409207793:tgl
         * Wa_1409178076:tgl
         * Wa_1408979724:tgl
         * Wa_14010443199:rkl
         * Wa_14010698770:rkl
         */
        wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
                     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);

        /* WaDisableGPGPUMidThreadPreemption:gen12 */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
}

static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen12_ctx_workarounds_init(engine, wal);

        /*
         * Wa_1604555607:tgl,rkl
         *
         * Note that the implementation of this workaround is further modified
         * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
         * The FF_MODE2 register will return the wrong value when read. The
         * default value for this register is zero for all fields and there are
         * no bit masks. So instead of doing an RMW we should just write the GS
         * Timer and TDS timer values for Wa_1604555607 and Wa_16011163337.
         */
        wa_add(wal,
               FF_MODE2,
               FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK,
               FF_MODE2_GS_TIMER_224  | FF_MODE2_TDS_TIMER_128,
               0);
}
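
/*
 * Editor's note on the zero read mask above: wa_verify() checks
 * (cur ^ wa->set) & wa->read, so a read mask of 0 makes the check vacuously
 * true and read-back verification is effectively skipped for FF_MODE2, as
 * Wa_1608008084 requires.
 */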

static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen12_ctx_workarounds_init(engine, wal);

        /* Wa_1409044764 */
        wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
                      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);

        /* Wa_22010493298 */
        wa_masked_en(wal, HIZ_CHICKEN,
                     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);

        /*
         * Wa_16011163337
         *
         * Like in tgl_ctx_workarounds_init(), read verification is ignored due
         * to Wa_1608008084.
         */
        wa_add(wal,
               FF_MODE2,
               FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
                           struct i915_wa_list *wal,
                           const char *name)
{
        struct drm_i915_private *i915 = engine->i915;

        if (engine->class != RENDER_CLASS)
                return;

        wa_init_start(wal, name, engine->name);

        if (IS_DG1(i915))
                dg1_ctx_workarounds_init(engine, wal);
        else if (IS_ALDERLAKE_S(i915) || IS_ROCKETLAKE(i915) ||
                 IS_TIGERLAKE(i915))
                tgl_ctx_workarounds_init(engine, wal);
        else if (IS_GEN(i915, 12))
                gen12_ctx_workarounds_init(engine, wal);
        else if (IS_GEN(i915, 11))
                icl_ctx_workarounds_init(engine, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_ctx_workarounds_init(engine, wal);
        else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
                cfl_ctx_workarounds_init(engine, wal);
        else if (IS_GEMINILAKE(i915))
                glk_ctx_workarounds_init(engine, wal);
        else if (IS_KABYLAKE(i915))
                kbl_ctx_workarounds_init(engine, wal);
        else if (IS_BROXTON(i915))
                bxt_ctx_workarounds_init(engine, wal);
        else if (IS_SKYLAKE(i915))
                skl_ctx_workarounds_init(engine, wal);
        else if (IS_CHERRYVIEW(i915))
                chv_ctx_workarounds_init(engine, wal);
        else if (IS_BROADWELL(i915))
                bdw_ctx_workarounds_init(engine, wal);
        else if (IS_GEN(i915, 7))
                gen7_ctx_workarounds_init(engine, wal);
        else if (IS_GEN(i915, 6))
                gen6_ctx_workarounds_init(engine, wal);
        else if (INTEL_GEN(i915) < 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
        __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

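/*
 * Editor's sketch of the command stream emitted by the function below,
 * assuming two workarounds in the list:
 *
 *   MI_LOAD_REGISTER_IMM(2)
 *   <offset of wa[0].reg> <wa[0].set>
 *   <offset of wa[1].reg> <wa[1].set>
 *   MI_NOOP
 *
 * i.e. wal->count * 2 + 2 dwords, matching the intel_ring_begin() request,
 * bracketed by EMIT_BARRIER flushes.
 */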
int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
        struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
        struct i915_wa *wa;
        unsigned int i;
        u32 *cs;
        int ret;

        if (wal->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        cs = intel_ring_begin(rq, (wal->count * 2 + 2));
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = wa->set;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}

static void
gen4_gt_workarounds_init(struct drm_i915_private *i915,
                         struct i915_wa_list *wal)
{
        /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
        wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
}

static void
g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen4_gt_workarounds_init(i915, wal);

        /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
        wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
}

static void
ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        g4x_gt_workarounds_init(i915, wal);

        wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
}

static void
snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
}

static void
ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
        wa_masked_dis(wal,
                      GEN7_COMMON_SLICE_CHICKEN1,
                      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);

        /* WaApplyL3ControlAndL3ChickenMode:ivb */
        wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
        wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);

        /* WaForceL3Serialization:ivb */
        wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
}

static void
vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaForceL3Serialization:vlv */
        wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);

        /*
         * WaIncreaseL3CreditsForVLVB0:vlv
         * This is the hardware default actually.
         */
        wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
}

static void
hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* L3 caching of data atomics doesn't work -- disable it. */
        wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);

        wa_add(wal,
               HSW_ROW_CHICKEN3, 0,
               _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
                0 /* XXX does this reg exist? */);

        /* WaVSRefCountFullforceMissDisable:hsw */
        wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
}

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
                wa_write_or(wal,
                            GAM_ECOCHK,
                            ECOCHK_DIS_TLB);

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_write_or(wal,
                            MMCD_MISC_CTRL,
                            MMCD_PCLA | MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        wa_write_or(wal,
                    GAM_ECOCHK,
                    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:skl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
                wa_write_or(wal,
                            GEN9_GAMT_ECO_REG_RW_IA,
                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaInPlaceDecompressionHang:bxt */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_GT_REVID(i915, 0, KBL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:cfl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
        unsigned int slice, subslice;
        u32 l3_en, mcr, mcr_mask;

        GEM_BUG_ON(INTEL_GEN(i915) < 10);

        /*
         * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
         * L3Banks could be fused off in single slice scenario. If that is
         * the case, we might need to program MCR select to a valid L3Bank
         * by default, to make sure we correctly read certain registers
         * later on (in the range 0xB100 - 0xB3FF).
         *
         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
         * Before any MMIO read into slice/subslice specific registers, MCR
         * packet control register needs to be programmed to point to any
         * enabled s/ss pair. Otherwise, incorrect values will be returned.
         * This means each subsequent MMIO read will be forwarded to a
         * specific s/ss combination, but this is OK since these registers
         * are consistent across s/ss in almost all cases. On the rare
         * occasions, such as INSTDONE, where this value is dependent
         * on s/ss combo, the read should be done with read_subslice_reg.
         *
         * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
         * to which subslice, or to which L3 bank, the respective mmio reads
         * will go, we have to find a common index which works for both
         * accesses.
         *
         * The case where we cannot find a common index fortunately should not
         * happen in production hardware, so we only emit a warning instead of
         * implementing something more complex that requires checking the range
         * of every MMIO read.
         */
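        /*
         * Worked example (editor's illustration, values assumed): with
         * slice_mask == 0x1 and no L3 banks fused off (l3_en == ~0), we get
         * slice = 0 and subslice = fls(subslice mask) - 1, i.e. the highest
         * enabled subslice, and that pair is latched into GEN8_MCR_SELECTOR
         * below.
         */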

        if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
                u32 l3_fuse =
                        intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
                        GEN10_L3BANK_MASK;

                drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
                l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
        } else {
                l3_en = ~0;
        }

        slice = fls(sseu->slice_mask) - 1;
        subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
        if (!subslice) {
                drm_warn(&i915->drm,
                         "No common index found between subslice mask %x and L3 bank mask %x!\n",
                         intel_sseu_get_subslices(sseu, slice), l3_en);
                subslice = fls(l3_en);
                drm_WARN_ON(&i915->drm, !subslice);
        }
        subslice--;

        if (INTEL_GEN(i915) >= 11) {
                mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
                mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
        } else {
                mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
                mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
        }

        drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);

        wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaInPlaceDecompressionHang:cnl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaInPlaceDecompressionHang:icl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaModifyGamTlbPartitioning:icl */
        wa_write_clr_set(wal,
                         GEN11_GACB_PERF_CTRL,
                         GEN11_HASH_CTRL_MASK,
                         GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        wa_write_or(wal,
                    GEN11_LSN_UNSLCVC,
                    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        wa_write_or(wal,
                    GEN8_GAMW_ECO_DEV_RW_IA,
                    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            MSCUNIT_CLKGATE_DIS);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_write_or(wal,
                            INF_UNIT_LEVEL_CLKGATE,
                            CGPSF_CLKGATE_DIS);

        /* Wa_1406463099:icl
         * Formerly known as WaGamTlbPendError
         */
        wa_write_or(wal,
                    GAMT_CHKN_BIT_REG,
                    GAMT_CHKN_DISABLE_L3_COH_PIPE);

        /* Wa_1607087056:icl,ehl,jsl */
        if (IS_ICELAKE(i915) ||
            IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
        }
}

static void
gen12_gt_workarounds_init(struct drm_i915_private *i915,
                          struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);
}

static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen12_gt_workarounds_init(i915, wal);

        /* Wa_1409420604:tgl */
        if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0))
                wa_write_or(wal,
                            SUBSLICE_UNIT_LEVEL_CLKGATE2,
                            CPSSUNIT_CLKGATE_DIS);

        /* Wa_1607087056:tgl, also known as BUG:1409180338 */
        if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

        /* Wa_1408615072:tgl[a0] */
        if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0))
                wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
                            VSUNIT_CLKGATE_DIS_TGL);
}

static void
dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen12_gt_workarounds_init(i915, wal);

        /* Wa_1607087056:dg1 */
        if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

        /* Wa_1409420604:dg1 */
        if (IS_DG1(i915))
                wa_write_or(wal,
                            SUBSLICE_UNIT_LEVEL_CLKGATE2,
                            CPSSUNIT_CLKGATE_DIS);

        /* Wa_1408615072:dg1 */
        /* Empirical testing shows this register is unaffected by engine reset. */
        if (IS_DG1(i915))
                wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
                            VSUNIT_CLKGATE_DIS_TGL);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        if (IS_DG1(i915))
                dg1_gt_workarounds_init(i915, wal);
        else if (IS_TIGERLAKE(i915))
                tgl_gt_workarounds_init(i915, wal);
        else if (IS_GEN(i915, 12))
                gen12_gt_workarounds_init(i915, wal);
        else if (IS_GEN(i915, 11))
                icl_gt_workarounds_init(i915, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_gt_workarounds_init(i915, wal);
        else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
                cfl_gt_workarounds_init(i915, wal);
        else if (IS_GEMINILAKE(i915))
                glk_gt_workarounds_init(i915, wal);
        else if (IS_KABYLAKE(i915))
                kbl_gt_workarounds_init(i915, wal);
        else if (IS_BROXTON(i915))
                bxt_gt_workarounds_init(i915, wal);
        else if (IS_SKYLAKE(i915))
                skl_gt_workarounds_init(i915, wal);
        else if (IS_HASWELL(i915))
                hsw_gt_workarounds_init(i915, wal);
        else if (IS_VALLEYVIEW(i915))
                vlv_gt_workarounds_init(i915, wal);
        else if (IS_IVYBRIDGE(i915))
                ivb_gt_workarounds_init(i915, wal);
        else if (IS_GEN(i915, 6))
                snb_gt_workarounds_init(i915, wal);
        else if (IS_GEN(i915, 5))
                ilk_gt_workarounds_init(i915, wal);
        else if (IS_G4X(i915))
                g4x_gt_workarounds_init(i915, wal);
        else if (IS_GEN(i915, 4))
                gen4_gt_workarounds_init(i915, wal);
        else if (INTEL_GEN(i915) <= 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_start(wal, "GT", "global");
        gt_init_workarounds(i915, wal);
        wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw = 0;
        struct i915_wa *wa;
        unsigned int i;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                fw |= intel_uncore_forcewake_for_reg(uncore,
                                                     wa->reg,
                                                     FW_REG_READ |
                                                     FW_REG_WRITE);

        return fw;
}

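/*
 * Editor's note on the check below: only bits named in wa->read are
 * compared, so a masked-register WA whose upper mask half reads back as 0
 * still verifies cleanly. For wa_masked_en(wal, reg, BIT(3)) the check is
 * (cur ^ 0x00080008) & 0x8, which inspects only bit 3.
 */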
static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
        if ((cur ^ wa->set) & wa->read) {
                DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
                          name, from, i915_mmio_reg_offset(wa->reg),
                          cur, cur & wa->read, wa->set & wa->read);

                return false;
        }

        return true;
}

static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw;
        unsigned long flags;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        fw = wal_get_fw_for_rmw(uncore, wal);

        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                if (wa->clr)
                        intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set);
                else
                        intel_uncore_write_fw(uncore, wa->reg, wa->set);
                if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                        wa_verify(wa,
                                  intel_uncore_read_fw(uncore, wa->reg),
                                  wal->name, "application");
        }

        intel_uncore_forcewake_put__locked(uncore, fw);
        spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct intel_gt *gt)
{
        wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_uncore *uncore,
                           const struct i915_wa_list *wal,
                           const char *from)
{
        struct i915_wa *wa;
        unsigned int i;
        bool ok = true;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                ok &= wa_verify(wa,
                                intel_uncore_read(uncore, wa->reg),
                                wal->name, from);

        return ok;
}

bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
        return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
}

__maybe_unused
static bool is_nonpriv_flags_valid(u32 flags)
{
        /* Check only valid flag bits are set */
        if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
                return false;

        /* NB: Only 3 out of 4 enum values are valid for access field */
        if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
            RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
                return false;

        return true;
}

static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
        struct i915_wa wa = {
                .reg = reg
        };

        if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
                return;

        if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
                return;

        wa.reg.reg |= flags;
        _wa_add(wal, &wa);
}
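
/*
 * Editor's note: the access/range flags are OR-ed directly into the stored
 * register value (wa.reg.reg |= flags above), occupying bits of the
 * RING_FORCE_TO_NONPRIV slot encoding that are not used by the mmio offset.
 * Each engine has only RING_MAX_NONPRIV_SLOTS slots, hence the count check
 * above; a range flag such as RING_FORCE_TO_NONPRIV_RANGE_4 lets one slot
 * cover four consecutive registers, as the cfl/icl builds below exploit.
 */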
1346
1347 static void
1348 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1349 {
1350         whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1351 }
1352
1353 static void gen9_whitelist_build(struct i915_wa_list *w)
1354 {
1355         /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1356         whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1357
1358         /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1359         whitelist_reg(w, GEN8_CS_CHICKEN1);
1360
1361         /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1362         whitelist_reg(w, GEN8_HDC_CHICKEN1);
1363
1364         /* WaSendPushConstantsFromMMIO:skl,bxt */
1365         whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1366 }
1367
1368 static void skl_whitelist_build(struct intel_engine_cs *engine)
1369 {
1370         struct i915_wa_list *w = &engine->whitelist;
1371
1372         if (engine->class != RENDER_CLASS)
1373                 return;
1374
1375         gen9_whitelist_build(w);
1376
1377         /* WaDisableLSQCROPERFforOCL:skl */
1378         whitelist_reg(w, GEN8_L3SQCREG4);
1379 }
1380
1381 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1382 {
1383         if (engine->class != RENDER_CLASS)
1384                 return;
1385
1386         gen9_whitelist_build(&engine->whitelist);
1387 }
1388
1389 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1390 {
1391         struct i915_wa_list *w = &engine->whitelist;
1392
1393         if (engine->class != RENDER_CLASS)
1394                 return;
1395
1396         gen9_whitelist_build(w);
1397
1398         /* WaDisableLSQCROPERFforOCL:kbl */
1399         whitelist_reg(w, GEN8_L3SQCREG4);
1400 }
1401
1402 static void glk_whitelist_build(struct intel_engine_cs *engine)
1403 {
1404         struct i915_wa_list *w = &engine->whitelist;
1405
1406         if (engine->class != RENDER_CLASS)
1407                 return;
1408
1409         gen9_whitelist_build(w);
1410
1411         /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1412         whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1413 }
1414
1415 static void cfl_whitelist_build(struct intel_engine_cs *engine)
1416 {
1417         struct i915_wa_list *w = &engine->whitelist;
1418
1419         if (engine->class != RENDER_CLASS)
1420                 return;
1421
1422         gen9_whitelist_build(w);
1423
1424         /*
1425          * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1426          *
1427          * This covers 4 registers which are next to one another:
1428          *   - PS_INVOCATION_COUNT
1429          *   - PS_INVOCATION_COUNT_UDW
1430          *   - PS_DEPTH_COUNT
1431          *   - PS_DEPTH_COUNT_UDW
1432          */
1433         whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1434                           RING_FORCE_TO_NONPRIV_ACCESS_RD |
1435                           RING_FORCE_TO_NONPRIV_RANGE_4);
1436 }
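
/*
 * RING_FORCE_TO_NONPRIV_RANGE_4 makes a single nonpriv slot cover four
 * consecutive dword registers, which is how the one entry above spans all
 * four counters listed in the comment, from PS_INVOCATION_COUNT through
 * PS_DEPTH_COUNT_UDW.
 */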
1437
1438 static void cml_whitelist_build(struct intel_engine_cs *engine)
1439 {
1440         struct i915_wa_list *w = &engine->whitelist;
1441
1442         if (engine->class != RENDER_CLASS)
1443                 whitelist_reg_ext(w,
1444                                   RING_CTX_TIMESTAMP(engine->mmio_base),
1445                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1446
1447         cfl_whitelist_build(engine);
1448 }
1449
1450 static void cnl_whitelist_build(struct intel_engine_cs *engine)
1451 {
1452         struct i915_wa_list *w = &engine->whitelist;
1453
1454         if (engine->class != RENDER_CLASS)
1455                 return;
1456
1457         /* WaEnablePreemptionGranularityControlByUMD:cnl */
1458         whitelist_reg(w, GEN8_CS_CHICKEN1);
1459 }
1460
1461 static void icl_whitelist_build(struct intel_engine_cs *engine)
1462 {
1463         struct i915_wa_list *w = &engine->whitelist;
1464
1465         switch (engine->class) {
1466         case RENDER_CLASS:
1467                 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1468                 whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1469
1470                 /* WaAllowUMDToModifySamplerMode:icl */
1471                 whitelist_reg(w, GEN10_SAMPLER_MODE);
1472
1473                 /* WaEnableStateCacheRedirectToCS:icl */
1474                 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1475
1476                 /*
1477                  * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1478                  *
1479                  * This covers 4 registers which are next to one another:
1480                  *   - PS_INVOCATION_COUNT
1481                  *   - PS_INVOCATION_COUNT_UDW
1482                  *   - PS_DEPTH_COUNT
1483                  *   - PS_DEPTH_COUNT_UDW
1484                  */
1485                 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1486                                   RING_FORCE_TO_NONPRIV_ACCESS_RD |
1487                                   RING_FORCE_TO_NONPRIV_RANGE_4);
1488                 break;
1489
1490         case VIDEO_DECODE_CLASS:
1491                 /* hucStatusRegOffset */
1492                 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1493                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1494                 /* hucUKernelHdrInfoRegOffset */
1495                 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1496                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1497                 /* hucStatus2RegOffset */
1498                 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1499                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1500                 whitelist_reg_ext(w,
1501                                   RING_CTX_TIMESTAMP(engine->mmio_base),
1502                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1503                 break;
1504
1505         default:
1506                 whitelist_reg_ext(w,
1507                                   RING_CTX_TIMESTAMP(engine->mmio_base),
1508                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1509                 break;
1510         }
1511 }
1512
1513 static void tgl_whitelist_build(struct intel_engine_cs *engine)
1514 {
1515         struct i915_wa_list *w = &engine->whitelist;
1516
1517         switch (engine->class) {
1518         case RENDER_CLASS:
1519                 /*
1520                  * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
1521                  * Wa_1408556865:tgl
1522                  *
1523                  * This covers 4 registers which are next to one another:
1524                  *   - PS_INVOCATION_COUNT
1525                  *   - PS_INVOCATION_COUNT_UDW
1526                  *   - PS_DEPTH_COUNT
1527                  *   - PS_DEPTH_COUNT_UDW
1528                  */
1529                 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1530                                   RING_FORCE_TO_NONPRIV_ACCESS_RD |
1531                                   RING_FORCE_TO_NONPRIV_RANGE_4);
1532
1533                 /* Wa_1808121037:tgl */
1534                 whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
1535
1536                 /* Wa_1806527549:tgl */
1537                 whitelist_reg(w, HIZ_CHICKEN);
1538                 break;
1539         default:
1540                 whitelist_reg_ext(w,
1541                                   RING_CTX_TIMESTAMP(engine->mmio_base),
1542                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1543                 break;
1544         }
1545 }
1546
1547 static void dg1_whitelist_build(struct intel_engine_cs *engine)
1548 {
1549         struct i915_wa_list *w = &engine->whitelist;
1550
1551         tgl_whitelist_build(engine);
1552
1553         /* GEN:BUG:1409280441:dg1 */
1554         if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) &&
1555             (engine->class == RENDER_CLASS ||
1556              engine->class == COPY_ENGINE_CLASS))
1557                 whitelist_reg_ext(w, RING_ID(engine->mmio_base),
1558                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
1559 }
1560
1561 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1562 {
1563         struct drm_i915_private *i915 = engine->i915;
1564         struct i915_wa_list *w = &engine->whitelist;
1565
1566         wa_init_start(w, "whitelist", engine->name);
1567
1568         if (IS_DG1(i915))
1569                 dg1_whitelist_build(engine);
1570         else if (IS_GEN(i915, 12))
1571                 tgl_whitelist_build(engine);
1572         else if (IS_GEN(i915, 11))
1573                 icl_whitelist_build(engine);
1574         else if (IS_CANNONLAKE(i915))
1575                 cnl_whitelist_build(engine);
1576         else if (IS_COMETLAKE(i915))
1577                 cml_whitelist_build(engine);
1578         else if (IS_COFFEELAKE(i915))
1579                 cfl_whitelist_build(engine);
1580         else if (IS_GEMINILAKE(i915))
1581                 glk_whitelist_build(engine);
1582         else if (IS_KABYLAKE(i915))
1583                 kbl_whitelist_build(engine);
1584         else if (IS_BROXTON(i915))
1585                 bxt_whitelist_build(engine);
1586         else if (IS_SKYLAKE(i915))
1587                 skl_whitelist_build(engine);
1588         else if (INTEL_GEN(i915) <= 8)
1589                 return;
1590         else
1591                 MISSING_CASE(INTEL_GEN(i915));
1592
1593         wa_init_finish(w);
1594 }
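
/*
 * Note the ordering of the checks above: a platform that would also match
 * a broader test (e.g. DG1, which IS_GEN(i915, 12) would also catch) must
 * be tested first so that it reaches its more specific build function.
 */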
1595
1596 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1597 {
1598         const struct i915_wa_list *wal = &engine->whitelist;
1599         struct intel_uncore *uncore = engine->uncore;
1600         const u32 base = engine->mmio_base;
1601         struct i915_wa *wa;
1602         unsigned int i;
1603
1604         if (!wal->count)
1605                 return;
1606
1607         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1608                 intel_uncore_write(uncore,
1609                                    RING_FORCE_TO_NONPRIV(base, i),
1610                                    i915_mmio_reg_offset(wa->reg));
1611
1612         /* And clear the remaining slots in case they contain garbage */
1613         for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1614                 intel_uncore_write(uncore,
1615                                    RING_FORCE_TO_NONPRIV(base, i),
1616                                    i915_mmio_reg_offset(RING_NOPID(base)));
1617 }
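
/*
 * A worked view of the programming above, assuming the usual
 * RING_MAX_NONPRIV_SLOTS of 12 and a whitelist holding two entries:
 *
 *   RING_FORCE_TO_NONPRIV(base, 0)     <- reg0 | flags0
 *   RING_FORCE_TO_NONPRIV(base, 1)     <- reg1 | flags1
 *   RING_FORCE_TO_NONPRIV(base, 2..11) <- RING_NOPID(base)
 *
 * Pointing every unused slot at the innocuous NOPID register guarantees
 * that no stale whitelist entry survives from a previous configuration.
 */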
1618
1619 static void
1620 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1621 {
1622         struct drm_i915_private *i915 = engine->i915;
1623
1624         if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
1625             IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) {
1626                 /*
1627                  * Wa_1607138336:tgl[a0],dg1[a0]
1628                  * Wa_1607063988:tgl[a0],dg1[a0]
1629                  */
1630                 wa_write_or(wal,
1631                             GEN9_CTX_PREEMPT_REG,
1632                             GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
1633         }
1634
1635         if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) {
1636                 /*
1637                  * Wa_1606679103:tgl
1638                  * (see also Wa_1606682166:icl)
1639                  */
1640                 wa_write_or(wal,
1641                             GEN7_SARCHKMD,
1642                             GEN7_DISABLE_SAMPLER_PREFETCH);
1643         }
1644
1645         if (IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
1646             IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1647                 /* Wa_1606931601:tgl,rkl,dg1,adl-s */
1648                 wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
1649
1650                 /*
1651                  * Wa_1407928979:tgl A*
1652                  * Wa_18011464164:tgl[B0+],dg1[B0+]
1653                  * Wa_22010931296:tgl[B0+],dg1[B0+]
1654                  * Wa_14010919138:rkl,dg1,adl-s
1655                  */
1656                 wa_write_or(wal, GEN7_FF_THREAD_MODE,
1657                             GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1658
1659                 /*
1660                  * Wa_1606700617:tgl,dg1
1661                  * Wa_22010271021:tgl,rkl,dg1,adl-s
1662                  */
1663                 wa_masked_en(wal,
1664                              GEN9_CS_DEBUG_MODE1,
1665                              FF_DOP_CLOCK_GATE_DISABLE);
1666         }
1667
1668         if (IS_ALDERLAKE_S(i915) || IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
1669             IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1670                 /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s */
1671                 wa_masked_en(wal, GEN7_ROW_CHICKEN2,
1672                              GEN12_PUSH_CONST_DEREF_HOLD_DIS);
1673
1674                 /*
1675                  * Wa_1409085225:tgl
1676                  * Wa_14010229206:tgl,rkl,dg1[a0],adl-s
1677                  */
1678                 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
1679         }
1680
1682         if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
1683             IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1684                 /*
1685                  * Wa_1607030317:tgl
1686                  * Wa_1607186500:tgl
1687                  * Wa_1607297627:tgl,rkl,dg1[a0]
1688                  *
1689                  * On TGL and RKL there are multiple entries for this WA in the
1690                  * BSpec; some indicate this is an A0-only WA, others indicate
1691                  * it applies to all steppings so we trust the "all steppings."
1692                  * For DG1 this only applies to A0.
1693                  */
1694                 wa_masked_en(wal,
1695                              GEN6_RC_SLEEP_PSMI_CONTROL,
1696                              GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
1697                              GEN8_RC_SEMA_IDLE_MSG_DISABLE);
1698         }
1699
1700         if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1701                 /* Wa_1406941453:tgl,rkl,dg1 */
1702                 wa_masked_en(wal,
1703                              GEN10_SAMPLER_MODE,
1704                              ENABLE_SMALLPL);
1705         }
1706
1707         if (IS_GEN(i915, 11)) {
1708                 /* This is not a WA; enable it for better image quality */
1709                 wa_masked_en(wal,
1710                              _3D_CHICKEN3,
1711                              _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1712
1713                 /* WaPipelineFlushCoherentLines:icl */
1714                 wa_write_or(wal,
1715                             GEN8_L3SQCREG4,
1716                             GEN8_LQSC_FLUSH_COHERENT_LINES);
1717
1718                 /*
1719                  * Wa_1405543622:icl
1720                  * Formerly known as WaGAPZPriorityScheme
1721                  */
1722                 wa_write_or(wal,
1723                             GEN8_GARBCNTL,
1724                             GEN11_ARBITRATION_PRIO_ORDER_MASK);
1725
1726                 /*
1727                  * Wa_1604223664:icl
1728                  * Formerly known as WaL3BankAddressHashing
1729                  */
1730                 wa_write_clr_set(wal,
1731                                  GEN8_GARBCNTL,
1732                                  GEN11_HASH_CTRL_EXCL_MASK,
1733                                  GEN11_HASH_CTRL_EXCL_BIT0);
1734                 wa_write_clr_set(wal,
1735                                  GEN11_GLBLINVL,
1736                                  GEN11_BANK_HASH_ADDR_EXCL_MASK,
1737                                  GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1738
1739                 /*
1740                  * Wa_1405733216:icl
1741                  * Formerly known as WaDisableCleanEvicts
1742                  */
1743                 wa_write_or(wal,
1744                             GEN8_L3SQCREG4,
1745                             GEN11_LQSC_CLEAN_EVICT_DISABLE);
1746
1747                 /* WaForwardProgressSoftReset:icl */
1748                 wa_write_or(wal,
1749                             GEN10_SCRATCH_LNCF2,
1750                             PMFLUSHDONE_LNICRSDROP |
1751                             PMFLUSH_GAPL3UNBLOCK |
1752                             PMFLUSHDONE_LNEBLK);
1753
1754                 /* Wa_1406609255:icl (pre-prod) */
1755                 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1756                         wa_write_or(wal,
1757                                     GEN7_SARCHKMD,
1758                                     GEN7_DISABLE_DEMAND_PREFETCH);
1759
1760                 /* Wa_1606682166:icl */
1761                 wa_write_or(wal,
1762                             GEN7_SARCHKMD,
1763                             GEN7_DISABLE_SAMPLER_PREFETCH);
1764
1765                 /* Wa_1409178092:icl */
1766                 wa_write_clr_set(wal,
1767                                  GEN11_SCRATCH2,
1768                                  GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
1769                                  0);
1770
1771                 /* WaEnable32PlaneMode:icl */
1772                 wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
1773                              GEN11_ENABLE_32_PLANE_MODE);
1774
1775                 /*
1776                  * Wa_1408615072:icl,ehl  (vsunit)
1777                  * Wa_1407596294:icl,ehl  (hsunit)
1778                  */
1779                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1780                             VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
1781
1782                 /* Wa_1407352427:icl,ehl */
1783                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1784                             PSDUNIT_CLKGATE_DIS);
1785
1786                 /* Wa_1406680159:icl,ehl */
1787                 wa_write_or(wal,
1788                             SUBSLICE_UNIT_LEVEL_CLKGATE,
1789                             GWUNIT_CLKGATE_DIS);
1790
1791                 /*
1792                  * Wa_1408767742:icl[a2..forever],ehl[all]
1793                  * Wa_1605460711:icl[a0..c0]
1794                  */
1795                 wa_write_or(wal,
1796                             GEN7_FF_THREAD_MODE,
1797                             GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1798
1799                 /* Wa_22010271021:ehl */
1800                 if (IS_JSL_EHL(i915))
1801                         wa_masked_en(wal,
1802                                      GEN9_CS_DEBUG_MODE1,
1803                                      FF_DOP_CLOCK_GATE_DISABLE);
1804         }
1805
1806         if (IS_GEN_RANGE(i915, 9, 12)) {
1807                 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
1808                 wa_masked_en(wal,
1809                              GEN7_FF_SLICE_CS_CHICKEN1,
1810                              GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1811         }
1812
1813         if (IS_SKYLAKE(i915) ||
1814             IS_KABYLAKE(i915) ||
1815             IS_COFFEELAKE(i915) ||
1816             IS_COMETLAKE(i915)) {
1817                 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1818                 wa_write_or(wal,
1819                             GEN8_GARBCNTL,
1820                             GEN9_GAPS_TSV_CREDIT_DISABLE);
1821         }
1822
1823         if (IS_BROXTON(i915)) {
1824                 /* WaDisablePooledEuLoadBalancingFix:bxt */
1825                 wa_masked_en(wal,
1826                              FF_SLICE_CS_CHICKEN2,
1827                              GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1828         }
1829
1830         if (IS_GEN(i915, 9)) {
1831                 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1832                 wa_masked_en(wal,
1833                              GEN9_CSFE_CHICKEN1_RCS,
1834                              GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1835
1836                 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1837                 wa_write_or(wal,
1838                             BDW_SCRATCH1,
1839                             GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1840
1841                 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1842                 if (IS_GEN9_LP(i915))
1843                         wa_write_clr_set(wal,
1844                                          GEN8_L3SQCREG1,
1845                                          L3_PRIO_CREDITS_MASK,
1846                                          L3_GENERAL_PRIO_CREDITS(62) |
1847                                          L3_HIGH_PRIO_CREDITS(2));
1848
1849                 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1850                 wa_write_or(wal,
1851                             GEN8_L3SQCREG4,
1852                             GEN8_LQSC_FLUSH_COHERENT_LINES);
1853
1854                 /* Disable atomics in L3 to prevent unrecoverable hangs */
1855                 wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
1856                                  GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
1857                 wa_write_clr_set(wal, GEN8_L3SQCREG4,
1858                                  GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
1859                 wa_write_clr_set(wal, GEN9_SCRATCH1,
1860                                  EVICTION_PERF_FIX_ENABLE, 0);
1861         }
1862
1863         if (IS_HASWELL(i915)) {
1864                 /* WaSampleCChickenBitEnable:hsw */
1865                 wa_masked_en(wal,
1866                              HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
1867
1868                 wa_masked_dis(wal,
1869                               CACHE_MODE_0_GEN7,
1870                               /* enable HiZ Raw Stall Optimization */
1871                               HIZ_RAW_STALL_OPT_DISABLE);
1872
1873                 /* WaDisable4x2SubspanOptimization:hsw */
1874                 wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
1875         }
1876
1877         if (IS_VALLEYVIEW(i915)) {
1878                 /* WaDisableEarlyCull:vlv */
1879                 wa_masked_en(wal,
1880                              _3D_CHICKEN3,
1881                              _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
1882
1883                 /*
1884                  * WaVSThreadDispatchOverride:ivb,vlv
1885                  *
1886                  * This actually overrides the dispatch
1887                  * mode for all thread types.
1888                  */
1889                 wa_write_clr_set(wal,
1890                                  GEN7_FF_THREAD_MODE,
1891                                  GEN7_FF_SCHED_MASK,
1892                                  GEN7_FF_TS_SCHED_HW |
1893                                  GEN7_FF_VS_SCHED_HW |
1894                                  GEN7_FF_DS_SCHED_HW);
1895
1896                 /* WaPsdDispatchEnable:vlv */
1897                 /* WaDisablePSDDualDispatchEnable:vlv */
1898                 wa_masked_en(wal,
1899                              GEN7_HALF_SLICE_CHICKEN1,
1900                              GEN7_MAX_PS_THREAD_DEP |
1901                              GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
1902         }
1903
1904         if (IS_IVYBRIDGE(i915)) {
1905                 /* WaDisableEarlyCull:ivb */
1906                 wa_masked_en(wal,
1907                              _3D_CHICKEN3,
1908                              _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
1909
1910                 if (0) { /* causes HiZ corruption on ivb:gt1 */
1911                         /* enable HiZ Raw Stall Optimization */
1912                         wa_masked_dis(wal,
1913                                       CACHE_MODE_0_GEN7,
1914                                       HIZ_RAW_STALL_OPT_DISABLE);
1915                 }
1916
1917                 /*
1918                  * WaVSThreadDispatchOverride:ivb,vlv
1919                  *
1920                  * This actually overrides the dispatch
1921                  * mode for all thread types.
1922                  */
1923                 wa_write_clr_set(wal,
1924                                  GEN7_FF_THREAD_MODE,
1925                                  GEN7_FF_SCHED_MASK,
1926                                  GEN7_FF_TS_SCHED_HW |
1927                                  GEN7_FF_VS_SCHED_HW |
1928                                  GEN7_FF_DS_SCHED_HW);
1929
1930                 /* WaDisablePSDDualDispatchEnable:ivb */
1931                 if (IS_IVB_GT1(i915))
1932                         wa_masked_en(wal,
1933                                      GEN7_HALF_SLICE_CHICKEN1,
1934                                      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
1935         }
1936
1937         if (IS_GEN(i915, 7)) {
1938                 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1939                 wa_masked_en(wal,
1940                              GFX_MODE_GEN7,
1941                              GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
1942
1943                 /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
1944                 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
1945
1946                 /*
1947                  * BSpec says this must be set, even though
1948                  * WaDisable4x2SubspanOptimization:ivb,hsw
1949                  * isn't listed for VLV.
1950                  */
1951                 wa_masked_en(wal,
1952                              CACHE_MODE_1,
1953                              PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
1954
1955                 /*
1956                  * BSpec recommends 8x4 when MSAA is used,
1957                  * however in practice 16x4 seems fastest.
1958                  *
1959                  * Note that PS/WM thread counts depend on the WIZ hashing
1960                  * disable bit, which we don't touch here, but it's good
1961                  * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
1962                  */
1963                 wa_add(wal, GEN7_GT_MODE, 0,
1964                        _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
1965                                      GEN6_WIZ_HASHING_16x4),
1966                        GEN6_WIZ_HASHING_16x4);
1967         }
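
        /*
         * GEN7_GT_MODE above is a masked register: the upper 16 bits of
         * each write act as a per-bit write-enable for the lower 16.
         * Assuming _MASKED_FIELD(mask, value) expands to (mask) << 16 |
         * (value), the wa_add() emits
         *
         *   set  = GEN6_WIZ_HASHING_MASK << 16 | GEN6_WIZ_HASHING_16x4
         *   read = GEN6_WIZ_HASHING_16x4
         *
         * with the separate read mask restricting verification to the
         * hashing bits, since the write-enable half is not meaningful on
         * readback.
         */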
1968
1969         if (IS_GEN_RANGE(i915, 6, 7))
1970                 /*
1971                  * We need to disable the AsyncFlip performance optimisations in
1972                  * order to use MI_WAIT_FOR_EVENT within the CS. It should
1973                  * already be programmed to '1' on all products.
1974                  *
1975                  * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1976                  */
1977                 wa_masked_en(wal,
1978                              MI_MODE,
1979                              ASYNC_FLIP_PERF_DISABLE);
1980
1981         if (IS_GEN(i915, 6)) {
1982                 /*
1983                  * Required for the hardware to program scanline values for
1984                  * waiting.
1985                  * WaEnableFlushTlbInvalidationMode:snb
1986                  */
1987                 wa_masked_en(wal,
1988                              GFX_MODE,
1989                              GFX_TLB_INVALIDATE_EXPLICIT);
1990
1991                 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
1992                 wa_masked_en(wal,
1993                              _3D_CHICKEN,
1994                              _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
1995
1996                 wa_masked_en(wal,
1997                              _3D_CHICKEN3,
1998                              /* WaStripsFansDisableFastClipPerformanceFix:snb */
1999                              _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
2000                              /*
2001                               * Bspec says:
2002                               * "This bit must be set if 3DSTATE_CLIP clip mode is set
2003                               * to normal and 3DSTATE_SF number of SF output attributes
2004                               * is more than 16."
2005                               */
2006                              _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
2007
2008                 /*
2009                  * BSpec recommends 8x4 when MSAA is used,
2010                  * however in practice 16x4 seems fastest.
2011                  *
2012                  * Note that PS/WM thread counts depend on the WIZ hashing
2013                  * disable bit, which we don't touch here, but it's good
2014                  * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2015                  */
2016                 wa_add(wal,
2017                        GEN6_GT_MODE, 0,
2018                        _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
2019                        GEN6_WIZ_HASHING_16x4);
2020
2021                 /* WaDisable_RenderCache_OperationalFlush:snb */
2022                 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
2023
2024                 /*
2025                  * From the Sandybridge PRM, volume 1 part 3, page 24:
2026                  * "If this bit is set, STCunit will have LRA as replacement
2027                  *  policy. [...] This bit must be reset. LRA replacement
2028                  *  policy is not supported."
2029                  */
2030                 wa_masked_dis(wal,
2031                               CACHE_MODE_0,
2032                               CM0_STC_EVICT_DISABLE_LRA_SNB);
2033         }
2034
2035         if (IS_GEN_RANGE(i915, 4, 6))
2036                 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
2037                 wa_add(wal, MI_MODE,
2038                        0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
2039                        /* XXX bit doesn't stick on Broadwater */
2040                        IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
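
        /*
         * The final wa_add() argument above is the readback-verification
         * mask: passing 0 for Broadwater makes the verify path compare no
         * bits at all, which is how we tolerate a bit that is written but
         * doesn't stick on that part.
         */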
2041
2042         if (IS_GEN(i915, 4))
2043                 /*
2044                  * Disable CONSTANT_BUFFER before it is loaded from the context
2045                  * image. As soon as it is loaded, it is executed and the stored
2046                  * address may no longer be valid, leading to a GPU hang.
2047                  *
2048                  * This imposes the requirement that userspace reload their
2049                  * CONSTANT_BUFFER on every batch, fortunately a requirement
2050                  * they are already accustomed to from before contexts were
2051                  * enabled.
2052                  */
2053                 wa_add(wal, ECOSKPD,
2054                        0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
2055                        0 /* XXX bit doesn't stick on Broadwater */);
2056 }
2057
2058 static void
2059 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2060 {
2061         struct drm_i915_private *i915 = engine->i915;
2062
2063         /* WaKBLVECSSemaphoreWaitPoll:kbl */
2064         if (IS_KBL_GT_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
2065                 wa_write(wal,
2066                          RING_SEMA_WAIT_POLL(engine->mmio_base),
2067                          1);
2068         }
2069 }
2070
2071 static void
2072 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2073 {
2074         if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4))
2075                 return;
2076
2077         if (engine->class == RENDER_CLASS)
2078                 rcs_engine_wa_init(engine, wal);
2079         else
2080                 xcs_engine_wa_init(engine, wal);
2081 }
2082
2083 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
2084 {
2085         struct i915_wa_list *wal = &engine->wa_list;
2086
2087         if (INTEL_GEN(engine->i915) < 4)
2088                 return;
2089
2090         wa_init_start(wal, "engine", engine->name);
2091         engine_init_workarounds(engine, wal);
2092         wa_init_finish(wal);
2093 }
2094
2095 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
2096 {
2097         wa_list_apply(engine->uncore, &engine->wa_list);
2098 }
2099
2100 struct mcr_range {
2101         u32 start;
2102         u32 end;
2103 };
2104
2105 static const struct mcr_range mcr_ranges_gen8[] = {
2106         { .start = 0x5500, .end = 0x55ff },
2107         { .start = 0x7000, .end = 0x7fff },
2108         { .start = 0x9400, .end = 0x97ff },
2109         { .start = 0xb000, .end = 0xb3ff },
2110         { .start = 0xe000, .end = 0xe7ff },
2111         {},
2112 };
2113
2114 static const struct mcr_range mcr_ranges_gen12[] = {
2115         { .start =  0x8150, .end =  0x815f },
2116         { .start =  0x9520, .end =  0x955f },
2117         { .start =  0xb100, .end =  0xb3ff },
2118         { .start =  0xde80, .end =  0xe8ff },
2119         { .start = 0x24a00, .end = 0x24a7f },
2120         {},
2121 };
2122
2123 static bool mcr_range(struct drm_i915_private *i915, u32 offset)
2124 {
2125         const struct mcr_range *mcr_ranges;
2126         int i;
2127
2128         if (INTEL_GEN(i915) >= 12)
2129                 mcr_ranges = mcr_ranges_gen12;
2130         else if (INTEL_GEN(i915) >= 8)
2131                 mcr_ranges = mcr_ranges_gen8;
2132         else
2133                 return false;
2134
2135         /*
2136          * Registers in these ranges are affected by the MCR selector
2137          * which only controls CPU-initiated MMIO. Routing does not
2138          * work for CS access so we cannot verify them on this path.
2139          */
2140         for (i = 0; mcr_ranges[i].start; i++)
2141                 if (offset >= mcr_ranges[i].start &&
2142                     offset <= mcr_ranges[i].end)
2143                         return true;
2144
2145         return false;
2146 }
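
/*
 * A hedged usage sketch: GEN8_L3SQCREG1 lives at offset 0xb100, inside the
 * 0xb000-0xb3ff multicast range above, so on gen8+
 *
 *   mcr_range(i915, i915_mmio_reg_offset(GEN8_L3SQCREG1))
 *
 * returns true and that register is skipped by the CS-based verification
 * below.
 */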
2147
2148 static int
2149 wa_list_srm(struct i915_request *rq,
2150             const struct i915_wa_list *wal,
2151             struct i915_vma *vma)
2152 {
2153         struct drm_i915_private *i915 = rq->engine->i915;
2154         unsigned int i, count = 0;
2155         const struct i915_wa *wa;
2156         u32 srm, *cs;
2157
2158         srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
2159         if (INTEL_GEN(i915) >= 8)
2160                 srm++;
2161
2162         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2163                 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
2164                         count++;
2165         }
2166
2167         cs = intel_ring_begin(rq, 4 * count);
2168         if (IS_ERR(cs))
2169                 return PTR_ERR(cs);
2170
2171         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2172                 u32 offset = i915_mmio_reg_offset(wa->reg);
2173
2174                 if (mcr_range(i915, offset))
2175                         continue;
2176
2177                 *cs++ = srm;
2178                 *cs++ = offset;
2179                 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
2180                 *cs++ = 0;
2181         }
2182         intel_ring_advance(rq, cs);
2183
2184         return 0;
2185 }
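
/*
 * Layout of each packet emitted above, as a hedged sketch. On gen8+ the
 * opcode's length field is incremented (the srm++) because the command
 * carries a 64-bit address:
 *
 *   dw0: MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT
 *   dw1: MMIO offset of the workaround register
 *   dw2: GGTT address of the result slot (low 32 bits)
 *   dw3: high 32 bits of the address, always 0 here since the GGTT offset
 *        fits in 32 bits; on pre-gen8 parts this trailing zero dword is
 *        simply an MI_NOOP pad
 */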
2186
2187 static int engine_wa_list_verify(struct intel_context *ce,
2188                                  const struct i915_wa_list * const wal,
2189                                  const char *from)
2190 {
2191         const struct i915_wa *wa;
2192         struct i915_request *rq;
2193         struct i915_vma *vma;
2194         struct i915_gem_ww_ctx ww;
2195         unsigned int i;
2196         u32 *results;
2197         int err;
2198
2199         if (!wal->count)
2200                 return 0;
2201
2202         vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
2203                                            wal->count * sizeof(u32));
2204         if (IS_ERR(vma))
2205                 return PTR_ERR(vma);
2206
2207         intel_engine_pm_get(ce->engine);
2208         i915_gem_ww_ctx_init(&ww, false);
2209 retry:
2210         err = i915_gem_object_lock(vma->obj, &ww);
2211         if (err == 0)
2212                 err = intel_context_pin_ww(ce, &ww);
2213         if (err)
2214                 goto err_pm;
2215
2216         rq = i915_request_create(ce);
2217         if (IS_ERR(rq)) {
2218                 err = PTR_ERR(rq);
2219                 goto err_unpin;
2220         }
2221
2222         err = i915_request_await_object(rq, vma->obj, true);
2223         if (err == 0)
2224                 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
2225         if (err == 0)
2226                 err = wa_list_srm(rq, wal, vma);
2227
2228         i915_request_get(rq);
2229         if (err)
2230                 i915_request_set_error_once(rq, err);
2231         i915_request_add(rq);
2232
2233         if (err)
2234                 goto err_rq;
2235
2236         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2237                 err = -ETIME;
2238                 goto err_rq;
2239         }
2240
2241         results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
2242         if (IS_ERR(results)) {
2243                 err = PTR_ERR(results);
2244                 goto err_rq;
2245         }
2246
2247         err = 0;
2248         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2249                 if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
2250                         continue;
2251
2252                 if (!wa_verify(wa, results[i], wal->name, from))
2253                         err = -ENXIO;
2254         }
2255
2256         i915_gem_object_unpin_map(vma->obj);
2257
2258 err_rq:
2259         i915_request_put(rq);
2260 err_unpin:
2261         intel_context_unpin(ce);
2262 err_pm:
2263         if (err == -EDEADLK) {
2264                 err = i915_gem_ww_ctx_backoff(&ww);
2265                 if (!err)
2266                         goto retry;
2267         }
2268         i915_gem_ww_ctx_fini(&ww);
2269         intel_engine_pm_put(ce->engine);
2270         i915_vma_unpin(vma);
2271         i915_vma_put(vma);
2272         return err;
2273 }
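
/*
 * The retry label above follows the standard i915_gem_ww_ctx transaction
 * pattern; a hedged skeleton for reference:
 *
 *   i915_gem_ww_ctx_init(&ww, false);
 * retry:
 *   err = i915_gem_object_lock(obj, &ww);
 *   ...
 *   if (err == -EDEADLK) {
 *           err = i915_gem_ww_ctx_backoff(&ww);
 *           if (!err)
 *                   goto retry;
 *   }
 *   i915_gem_ww_ctx_fini(&ww);
 *
 * -EDEADLK signals a wound/wait ordering conflict with another transaction;
 * the backoff drops the held locks so the whole sequence can be replayed
 * from the top.
 */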
2274
2275 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
2276                                     const char *from)
2277 {
2278         return engine_wa_list_verify(engine->kernel_context,
2279                                      &engine->wa_list,
2280                                      from);
2281 }
2282
2283 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2284 #include "selftest_workarounds.c"
2285 #endif