drm/i915/gt: Convert PSS_MODE2 to multicast register
platform/kernel/linux-starfive.git: drivers/gpu/drm/i915/gt/intel_workarounds.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014-2018 Intel Corporation
4  */
5
6 #include "i915_drv.h"
7 #include "i915_reg.h"
8 #include "intel_context.h"
9 #include "intel_engine_pm.h"
10 #include "intel_engine_regs.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt.h"
13 #include "intel_gt_mcr.h"
14 #include "intel_gt_regs.h"
15 #include "intel_ring.h"
16 #include "intel_workarounds.h"
17
18 /**
19  * DOC: Hardware workarounds
20  *
21  * Hardware workarounds are register programming documented to be executed in
22  * the driver that fall outside of the normal programming sequences for a
23  * platform. There are some basic categories of workarounds, depending on
24  * how/when they are applied:
25  *
26  * - Context workarounds: workarounds that touch registers that are
27  *   saved/restored to/from the HW context image. The list is emitted (via Load
28  *   Register Immediate commands) once when initializing the device and saved in
29  *   the default context. That default context is then used on every context
30  *   creation to have a "primed golden context", i.e. a context image that
31  *   already contains the changes needed to all the registers.
32  *
33  *   Context workarounds should be implemented in the *_ctx_workarounds_init()
34  *   variants respective to the targeted platforms; see the sketch after this comment block.
35  *
36  * - Engine workarounds: the list of these WAs is applied whenever the specific
37  *   engine is reset. It's also possible that a set of engine classes share a
38  *   common power domain and they are reset together. This happens on some
39  *   platforms with render and compute engines. In this case (at least) one of
40  * them needs to keep the workaround programming: the approach taken in the
41  * driver is to tie those workarounds to the first compute/render engine that
42  * is registered.  When executing with GuC submission, engine resets are
43  * outside of kernel driver control, hence the list of registers involved is
44  * written once, on engine initialization, and then passed to GuC, which
45  * saves/restores their values before/after the reset takes place. See
46  *   ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
47  *
48  *   Workarounds for registers specific to RCS and CCS should be implemented in
49  *   rcs_engine_wa_init() and ccs_engine_wa_init(), respectively; those for
50  *   registers belonging to BCS, VCS or VECS should be implemented in
51  *   xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
52  *   engine's MMIO range but that are part of the common RCS/CCS reset domain
53  *   should be implemented in general_render_compute_wa_init().
54  *
55  * - GT workarounds: the list of these WAs is applied whenever these registers
56  *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
57  *
58  *   GT workarounds should be implemented in the *_gt_workarounds_init()
59  *   variants respective to the targeted platforms.
60  *
61  * - Register whitelist: some workarounds need to be implemented in userspace,
62  *   but need to touch privileged registers. The whitelist in the kernel
63  *   instructs the hardware to allow the access to happen. From the kernel side,
64  *   this is just a special case of an MMIO workaround (as we write the list of
65  *   these to-be-whitelisted registers to some special HW registers).
66  *
67  *   Register whitelisting should be done in the *_whitelist_build() variants
68  *   respective to the targeted platforms.
69  *
70  * - Workaround batchbuffers: buffers that get executed automatically by the
71  *   hardware on every HW context restore. These buffers are created and
72  *   programmed in the default context so the hardware always goes through those
73  *   programming sequences when switching contexts. The support for workaround
74  *   batchbuffers is enabled by these hardware mechanisms:
75  *
76  *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
77  *      context, pointing the hardware to jump to that location when that offset
78  *      is reached in the context restore. The workaround batchbuffer in the
79  *      driver currently uses this mechanism on all platforms.
80  *
81  *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
82  *      pointing the hardware to a buffer to continue executing after the
83  *      engine registers are restored in a context restore sequence. This is
84  *      currently not used in the driver.
85  *
86  * - Other:  There are WAs that, due to their nature, cannot be applied from a
87  *   central place. Those are peppered around the rest of the code, as needed.
88  *   Workarounds related to the display IP are the main example.
89  *
90  * .. [1] Technically, some registers are powercontext saved & restored, so they
91  *    survive a suspend/resume. In practice, writing them again is not too
92  *    costly and simplifies things, so it's the approach taken in the driver.
93  */
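
/*
 * A minimal illustrative sketch (kept out of the build with #if 0) of what a
 * new context workaround looks like when following the conventions described
 * above.  The function name "xyz_ctx_workarounds_init", the Wa_ number and
 * the chosen bit are made-up placeholders rather than a real workaround; the
 * helper and the register/bit names are the ones defined and used later in
 * this file.
 */
#if 0
static void xyz_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        /* Wa_00000000:xyz (placeholder) - enable a masked chicken bit */
        wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
}
#endif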
94
95 static void wa_init_start(struct i915_wa_list *wal, struct intel_gt *gt,
96                           const char *name, const char *engine_name)
97 {
98         wal->gt = gt;
99         wal->name = name;
100         wal->engine_name = engine_name;
101 }
102
103 #define WA_LIST_CHUNK (1 << 4)
104
105 static void wa_init_finish(struct i915_wa_list *wal)
106 {
107         /* Trim unused entries. */
108         if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
109                 struct i915_wa *list = kmemdup(wal->list,
110                                                wal->count * sizeof(*list),
111                                                GFP_KERNEL);
112
113                 if (list) {
114                         kfree(wal->list);
115                         wal->list = list;
116                 }
117         }
118
119         if (!wal->count)
120                 return;
121
122         drm_dbg(&wal->gt->i915->drm, "Initialized %u %s workarounds on %s\n",
123                 wal->wa_count, wal->name, wal->engine_name);
124 }
125
126 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
127 {
128         unsigned int addr = i915_mmio_reg_offset(wa->reg);
129         struct drm_i915_private *i915 = wal->gt->i915;
130         unsigned int start = 0, end = wal->count;
131         const unsigned int grow = WA_LIST_CHUNK;
132         struct i915_wa *wa_;
133
134         GEM_BUG_ON(!is_power_of_2(grow));
135
136         if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
137                 struct i915_wa *list;
138
139                 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
140                                      GFP_KERNEL);
141                 if (!list) {
142                         drm_err(&i915->drm, "No space for workaround init!\n");
143                         return;
144                 }
145
146                 if (wal->list) {
147                         memcpy(list, wal->list, sizeof(*wa) * wal->count);
148                         kfree(wal->list);
149                 }
150
151                 wal->list = list;
152         }
153
154         while (start < end) {
155                 unsigned int mid = start + (end - start) / 2;
156
157                 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
158                         start = mid + 1;
159                 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
160                         end = mid;
161                 } else {
162                         wa_ = &wal->list[mid];
163
164                         if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
165                                 drm_err(&i915->drm,
166                                         "Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
167                                         i915_mmio_reg_offset(wa_->reg),
168                                         wa_->clr, wa_->set);
169
170                                 wa_->set &= ~wa->clr;
171                         }
172
173                         wal->wa_count++;
174                         wa_->set |= wa->set;
175                         wa_->clr |= wa->clr;
176                         wa_->read |= wa->read;
177                         return;
178                 }
179         }
180
181         wal->wa_count++;
182         wa_ = &wal->list[wal->count++];
183         *wa_ = *wa;
184
185         while (wa_-- > wal->list) {
186                 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
187                            i915_mmio_reg_offset(wa_[1].reg));
188                 if (i915_mmio_reg_offset(wa_[1].reg) >
189                     i915_mmio_reg_offset(wa_[0].reg))
190                         break;
191
192                 swap(wa_[1], wa_[0]);
193         }
194 }
195
196 static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
197                    u32 clear, u32 set, u32 read_mask, bool masked_reg)
198 {
199         struct i915_wa wa = {
200                 .reg  = reg,
201                 .clr  = clear,
202                 .set  = set,
203                 .read = read_mask,
204                 .masked_reg = masked_reg,
205         };
206
207         _wa_add(wal, &wa);
208 }
209
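/*
 * Variant of wa_add() for multicast/replicated (MCR) registers.  MCR
 * registers have multiple hardware instances and reads from them must be
 * steered to a valid instance (see intel_gt_mcr.c); the separate
 * i915_mcr_reg_t type lets the compiler catch a workaround that uses the
 * wrong flavour of helper for a given register definition.
 */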
210 static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
211                        u32 clear, u32 set, u32 read_mask, bool masked_reg)
212 {
213         struct i915_wa wa = {
214                 .mcr_reg = reg,
215                 .clr  = clear,
216                 .set  = set,
217                 .read = read_mask,
218                 .masked_reg = masked_reg,
219                 .is_mcr = 1,
220         };
221
222         _wa_add(wal, &wa);
223 }
224
225 static void
226 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
227 {
228         wa_add(wal, reg, clear, set, clear, false);
229 }
230
231 static void
232 wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
233 {
234         wa_mcr_add(wal, reg, clear, set, clear, false);
235 }
236
237 static void
238 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
239 {
240         wa_write_clr_set(wal, reg, ~0, set);
241 }
242
243 static void
244 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
245 {
246         wa_write_clr_set(wal, reg, set, set);
247 }
248
249 static void
250 wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
251 {
252         wa_mcr_write_clr_set(wal, reg, set, set);
253 }
254
255 static void
256 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
257 {
258         wa_write_clr_set(wal, reg, clr, 0);
259 }
260
261 static void
262 wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
263 {
264         wa_mcr_write_clr_set(wal, reg, clr, 0);
265 }
266
267 /*
268  * WA operations on "masked register". A masked register has the upper 16 bits
269  * documented as "masked" in b-spec. Its purpose is to allow writing to just a
270  * portion of the register without a rmw: you simply write in the upper 16 bits
271  * the mask of bits you are going to modify.
272  *
273  * The wa_masked_* family of functions already does the necessary operations to
274  * calculate the mask based on the parameters passed, so user only has to
275  * provide the lower 16 bits of that register.
276  */
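
/*
 * A worked example, assuming the masked-register behaviour described above
 * (value in the low 16 bits, write-enable mask in the high 16 bits):
 *
 *   wa_masked_en(wal, reg, BIT(2))  results in 0x00040004 being written,
 *   wa_masked_dis(wal, reg, BIT(2)) results in 0x00040000 being written,
 *
 * so in both cases only bit 2 of the register is modified, with no
 * read-modify-write cycle needed.
 */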
277
278 static void
279 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
280 {
281         wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
282 }
283
284 static void
285 wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
286 {
287         wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
288 }
289
290 static void
291 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
292 {
293         wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
294 }
295
296 static void
297 wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
298 {
299         wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
300 }
301
302 static void
303 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
304                     u32 mask, u32 val)
305 {
306         wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
307 }
308
309 static void
310 wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
311                         u32 mask, u32 val)
312 {
313         wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
314 }
315
316 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
317                                       struct i915_wa_list *wal)
318 {
319         wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
320 }
321
322 static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
323                                       struct i915_wa_list *wal)
324 {
325         wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
326 }
327
328 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
329                                       struct i915_wa_list *wal)
330 {
331         wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
332
333         /* WaDisableAsyncFlipPerfMode:bdw,chv */
334         wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
335
336         /* WaDisablePartialInstShootdown:bdw,chv */
337         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
338                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
339
340         /* Use Force Non-Coherent whenever executing a 3D context. This is a
341          * workaround for a possible hang in the unlikely event a TLB
342          * invalidation occurs during a PSD flush.
343          */
344         /* WaForceEnableNonCoherent:bdw,chv */
345         /* WaHdcDisableFetchWhenMasked:bdw,chv */
346         wa_masked_en(wal, HDC_CHICKEN0,
347                      HDC_DONOT_FETCH_MEM_WHEN_MASKED |
348                      HDC_FORCE_NON_COHERENT);
349
350         /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
351          * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
352          *  polygons in the same 8x4 pixel/sample area to be processed without
353          *  stalling waiting for the earlier ones to write to Hierarchical Z
354          *  buffer."
355          *
356          * This optimization is off by default for BDW and CHV; turn it on.
357          */
358         wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
359
360         /* Wa4x4STCOptimizationDisable:bdw,chv */
361         wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
362
363         /*
364          * BSpec recommends 8x4 when MSAA is used,
365          * however in practice 16x4 seems fastest.
366          *
367          * Note that PS/WM thread counts depend on the WIZ hashing
368          * disable bit, which we don't touch here, but it's good
369          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
370          */
371         wa_masked_field_set(wal, GEN7_GT_MODE,
372                             GEN6_WIZ_HASHING_MASK,
373                             GEN6_WIZ_HASHING_16x4);
374 }
375
376 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
377                                      struct i915_wa_list *wal)
378 {
379         struct drm_i915_private *i915 = engine->i915;
380
381         gen8_ctx_workarounds_init(engine, wal);
382
383         /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
384         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
385
386         /* WaDisableDopClockGating:bdw
387          *
388          * Also see the related UCGTCL1 write in bdw_init_clock_gating()
389          * to disable EUTC clock gating.
390          */
391         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
392                          DOP_CLOCK_GATING_DISABLE);
393
394         wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
395                          GEN8_SAMPLER_POWER_BYPASS_DIS);
396
397         wa_masked_en(wal, HDC_CHICKEN0,
398                      /* WaForceContextSaveRestoreNonCoherent:bdw */
399                      HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
400                      /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
401                      (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
402 }
403
404 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
405                                      struct i915_wa_list *wal)
406 {
407         gen8_ctx_workarounds_init(engine, wal);
408
409         /* WaDisableThreadStallDopClockGating:chv */
410         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
411
412         /* Improve HiZ throughput on CHV. */
413         wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
414 }
415
416 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
417                                       struct i915_wa_list *wal)
418 {
419         struct drm_i915_private *i915 = engine->i915;
420
421         if (HAS_LLC(i915)) {
422                 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
423                  *
424                  * Must match Display Engine. See
425                  * WaCompressedResourceDisplayNewHashMode.
426                  */
427                 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
428                              GEN9_PBE_COMPRESSED_HASH_SELECTION);
429                 wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
430                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
431         }
432
433         /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
434         /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
435         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
436                          FLOW_CONTROL_ENABLE |
437                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
438
439         /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
440         /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
441         wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
442                          GEN9_ENABLE_YV12_BUGFIX |
443                          GEN9_ENABLE_GPGPU_PREEMPTION);
444
445         /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
446         /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
447         wa_masked_en(wal, CACHE_MODE_1,
448                      GEN8_4x4_STC_OPTIMIZATION_DISABLE |
449                      GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
450
451         /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
452         wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
453                           GEN9_CCS_TLB_PREFETCH_ENABLE);
454
455         /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
456         wa_masked_en(wal, HDC_CHICKEN0,
457                      HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
458                      HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
459
460         /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
461          * both tied to WaForceContextSaveRestoreNonCoherent
462          * in some hsds for skl. We keep the tie for all gen9. The
463          * documentation is a bit hazy and so we want to get common behaviour,
464          * even though there is no clear evidence we would need both on kbl/bxt.
465          * This area has been source of system hangs so we play it safe
466          * and mimic the skl regardless of what bspec says.
467          *
468          * Use Force Non-Coherent whenever executing a 3D context. This
469          * is a workaround for a possible hang in the unlikely event
470          * a TLB invalidation occurs during a PSD flush.
471          */
472
473         /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
474         wa_masked_en(wal, HDC_CHICKEN0,
475                      HDC_FORCE_NON_COHERENT);
476
477         /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
478         if (IS_SKYLAKE(i915) ||
479             IS_KABYLAKE(i915) ||
480             IS_COFFEELAKE(i915) ||
481             IS_COMETLAKE(i915))
482                 wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
483                                  GEN8_SAMPLER_POWER_BYPASS_DIS);
484
485         /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
486         wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
487
488         /*
489          * Supporting preemption with fine-granularity requires changes in the
490          * batch buffer programming. Since we can't break old userspace, we
491          * need to set our default preemption level to safe value. Userspace is
492          * still able to use more fine-grained preemption levels, since in
493          * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
494          * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
495          * not real HW workarounds, but merely a way to start using preemption
496          * while maintaining old contract with userspace.
497          */
498
499         /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
500         wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
501
502         /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
503         wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
504                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
505                             GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
506
507         /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
508         if (IS_GEN9_LP(i915))
509                 wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
510 }
511
512 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
513                                 struct i915_wa_list *wal)
514 {
515         struct intel_gt *gt = engine->gt;
516         u8 vals[3] = { 0, 0, 0 };
517         unsigned int i;
518
519         for (i = 0; i < 3; i++) {
520                 u8 ss;
521
522                 /*
523                  * Only consider slices where one, and only one, subslice has 7
524                  * EUs
525                  */
526                 if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
527                         continue;
528
529                 /*
530                  * subslice_7eu[i] != 0 (because of the check above) and
531                  * ss_max == 4 (maximum number of subslices possible per slice)
532                  *
533                  * ->    0 <= ss <= 3;
534                  */
535                 ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
536                 vals[i] = 3 - ss;
537         }
538
539         if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
540                 return;
541
542         /* Tune IZ hashing. See intel_device_info_runtime_init() */
543         wa_masked_field_set(wal, GEN7_GT_MODE,
544                             GEN9_IZ_HASHING_MASK(2) |
545                             GEN9_IZ_HASHING_MASK(1) |
546                             GEN9_IZ_HASHING_MASK(0),
547                             GEN9_IZ_HASHING(2, vals[2]) |
548                             GEN9_IZ_HASHING(1, vals[1]) |
549                             GEN9_IZ_HASHING(0, vals[0]));
550 }
551
552 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
553                                      struct i915_wa_list *wal)
554 {
555         gen9_ctx_workarounds_init(engine, wal);
556         skl_tune_iz_hashing(engine, wal);
557 }
558
559 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
560                                      struct i915_wa_list *wal)
561 {
562         gen9_ctx_workarounds_init(engine, wal);
563
564         /* WaDisableThreadStallDopClockGating:bxt */
565         wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
566                          STALL_DOP_GATING_DISABLE);
567
568         /* WaToEnableHwFixForPushConstHWBug:bxt */
569         wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
570                      GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
571 }
572
573 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
574                                      struct i915_wa_list *wal)
575 {
576         struct drm_i915_private *i915 = engine->i915;
577
578         gen9_ctx_workarounds_init(engine, wal);
579
580         /* WaToEnableHwFixForPushConstHWBug:kbl */
581         if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
582                 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
583                              GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
584
585         /* WaDisableSbeCacheDispatchPortSharing:kbl */
586         wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
587                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
588 }
589
590 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
591                                      struct i915_wa_list *wal)
592 {
593         gen9_ctx_workarounds_init(engine, wal);
594
595         /* WaToEnableHwFixForPushConstHWBug:glk */
596         wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
597                      GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
598 }
599
600 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
601                                      struct i915_wa_list *wal)
602 {
603         gen9_ctx_workarounds_init(engine, wal);
604
605         /* WaToEnableHwFixForPushConstHWBug:cfl */
606         wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
607                      GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
608
609         /* WaDisableSbeCacheDispatchPortSharing:cfl */
610         wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
611                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
612 }
613
614 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
615                                      struct i915_wa_list *wal)
616 {
617         /* Wa_1406697149 (WaDisableBankHangMode:icl) */
618         wa_write(wal,
619                  GEN8_L3CNTLREG,
620                  intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
621                  GEN8_ERRDETBCTRL);
622
623         /* WaForceEnableNonCoherent:icl
624          * This is not the same workaround as in early Gen9 platforms, where
625          * lacking this could cause system hangs, but coherency performance
626          * overhead is high and only a few compute workloads really need it
627          * (the register is whitelisted in hardware now, so UMDs can opt in
628          * for coherency if they have a good reason).
629          */
630         wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
631
632         /* WaEnableFloatBlendOptimization:icl */
633         wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
634                    _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
635                    0 /* write-only, so skip validation */,
636                    true);
637
638         /* WaDisableGPGPUMidThreadPreemption:icl */
639         wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
640                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
641                             GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
642
643         /* allow headerless messages for preemptible GPGPU context */
644         wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
645                          GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
646
647         /* Wa_1604278689:icl,ehl */
648         wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
649         wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
650                          0, /* write-only register; skip validation */
651                          0xFFFFFFFF);
652
653         /* Wa_1406306137:icl,ehl */
654         wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
655 }
656
657 /*
658  * These settings aren't actually workarounds, but general tuning settings that
659  * need to be programmed on dg2 platform.
660  */
661 static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
662                                    struct i915_wa_list *wal)
663 {
664         wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
665         wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
666                              REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
667         wa_mcr_add(wal,
668                    XEHP_FF_MODE2,
669                    FF_MODE2_TDS_TIMER_MASK,
670                    FF_MODE2_TDS_TIMER_128,
671                    0, false);
672 }
673
674 /*
675  * These settings aren't actually workarounds, but general tuning settings that
676  * need to be programmed on several platforms.
677  */
678 static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
679                                      struct i915_wa_list *wal)
680 {
681         /*
682          * Although some platforms refer to it as Wa_1604555607, we need to
683          * program it even on those that don't explicitly list that
684          * workaround.
685          *
686          * Note that the programming of this register is further modified
687          * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
688          * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
689          * value when read. The default value for this register is zero for all
690          * fields and there are no bit masks. So instead of doing an RMW we
691          * should just write the TDS timer value. For the same reason read
692          * verification is ignored.
693          */
694         wa_add(wal,
695                GEN12_FF_MODE2,
696                FF_MODE2_TDS_TIMER_MASK,
697                FF_MODE2_TDS_TIMER_128,
698                0, false);
699 }
700
701 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
702                                        struct i915_wa_list *wal)
703 {
704         struct drm_i915_private *i915 = engine->i915;
705
706         gen12_ctx_gt_tuning_init(engine, wal);
707
708         /*
709          * Wa_1409142259:tgl,dg1,adl-p
710          * Wa_1409347922:tgl,dg1,adl-p
711          * Wa_1409252684:tgl,dg1,adl-p
712          * Wa_1409217633:tgl,dg1,adl-p
713          * Wa_1409207793:tgl,dg1,adl-p
714          * Wa_1409178076:tgl,dg1,adl-p
715          * Wa_1408979724:tgl,dg1,adl-p
716          * Wa_14010443199:tgl,rkl,dg1,adl-p
717          * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
718          * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
719          */
720         wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
721                      GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
722
723         /* WaDisableGPGPUMidThreadPreemption:gen12 */
724         wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
725                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
726                             GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
727
728         /*
729          * Wa_16011163337
730          *
731          * Like in gen12_ctx_gt_tuning_init(), read verification is ignored due
732          * to Wa_1608008084.
733          */
734         wa_add(wal,
735                GEN12_FF_MODE2,
736                FF_MODE2_GS_TIMER_MASK,
737                FF_MODE2_GS_TIMER_224,
738                0, false);
739
740         if (!IS_DG1(i915))
741                 /* Wa_1806527549 */
742                 wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
743 }
744
745 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
746                                      struct i915_wa_list *wal)
747 {
748         gen12_ctx_workarounds_init(engine, wal);
749
750         /* Wa_1409044764 */
751         wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
752                       DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
753
754         /* Wa_22010493298 */
755         wa_masked_en(wal, HIZ_CHICKEN,
756                      DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
757 }
758
759 static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
760                                      struct i915_wa_list *wal)
761 {
762         dg2_ctx_gt_tuning_init(engine, wal);
763
764         /* Wa_16011186671:dg2_g11 */
765         if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
766                 wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
767                 wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
768         }
769
770         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
771                 /* Wa_14010469329:dg2_g10 */
772                 wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
773                                  XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
774
775                 /*
776                  * Wa_22010465075:dg2_g10
777                  * Wa_22010613112:dg2_g10
778                  * Wa_14010698770:dg2_g10
779                  */
780                 wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
781                                  GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
782         }
783
784         /* Wa_16013271637:dg2 */
785         wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
786                          MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
787
788         /* Wa_14014947963:dg2 */
789         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
790             IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
791                 wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
792
793         /* Wa_18018764978:dg2 */
794         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
795             IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
796                 wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
797
798         /* Wa_15010599737:dg2 */
799         wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
800
801         /* Wa_18019271663:dg2 */
802         wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
803 }
804
805 static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine,
806                                      struct i915_wa_list *wal)
807 {
808         struct drm_i915_private *i915 = engine->i915;
809
810         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
811             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
812                 /* Wa_14014947963 */
813                 wa_masked_field_set(wal, VF_PREEMPTION,
814                                     PREEMPTION_VERTEX_COUNT, 0x4000);
815
816                 /* Wa_16013271637 */
817                 wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
818                                  MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
819
820                 /* Wa_18019627453 */
821                 wa_mcr_masked_en(wal, VFLSKPD, VF_PREFETCH_TLB_DIS);
822
823                 /* Wa_18018764978 */
824                 wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
825         }
826
827         /* Wa_18019271663 */
828         wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
829 }
830
831 static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
832                                          struct i915_wa_list *wal)
833 {
834         /*
835          * This is a "fake" workaround defined by software to ensure we
836          * maintain reliable, backward-compatible behavior for userspace with
837          * regards to how nested MI_BATCH_BUFFER_START commands are handled.
838          *
839          * The per-context setting of MI_MODE[12] determines whether the bits
840          * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
841          * in the traditional manner or whether they should instead use a new
842          * tgl+ meaning that breaks backward compatibility, but allows nesting
843          * into 3rd-level batchbuffers.  When this new capability was first
844          * added in TGL, it remained off by default unless a context
845          * intentionally opted in to the new behavior.  However Xe_HPG now
846          * flips this on by default and requires that we explicitly opt out if
847          * we don't want the new behavior.
848          *
849          * From a SW perspective, we want to maintain the backward-compatible
850          * behavior for userspace, so we'll apply a fake workaround to set it
851          * back to the legacy behavior on platforms where the hardware default
852          * is to break compatibility.  At the moment there is no Linux
853          * userspace that utilizes third-level batchbuffers, so this will keep
854          * userspace from needing to make any changes; using the legacy
855          * meaning is the correct thing to do.  If/when we have userspace
856          * consumers that want to utilize third-level batch nesting, we can
857          * provide a context parameter to allow them to opt-in.
858          */
859         wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
860 }
861
862 static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
863                                    struct i915_wa_list *wal)
864 {
865         u8 mocs;
866
867         /*
868          * Some blitter commands do not have a field for MOCS; those
869          * commands will use the MOCS index pointed to by BLIT_CCTL.
870          * BLIT_CCTL registers need to be programmed to un-cached.
871          */
872         if (engine->class == COPY_ENGINE_CLASS) {
873                 mocs = engine->gt->mocs.uc_index;
874                 wa_write_clr_set(wal,
875                                  BLIT_CCTL(engine->mmio_base),
876                                  BLIT_CCTL_MASK,
877                                  BLIT_CCTL_MOCS(mocs, mocs));
878         }
879 }
880
881 /*
882  * gen12_ctx_gt_fake_wa_init() doesn't program an official workaround
883  * defined by the hardware team, but rather general context registers.
884  * Adding this context register programming to the context workaround list
885  * allows us to use the wa framework for proper application and validation.
886  */
887 static void
888 gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
889                           struct i915_wa_list *wal)
890 {
891         if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
892                 fakewa_disable_nestedbb_mode(engine, wal);
893
894         gen12_ctx_gt_mocs_init(engine, wal);
895 }
896
897 static void
898 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
899                            struct i915_wa_list *wal,
900                            const char *name)
901 {
902         struct drm_i915_private *i915 = engine->i915;
903
904         wa_init_start(wal, engine->gt, name, engine->name);
905
906         /* Applies to all engines */
907         /*
908          * Fake workarounds are not actual workarounds, but rather
909          * programming of context registers using the workaround framework.
910          */
911         if (GRAPHICS_VER(i915) >= 12)
912                 gen12_ctx_gt_fake_wa_init(engine, wal);
913
914         if (engine->class != RENDER_CLASS)
915                 goto done;
916
917         if (IS_METEORLAKE(i915))
918                 mtl_ctx_workarounds_init(engine, wal);
919         else if (IS_PONTEVECCHIO(i915))
920                 ; /* noop; none at this time */
921         else if (IS_DG2(i915))
922                 dg2_ctx_workarounds_init(engine, wal);
923         else if (IS_XEHPSDV(i915))
924                 ; /* noop; none at this time */
925         else if (IS_DG1(i915))
926                 dg1_ctx_workarounds_init(engine, wal);
927         else if (GRAPHICS_VER(i915) == 12)
928                 gen12_ctx_workarounds_init(engine, wal);
929         else if (GRAPHICS_VER(i915) == 11)
930                 icl_ctx_workarounds_init(engine, wal);
931         else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
932                 cfl_ctx_workarounds_init(engine, wal);
933         else if (IS_GEMINILAKE(i915))
934                 glk_ctx_workarounds_init(engine, wal);
935         else if (IS_KABYLAKE(i915))
936                 kbl_ctx_workarounds_init(engine, wal);
937         else if (IS_BROXTON(i915))
938                 bxt_ctx_workarounds_init(engine, wal);
939         else if (IS_SKYLAKE(i915))
940                 skl_ctx_workarounds_init(engine, wal);
941         else if (IS_CHERRYVIEW(i915))
942                 chv_ctx_workarounds_init(engine, wal);
943         else if (IS_BROADWELL(i915))
944                 bdw_ctx_workarounds_init(engine, wal);
945         else if (GRAPHICS_VER(i915) == 7)
946                 gen7_ctx_workarounds_init(engine, wal);
947         else if (GRAPHICS_VER(i915) == 6)
948                 gen6_ctx_workarounds_init(engine, wal);
949         else if (GRAPHICS_VER(i915) < 8)
950                 ;
951         else
952                 MISSING_CASE(GRAPHICS_VER(i915));
953
954 done:
955         wa_init_finish(wal);
956 }
957
958 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
959 {
960         __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
961 }
962
963 int intel_engine_emit_ctx_wa(struct i915_request *rq)
964 {
965         struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
966         struct i915_wa *wa;
967         unsigned int i;
968         u32 *cs;
969         int ret;
970
971         if (wal->count == 0)
972                 return 0;
973
974         ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
975         if (ret)
976                 return ret;
977
978         cs = intel_ring_begin(rq, (wal->count * 2 + 2));
979         if (IS_ERR(cs))
980                 return PTR_ERR(cs);
981
982         *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
983         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
984                 *cs++ = i915_mmio_reg_offset(wa->reg);
985                 *cs++ = wa->set;
986         }
987         *cs++ = MI_NOOP;
988
989         intel_ring_advance(rq, cs);
990
991         ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
992         if (ret)
993                 return ret;
994
995         return 0;
996 }
997
998 static void
999 gen4_gt_workarounds_init(struct intel_gt *gt,
1000                          struct i915_wa_list *wal)
1001 {
1002         /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
1003         wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
1004 }
1005
1006 static void
1007 g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1008 {
1009         gen4_gt_workarounds_init(gt, wal);
1010
1011         /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
1012         wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
1013 }
1014
1015 static void
1016 ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1017 {
1018         g4x_gt_workarounds_init(gt, wal);
1019
1020         wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
1021 }
1022
1023 static void
1024 snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1025 {
1026 }
1027
1028 static void
1029 ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1030 {
1031         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
1032         wa_masked_dis(wal,
1033                       GEN7_COMMON_SLICE_CHICKEN1,
1034                       GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
1035
1036         /* WaApplyL3ControlAndL3ChickenMode:ivb */
1037         wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
1038         wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
1039
1040         /* WaForceL3Serialization:ivb */
1041         wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
1042 }
1043
1044 static void
1045 vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1046 {
1047         /* WaForceL3Serialization:vlv */
1048         wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
1049
1050         /*
1051          * WaIncreaseL3CreditsForVLVB0:vlv
1052          * This is the hardware default actually.
1053          */
1054         wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
1055 }
1056
1057 static void
1058 hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1059 {
1060         /* L3 caching of data atomics doesn't work -- disable it. */
1061         wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
1062
1063         wa_add(wal,
1064                HSW_ROW_CHICKEN3, 0,
1065                _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
1066                0 /* XXX does this reg exist? */, true);
1067
1068         /* WaVSRefCountFullforceMissDisable:hsw */
1069         wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
1070 }
1071
1072 static void
1073 gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
1074 {
1075         const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
1076         unsigned int slice, subslice;
1077         u32 mcr, mcr_mask;
1078
1079         GEM_BUG_ON(GRAPHICS_VER(i915) != 9);
1080
1081         /*
1082          * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
1083          * Before any MMIO read into slice/subslice specific registers, MCR
1084          * packet control register needs to be programmed to point to any
1085          * enabled s/ss pair. Otherwise, incorrect values will be returned.
1086          * This means each subsequent MMIO read will be forwarded to a
1087          * specific s/ss combination, but this is OK since these registers
1088          * are consistent across s/ss in almost all cases. On the rare
1089          * occasions, such as INSTDONE, where this value is dependent
1090          * on s/ss combo, the read should be done with read_subslice_reg.
1091          */
1092         slice = ffs(sseu->slice_mask) - 1;
1093         GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw));
1094         subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice));
1095         GEM_BUG_ON(!subslice);
1096         subslice--;
1097
1098         /*
1099          * We use GEN8_MCR..() macros to calculate the |mcr| value for
1100          * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
1101          */
1102         mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
1103         mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
1104
1105         drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);
1106
1107         wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
1108 }
1109
1110 static void
1111 gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1112 {
1113         struct drm_i915_private *i915 = gt->i915;
1114
1115         /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
1116         gen9_wa_init_mcr(i915, wal);
1117
1118         /* WaDisableKillLogic:bxt,skl,kbl */
1119         if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
1120                 wa_write_or(wal,
1121                             GAM_ECOCHK,
1122                             ECOCHK_DIS_TLB);
1123
1124         if (HAS_LLC(i915)) {
1125                 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
1126                  *
1127                  * Must match Display Engine. See
1128                  * WaCompressedResourceDisplayNewHashMode.
1129                  */
1130                 wa_write_or(wal,
1131                             MMCD_MISC_CTRL,
1132                             MMCD_PCLA | MMCD_HOTSPOT_EN);
1133         }
1134
1135         /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1136         wa_write_or(wal,
1137                     GAM_ECOCHK,
1138                     BDW_DISABLE_HDC_INVALIDATION);
1139 }
1140
1141 static void
1142 skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1143 {
1144         gen9_gt_workarounds_init(gt, wal);
1145
1146         /* WaDisableGafsUnitClkGating:skl */
1147         wa_write_or(wal,
1148                     GEN7_UCGCTL4,
1149                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1150
1151         /* WaInPlaceDecompressionHang:skl */
1152         if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
1153                 wa_write_or(wal,
1154                             GEN9_GAMT_ECO_REG_RW_IA,
1155                             GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1156 }
1157
1158 static void
1159 kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1160 {
1161         gen9_gt_workarounds_init(gt, wal);
1162
1163         /* WaDisableDynamicCreditSharing:kbl */
1164         if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
1165                 wa_write_or(wal,
1166                             GAMT_CHKN_BIT_REG,
1167                             GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1168
1169         /* WaDisableGafsUnitClkGating:kbl */
1170         wa_write_or(wal,
1171                     GEN7_UCGCTL4,
1172                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1173
1174         /* WaInPlaceDecompressionHang:kbl */
1175         wa_write_or(wal,
1176                     GEN9_GAMT_ECO_REG_RW_IA,
1177                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1178 }
1179
1180 static void
1181 glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1182 {
1183         gen9_gt_workarounds_init(gt, wal);
1184 }
1185
1186 static void
1187 cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1188 {
1189         gen9_gt_workarounds_init(gt, wal);
1190
1191         /* WaDisableGafsUnitClkGating:cfl */
1192         wa_write_or(wal,
1193                     GEN7_UCGCTL4,
1194                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1195
1196         /* WaInPlaceDecompressionHang:cfl */
1197         wa_write_or(wal,
1198                     GEN9_GAMT_ECO_REG_RW_IA,
1199                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1200 }
1201
1202 static void __set_mcr_steering(struct i915_wa_list *wal,
1203                                i915_reg_t steering_reg,
1204                                unsigned int slice, unsigned int subslice)
1205 {
1206         u32 mcr, mcr_mask;
1207
1208         mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
1209         mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
1210
1211         wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
1212 }
1213
1214 static void debug_dump_steering(struct intel_gt *gt)
1215 {
1216         struct drm_printer p = drm_debug_printer("MCR Steering:");
1217
1218         if (drm_debug_enabled(DRM_UT_DRIVER))
1219                 intel_gt_mcr_report_steering(&p, gt, false);
1220 }
1221
1222 static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
1223                          unsigned int slice, unsigned int subslice)
1224 {
1225         __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
1226
1227         gt->default_steering.groupid = slice;
1228         gt->default_steering.instanceid = subslice;
1229
1230         debug_dump_steering(gt);
1231 }
1232
1233 static void
1234 icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1235 {
1236         const struct sseu_dev_info *sseu = &gt->info.sseu;
1237         unsigned int subslice;
1238
1239         GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
1240         GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
1241
1242         /*
1243          * Although a platform may have subslices, we need to always steer
1244          * reads to the lowest instance that isn't fused off.  When Render
1245          * Power Gating is enabled, grabbing forcewake will only power up a
1246          * single subslice (the "minconfig") if there isn't a real workload
1247          * that needs to be run; this means that if we steer register reads to
1248          * one of the higher subslices, we run the risk of reading back 0's or
1249          * random garbage.
1250          */
1251         subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0));
1252
1253         /*
1254          * If the subslice we picked above also steers us to a valid L3 bank,
1255          * then we can just rely on the default steering and won't need to
1256          * worry about explicitly re-steering L3BANK reads later.
1257          */
1258         if (gt->info.l3bank_mask & BIT(subslice))
1259                 gt->steering_table[L3BANK] = NULL;
1260
1261         __add_mcr_wa(gt, wal, 0, subslice);
1262 }
1263
1264 static void
1265 xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1266 {
1267         const struct sseu_dev_info *sseu = &gt->info.sseu;
1268         unsigned long slice, subslice = 0, slice_mask = 0;
1269         u32 lncf_mask = 0;
1270         int i;
1271
1272         /*
1273          * On Xe_HP the steering increases in complexity. There are now several
1274          * more units that require steering and we're not guaranteed to be able
1275          * to find a common setting for all of them. These are:
1276          * - GSLICE (fusable)
1277          * - DSS (sub-unit within gslice; fusable)
1278          * - L3 Bank (fusable)
1279          * - MSLICE (fusable)
1280          * - LNCF (sub-unit within mslice; always present if mslice is present)
1281          *
1282          * We'll do our default/implicit steering based on GSLICE (in the
1283          * sliceid field) and DSS (in the subsliceid field).  If we can
1284          * find overlap between the valid MSLICE and/or LNCF values and
1285          * a suitable GSLICE, then we can just re-use the default value and
1286          * skip any explicit steering at runtime.
1287          *
1288          * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
1289          * a valid sliceid value.  DSS steering is the only type of steering
1290          * that utilizes the 'subsliceid' bits.
1291          *
1292          * Also note that, even though the steering domain is called "GSlice"
1293          * and it is encoded in the register using the gslice format, the spec
1294          * says that the combined (geometry | compute) fuse should be used to
1295          * select the steering.
1296          */
1297
1298         /* Find the potential gslice candidates */
1299         slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask,
1300                                                        GEN_DSS_PER_GSLICE);
1301
1302         /*
1303          * Find the potential LNCF candidates.  Either LNCF within a valid
1304          * mslice is fine.
1305          */
1306         for_each_set_bit(i, &gt->info.mslice_mask, GEN12_MAX_MSLICES)
1307                 lncf_mask |= (0x3 << (i * 2));
1308
1309         /*
1310          * Are there any sliceid values that work for both GSLICE and LNCF
1311          * steering?
1312          */
1313         if (slice_mask & lncf_mask) {
1314                 slice_mask &= lncf_mask;
1315                 gt->steering_table[LNCF] = NULL;
1316         }
1317
1318         /* How about sliceid values that also work for MSLICE steering? */
1319         if (slice_mask & gt->info.mslice_mask) {
1320                 slice_mask &= gt->info.mslice_mask;
1321                 gt->steering_table[MSLICE] = NULL;
1322         }
1323
1324         if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
1325                 gt->steering_table[GAM] = NULL;
1326
1327         slice = __ffs(slice_mask);
1328         subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
1329                 GEN_DSS_PER_GSLICE;
1330
1331         __add_mcr_wa(gt, wal, slice, subslice);
1332
1333         /*
1334          * SQIDI ranges are special because they use different steering
1335          * registers than everything else we work with.  On XeHP SDV and
1336          * DG2-G10, any value in the steering registers will work fine since
1337          * all instances are present, but DG2-G11 only has SQIDI instances at
1338          * ID's 2 and 3, so we need to steer to one of those.  For simplicity
1339          * we'll just steer to a hardcoded "2" since that value will work
1340          * everywhere.
1341          */
1342         __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
1343         __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);
1344
1345         /*
1346          * On DG2, GAM registers have a dedicated steering control register
1347          * and must always be programmed to a hardcoded groupid of "1."
1348          */
1349         if (IS_DG2(gt->i915))
1350                 __set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
1351 }
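
/*
 * Illustrative sketch only (not part of the driver): suppose a hypothetical
 * part has DSS present in gslices 0 and 2 (slice_mask = 0b0101) and only
 * mslice 1 unfused (mslice_mask = 0b0010, hence lncf_mask = 0b1100).
 * slice_mask and lncf_mask share bit 2, so sliceid 2 satisfies both GSLICE
 * and LNCF steering and the LNCF table can be dropped; slice_mask and
 * mslice_mask share no bits, so MSLICE keeps needing explicit steering.
 * The implicit steering then becomes slice = __ffs(0b0100) = 2, with
 * subslice taken from the first DSS inside gslice 2 modulo
 * GEN_DSS_PER_GSLICE.
 */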
1352
1353 static void
1354 pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1355 {
1356         unsigned int dss;
1357
1358         /*
1359          * Setup implicit steering for COMPUTE and DSS ranges to the first
1360          * non-fused-off DSS.  All other types of MCR registers will be
1361          * explicitly steered.
1362          */
1363         dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
1364         __add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
1365 }
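
/*
 * For illustration only: if the fuses reported DSS 9 as the first enabled
 * DSS and GEN_DSS_PER_CSLICE happened to be 8 (a value assumed purely for
 * this example), the implicit steering above would resolve to
 * group 9 / 8 = 1, instance 9 % 8 = 1.
 */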
1366
1367 static void
1368 icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1369 {
1370         struct drm_i915_private *i915 = gt->i915;
1371
1372         icl_wa_init_mcr(gt, wal);
1373
1374         /* WaModifyGamTlbPartitioning:icl */
1375         wa_write_clr_set(wal,
1376                          GEN11_GACB_PERF_CTRL,
1377                          GEN11_HASH_CTRL_MASK,
1378                          GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
1379
1380         /* Wa_1405766107:icl
1381          * Formerly known as WaCL2SFHalfMaxAlloc
1382          */
1383         wa_write_or(wal,
1384                     GEN11_LSN_UNSLCVC,
1385                     GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
1386                     GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
1387
1388         /* Wa_220166154:icl
1389          * Formerly known as WaDisCtxReload
1390          */
1391         wa_write_or(wal,
1392                     GEN8_GAMW_ECO_DEV_RW_IA,
1393                     GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
1394
1395         /* Wa_1406463099:icl
1396          * Formerly known as WaGamTlbPendError
1397          */
1398         wa_write_or(wal,
1399                     GAMT_CHKN_BIT_REG,
1400                     GAMT_CHKN_DISABLE_L3_COH_PIPE);
1401
1402         /* Wa_1407352427:icl,ehl */
1403         wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1404                     PSDUNIT_CLKGATE_DIS);
1405
1406         /* Wa_1406680159:icl,ehl */
1407         wa_mcr_write_or(wal,
1408                         GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
1409                         GWUNIT_CLKGATE_DIS);
1410
1411         /* Wa_1607087056:icl,ehl,jsl */
1412         if (IS_ICELAKE(i915) ||
1413             IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1414                 wa_write_or(wal,
1415                             GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1416                             L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1417
1418         /*
1419          * This is not a documented workaround, but rather an optimization
1420          * to reduce sampler power.
1421          */
1422         wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1423 }
1424
1425 /*
1426  * Though there are per-engine instances of these registers,
1427  * they retain their value through engine resets and should
1428  * only be provided on the GT workaround list rather than
1429  * the engine-specific workaround list.
1430  */
1431 static void
1432 wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
1433 {
1434         struct intel_engine_cs *engine;
1435         int id;
1436
1437         for_each_engine(engine, gt, id) {
1438                 if (engine->class != VIDEO_DECODE_CLASS ||
1439                     (engine->instance % 2))
1440                         continue;
1441
1442                 wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
1443                             IECPUNIT_CLKGATE_DIS);
1444         }
1445 }
1446
1447 static void
1448 gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1449 {
1450         icl_wa_init_mcr(gt, wal);
1451
1452         /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
1453         wa_14011060649(gt, wal);
1454
1455         /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
1456         wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1457 }
1458
1459 static void
1460 tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1461 {
1462         struct drm_i915_private *i915 = gt->i915;
1463
1464         gen12_gt_workarounds_init(gt, wal);
1465
1466         /* Wa_1409420604:tgl */
1467         if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1468                 wa_mcr_write_or(wal,
1469                                 SUBSLICE_UNIT_LEVEL_CLKGATE2,
1470                                 CPSSUNIT_CLKGATE_DIS);
1471
1472         /* Wa_1607087056:tgl also known as BUG:1409180338 */
1473         if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1474                 wa_write_or(wal,
1475                             GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1476                             L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1477
1478         /* Wa_1408615072:tgl[a0] */
1479         if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1480                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1481                             VSUNIT_CLKGATE_DIS_TGL);
1482 }
1483
1484 static void
1485 dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1486 {
1487         struct drm_i915_private *i915 = gt->i915;
1488
1489         gen12_gt_workarounds_init(gt, wal);
1490
1491         /* Wa_1607087056:dg1 */
1492         if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1493                 wa_write_or(wal,
1494                             GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1495                             L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1496
1497         /* Wa_1409420604:dg1 */
1498         if (IS_DG1(i915))
1499                 wa_mcr_write_or(wal,
1500                                 SUBSLICE_UNIT_LEVEL_CLKGATE2,
1501                                 CPSSUNIT_CLKGATE_DIS);
1502
1503         /* Wa_1408615072:dg1 */
1504         /* Empirical testing shows this register is unaffected by engine reset. */
1505         if (IS_DG1(i915))
1506                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1507                             VSUNIT_CLKGATE_DIS_TGL);
1508 }
1509
1510 static void
1511 xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1512 {
1513         struct drm_i915_private *i915 = gt->i915;
1514
1515         xehp_init_mcr(gt, wal);
1516
1517         /* Wa_1409757795:xehpsdv */
1518         wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
1519
1520         /* Wa_16011155590:xehpsdv */
1521         if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1522                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1523                             TSGUNIT_CLKGATE_DIS);
1524
1525         /* Wa_14011780169:xehpsdv */
1526         if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
1527                 wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
1528                             GAMTLBVDBOX7_CLKGATE_DIS |
1529                             GAMTLBVDBOX6_CLKGATE_DIS |
1530                             GAMTLBVDBOX5_CLKGATE_DIS |
1531                             GAMTLBVDBOX4_CLKGATE_DIS |
1532                             GAMTLBVDBOX3_CLKGATE_DIS |
1533                             GAMTLBVDBOX2_CLKGATE_DIS |
1534                             GAMTLBVDBOX1_CLKGATE_DIS |
1535                             GAMTLBVDBOX0_CLKGATE_DIS |
1536                             GAMTLBKCR_CLKGATE_DIS |
1537                             GAMTLBGUC_CLKGATE_DIS |
1538                             GAMTLBBLT_CLKGATE_DIS);
1539                 wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
1540                             GAMTLBGFXA1_CLKGATE_DIS |
1541                             GAMTLBCOMPA0_CLKGATE_DIS |
1542                             GAMTLBCOMPA1_CLKGATE_DIS |
1543                             GAMTLBCOMPB0_CLKGATE_DIS |
1544                             GAMTLBCOMPB1_CLKGATE_DIS |
1545                             GAMTLBCOMPC0_CLKGATE_DIS |
1546                             GAMTLBCOMPC1_CLKGATE_DIS |
1547                             GAMTLBCOMPD0_CLKGATE_DIS |
1548                             GAMTLBCOMPD1_CLKGATE_DIS |
1549                             GAMTLBMERT_CLKGATE_DIS   |
1550                             GAMTLBVEBOX3_CLKGATE_DIS |
1551                             GAMTLBVEBOX2_CLKGATE_DIS |
1552                             GAMTLBVEBOX1_CLKGATE_DIS |
1553                             GAMTLBVEBOX0_CLKGATE_DIS);
1554         }
1555
1556         /* Wa_16012725990:xehpsdv */
1557         if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
1558                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
1559
1560         /* Wa_14011060649:xehpsdv */
1561         wa_14011060649(gt, wal);
1562 }
1563
1564 static void
1565 dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1566 {
1567         struct intel_engine_cs *engine;
1568         int id;
1569
1570         xehp_init_mcr(gt, wal);
1571
1572         /* Wa_14011060649:dg2 */
1573         wa_14011060649(gt, wal);
1574
1575         /*
1576          * Although there are per-engine instances of these registers,
1577          * they technically exist outside the engine itself and are not
1578          * impacted by engine resets.  Furthermore, they're part of the
1579          * GuC blacklist so trying to treat them as engine workarounds
1580          * will result in GuC initialization failure and a wedged GPU.
1581          */
1582         for_each_engine(engine, gt, id) {
1583                 if (engine->class != VIDEO_DECODE_CLASS)
1584                         continue;
1585
1586                 /* Wa_16010515920:dg2_g10 */
1587                 if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
1588                         wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
1589                                     ALNUNIT_CLKGATE_DIS);
1590         }
1591
1592         if (IS_DG2_G10(gt->i915)) {
1593                 /* Wa_22010523718:dg2 */
1594                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1595                             CG3DDISCFEG_CLKGATE_DIS);
1596
1597                 /* Wa_14011006942:dg2 */
1598                 wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
1599                                 DSS_ROUTER_CLKGATE_DIS);
1600         }
1601
1602         if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
1603                 /* Wa_14010948348:dg2_g10 */
1604                 wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
1605
1606                 /* Wa_14011037102:dg2_g10 */
1607                 wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
1608
1609                 /* Wa_14011371254:dg2_g10 */
1610                 wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
1611
1612                 /* Wa_14011431319:dg2_g10 */
1613                 wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
1614                             GAMTLBVDBOX7_CLKGATE_DIS |
1615                             GAMTLBVDBOX6_CLKGATE_DIS |
1616                             GAMTLBVDBOX5_CLKGATE_DIS |
1617                             GAMTLBVDBOX4_CLKGATE_DIS |
1618                             GAMTLBVDBOX3_CLKGATE_DIS |
1619                             GAMTLBVDBOX2_CLKGATE_DIS |
1620                             GAMTLBVDBOX1_CLKGATE_DIS |
1621                             GAMTLBVDBOX0_CLKGATE_DIS |
1622                             GAMTLBKCR_CLKGATE_DIS |
1623                             GAMTLBGUC_CLKGATE_DIS |
1624                             GAMTLBBLT_CLKGATE_DIS);
1625                 wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
1626                             GAMTLBGFXA1_CLKGATE_DIS |
1627                             GAMTLBCOMPA0_CLKGATE_DIS |
1628                             GAMTLBCOMPA1_CLKGATE_DIS |
1629                             GAMTLBCOMPB0_CLKGATE_DIS |
1630                             GAMTLBCOMPB1_CLKGATE_DIS |
1631                             GAMTLBCOMPC0_CLKGATE_DIS |
1632                             GAMTLBCOMPC1_CLKGATE_DIS |
1633                             GAMTLBCOMPD0_CLKGATE_DIS |
1634                             GAMTLBCOMPD1_CLKGATE_DIS |
1635                             GAMTLBMERT_CLKGATE_DIS   |
1636                             GAMTLBVEBOX3_CLKGATE_DIS |
1637                             GAMTLBVEBOX2_CLKGATE_DIS |
1638                             GAMTLBVEBOX1_CLKGATE_DIS |
1639                             GAMTLBVEBOX0_CLKGATE_DIS);
1640
1641                 /* Wa_14010569222:dg2_g10 */
1642                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1643                             GAMEDIA_CLKGATE_DIS);
1644
1645                 /* Wa_14011028019:dg2_g10 */
1646                 wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
1647         }
1648
1649         /* Wa_14014830051:dg2 */
1650         wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1651
1652         /*
1653          * The following are not actually "workarounds" but rather
1654          * recommended tuning settings documented in the bspec's
1655          * performance guide section.
1656          */
1657         wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
1658
1659         /* Wa_14015795083 */
1660         wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1661 }
1662
1663 static void
1664 pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1665 {
1666         pvc_init_mcr(gt, wal);
1667
1668         /* Wa_14015795083 */
1669         wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1670 }
1671
1672 static void
1673 xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1674 {
1675         /* Wa_14014830051 */
1676         if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
1677             IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0))
1678                 wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1679
1680         /*
1681          * Unlike older platforms, we no longer setup implicit steering here;
1682          * all MCR accesses are explicitly steered.
1683          */
1684         debug_dump_steering(gt);
1685 }
1686
1687 static void
1688 xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1689 {
1690         /* FIXME: Actual workarounds will be added in future patch(es) */
1691
1692         debug_dump_steering(gt);
1693 }
1694
1695 static void
1696 gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
1697 {
1698         struct drm_i915_private *i915 = gt->i915;
1699
1700         if (gt->type == GT_MEDIA) {
1701                 if (MEDIA_VER(i915) >= 13)
1702                         xelpmp_gt_workarounds_init(gt, wal);
1703                 else
1704                         MISSING_CASE(MEDIA_VER(i915));
1705
1706                 return;
1707         }
1708
1709         if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
1710                 xelpg_gt_workarounds_init(gt, wal);
1711         else if (IS_PONTEVECCHIO(i915))
1712                 pvc_gt_workarounds_init(gt, wal);
1713         else if (IS_DG2(i915))
1714                 dg2_gt_workarounds_init(gt, wal);
1715         else if (IS_XEHPSDV(i915))
1716                 xehpsdv_gt_workarounds_init(gt, wal);
1717         else if (IS_DG1(i915))
1718                 dg1_gt_workarounds_init(gt, wal);
1719         else if (IS_TIGERLAKE(i915))
1720                 tgl_gt_workarounds_init(gt, wal);
1721         else if (GRAPHICS_VER(i915) == 12)
1722                 gen12_gt_workarounds_init(gt, wal);
1723         else if (GRAPHICS_VER(i915) == 11)
1724                 icl_gt_workarounds_init(gt, wal);
1725         else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
1726                 cfl_gt_workarounds_init(gt, wal);
1727         else if (IS_GEMINILAKE(i915))
1728                 glk_gt_workarounds_init(gt, wal);
1729         else if (IS_KABYLAKE(i915))
1730                 kbl_gt_workarounds_init(gt, wal);
1731         else if (IS_BROXTON(i915))
1732                 gen9_gt_workarounds_init(gt, wal);
1733         else if (IS_SKYLAKE(i915))
1734                 skl_gt_workarounds_init(gt, wal);
1735         else if (IS_HASWELL(i915))
1736                 hsw_gt_workarounds_init(gt, wal);
1737         else if (IS_VALLEYVIEW(i915))
1738                 vlv_gt_workarounds_init(gt, wal);
1739         else if (IS_IVYBRIDGE(i915))
1740                 ivb_gt_workarounds_init(gt, wal);
1741         else if (GRAPHICS_VER(i915) == 6)
1742                 snb_gt_workarounds_init(gt, wal);
1743         else if (GRAPHICS_VER(i915) == 5)
1744                 ilk_gt_workarounds_init(gt, wal);
1745         else if (IS_G4X(i915))
1746                 g4x_gt_workarounds_init(gt, wal);
1747         else if (GRAPHICS_VER(i915) == 4)
1748                 gen4_gt_workarounds_init(gt, wal);
1749         else if (GRAPHICS_VER(i915) <= 8)
1750                 ;
1751         else
1752                 MISSING_CASE(GRAPHICS_VER(i915));
1753 }
1754
1755 void intel_gt_init_workarounds(struct intel_gt *gt)
1756 {
1757         struct i915_wa_list *wal = &gt->wa_list;
1758
1759         wa_init_start(wal, gt, "GT", "global");
1760         gt_init_workarounds(gt, wal);
1761         wa_init_finish(wal);
1762 }
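
/*
 * Hedged usage sketch (not part of the driver): the list built above is
 * consumed by the two entry points further down in this file.
 * intel_gt_apply_workarounds() rewrites every register on the list, and
 * intel_gt_verify_workarounds() re-reads them and flags any value that no
 * longer matches, e.g.:
 *
 *	intel_gt_init_workarounds(gt);
 *	intel_gt_apply_workarounds(gt);
 *	drm_WARN_ON(&gt->i915->drm, !intel_gt_verify_workarounds(gt, "init"));
 */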
1763
1764 static enum forcewake_domains
1765 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1766 {
1767         enum forcewake_domains fw = 0;
1768         struct i915_wa *wa;
1769         unsigned int i;
1770
1771         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1772                 fw |= intel_uncore_forcewake_for_reg(uncore,
1773                                                      wa->reg,
1774                                                      FW_REG_READ |
1775                                                      FW_REG_WRITE);
1776
1777         return fw;
1778 }
1779
1780 static bool
1781 wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
1782           const char *name, const char *from)
1783 {
1784         if ((cur ^ wa->set) & wa->read) {
1785                 drm_err(&gt->i915->drm,
1786                         "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1787                         name, from, i915_mmio_reg_offset(wa->reg),
1788                         cur, cur & wa->read, wa->set & wa->read);
1789
1790                 return false;
1791         }
1792
1793         return true;
1794 }
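
/*
 * Worked example with made-up values: for wa->set = 0x0030 and
 * wa->read = 0x00f0, a readback of cur = 0x0010 gives
 * (cur ^ wa->set) & wa->read = 0x0020 & 0x00f0 = 0x0020, so a relevant bit
 * differs and the workaround is reported as lost.  A readback of 0x1030
 * passes, because the only mismatch falls outside the bits named by
 * wa->read.
 */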
1795
1796 static void wa_list_apply(const struct i915_wa_list *wal)
1797 {
1798         struct intel_gt *gt = wal->gt;
1799         struct intel_uncore *uncore = gt->uncore;
1800         enum forcewake_domains fw;
1801         unsigned long flags;
1802         struct i915_wa *wa;
1803         unsigned int i;
1804
1805         if (!wal->count)
1806                 return;
1807
1808         fw = wal_get_fw_for_rmw(uncore, wal);
1809
1810         intel_gt_mcr_lock(gt, &flags);
1811         spin_lock(&uncore->lock);
1812         intel_uncore_forcewake_get__locked(uncore, fw);
1813
1814         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1815                 u32 val, old = 0;
1816
1817                 /* open-coded rmw due to steering */
1818                 if (wa->clr)
1819                         old = wa->is_mcr ?
1820                                 intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1821                                 intel_uncore_read_fw(uncore, wa->reg);
1822                 val = (old & ~wa->clr) | wa->set;
1823                 if (val != old || !wa->clr) {
1824                         if (wa->is_mcr)
1825                                 intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
1826                         else
1827                                 intel_uncore_write_fw(uncore, wa->reg, val);
1828                 }
1829
1830                 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
1831                         u32 val = wa->is_mcr ?
1832                                 intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1833                                 intel_uncore_read_fw(uncore, wa->reg);
1834
1835                         wa_verify(gt, wa, val, wal->name, "application");
1836                 }
1837         }
1838
1839         intel_uncore_forcewake_put__locked(uncore, fw);
1840         spin_unlock(&uncore->lock);
1841         intel_gt_mcr_unlock(gt, flags);
1842 }
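
/*
 * Illustrative numbers for the open-coded rmw above: a workaround with
 * wa->clr = 0x00f0 and wa->set = 0x0030 applied on top of old = 0x0fff
 * produces val = (0x0fff & ~0x00f0) | 0x0030 = 0x0f3f, which differs from
 * old and is therefore written back.  A set-only workaround (wa->clr == 0)
 * never reads the register and unconditionally writes wa->set.
 */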
1843
1844 void intel_gt_apply_workarounds(struct intel_gt *gt)
1845 {
1846         wa_list_apply(&gt->wa_list);
1847 }
1848
1849 static bool wa_list_verify(struct intel_gt *gt,
1850                            const struct i915_wa_list *wal,
1851                            const char *from)
1852 {
1853         struct intel_uncore *uncore = gt->uncore;
1854         struct i915_wa *wa;
1855         enum forcewake_domains fw;
1856         unsigned long flags;
1857         unsigned int i;
1858         bool ok = true;
1859
1860         fw = wal_get_fw_for_rmw(uncore, wal);
1861
1862         intel_gt_mcr_lock(gt, &flags);
1863         spin_lock(&uncore->lock);
1864         intel_uncore_forcewake_get__locked(uncore, fw);
1865
1866         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1867                 ok &= wa_verify(wal->gt, wa, wa->is_mcr ?
1868                                 intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1869                                 intel_uncore_read_fw(uncore, wa->reg),
1870                                 wal->name, from);
1871
1872         intel_uncore_forcewake_put__locked(uncore, fw);
1873         spin_unlock(&uncore->lock);
1874         intel_gt_mcr_unlock(gt, flags);
1875
1876         return ok;
1877 }
1878
1879 bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1880 {
1881         return wa_list_verify(gt, &gt->wa_list, from);
1882 }
1883
1884 __maybe_unused
1885 static bool is_nonpriv_flags_valid(u32 flags)
1886 {
1887         /* Check only valid flag bits are set */
1888         if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1889                 return false;
1890
1891         /* NB: Only 3 out of 4 enum values are valid for access field */
1892         if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1893             RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1894                 return false;
1895
1896         return true;
1897 }
1898
1899 static void
1900 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1901 {
1902         struct i915_wa wa = {
1903                 .reg = reg
1904         };
1905
1906         if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1907                 return;
1908
1909         if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1910                 return;
1911
1912         wa.reg.reg |= flags;
1913         _wa_add(wal, &wa);
1914 }
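
/*
 * Sketch of what a whitelist entry looks like (the exact bit layout lives
 * in the RING_FORCE_TO_NONPRIV_* definitions and is not repeated here):
 * the access/range flags are OR'ed into otherwise-unused bits of the
 * register offset, so an entry such as
 *
 *	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
 *			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
 *			  RING_FORCE_TO_NONPRIV_RANGE_4);
 *
 * becomes a single (offset | flags) value that
 * intel_engine_apply_whitelist() later writes verbatim into a
 * RING_FORCE_TO_NONPRIV slot.
 */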
1915
1916 static void
1917 whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
1918 {
1919         struct i915_wa wa = {
1920                 .mcr_reg = reg,
1921                 .is_mcr = 1,
1922         };
1923
1924         if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1925                 return;
1926
1927         if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1928                 return;
1929
1930         wa.mcr_reg.reg |= flags;
1931         _wa_add(wal, &wa);
1932 }
1933
1934 static void
1935 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1936 {
1937         whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1938 }
1939
1940 static void
1941 whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
1942 {
1943         whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1944 }
1945
1946 static void gen9_whitelist_build(struct i915_wa_list *w)
1947 {
1948         /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1949         whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1950
1951         /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1952         whitelist_reg(w, GEN8_CS_CHICKEN1);
1953
1954         /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1955         whitelist_reg(w, GEN8_HDC_CHICKEN1);
1956
1957         /* WaSendPushConstantsFromMMIO:skl,bxt */
1958         whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1959 }
1960
1961 static void skl_whitelist_build(struct intel_engine_cs *engine)
1962 {
1963         struct i915_wa_list *w = &engine->whitelist;
1964
1965         if (engine->class != RENDER_CLASS)
1966                 return;
1967
1968         gen9_whitelist_build(w);
1969
1970         /* WaDisableLSQCROPERFforOCL:skl */
1971         whitelist_mcr_reg(w, GEN8_L3SQCREG4);
1972 }
1973
1974 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1975 {
1976         if (engine->class != RENDER_CLASS)
1977                 return;
1978
1979         gen9_whitelist_build(&engine->whitelist);
1980 }
1981
1982 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1983 {
1984         struct i915_wa_list *w = &engine->whitelist;
1985
1986         if (engine->class != RENDER_CLASS)
1987                 return;
1988
1989         gen9_whitelist_build(w);
1990
1991         /* WaDisableLSQCROPERFforOCL:kbl */
1992         whitelist_mcr_reg(w, GEN8_L3SQCREG4);
1993 }
1994
1995 static void glk_whitelist_build(struct intel_engine_cs *engine)
1996 {
1997         struct i915_wa_list *w = &engine->whitelist;
1998
1999         if (engine->class != RENDER_CLASS)
2000                 return;
2001
2002         gen9_whitelist_build(w);
2003
2004         /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
2005         whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2006 }
2007
2008 static void cfl_whitelist_build(struct intel_engine_cs *engine)
2009 {
2010         struct i915_wa_list *w = &engine->whitelist;
2011
2012         if (engine->class != RENDER_CLASS)
2013                 return;
2014
2015         gen9_whitelist_build(w);
2016
2017         /*
2018          * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
2019          *
2020          * This covers 4 registers which are next to one another:
2021          *   - PS_INVOCATION_COUNT
2022          *   - PS_INVOCATION_COUNT_UDW
2023          *   - PS_DEPTH_COUNT
2024          *   - PS_DEPTH_COUNT_UDW
2025          */
2026         whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2027                           RING_FORCE_TO_NONPRIV_ACCESS_RD |
2028                           RING_FORCE_TO_NONPRIV_RANGE_4);
2029 }
2030
2031 static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
2032 {
2033         struct i915_wa_list *w = &engine->whitelist;
2034
2035         if (engine->class != RENDER_CLASS)
2036                 whitelist_reg_ext(w,
2037                                   RING_CTX_TIMESTAMP(engine->mmio_base),
2038                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
2039 }
2040
2041 static void cml_whitelist_build(struct intel_engine_cs *engine)
2042 {
2043         allow_read_ctx_timestamp(engine);
2044
2045         cfl_whitelist_build(engine);
2046 }
2047
2048 static void icl_whitelist_build(struct intel_engine_cs *engine)
2049 {
2050         struct i915_wa_list *w = &engine->whitelist;
2051
2052         allow_read_ctx_timestamp(engine);
2053
2054         switch (engine->class) {
2055         case RENDER_CLASS:
2056                 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
2057                 whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
2058
2059                 /* WaAllowUMDToModifySamplerMode:icl */
2060                 whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
2061
2062                 /* WaEnableStateCacheRedirectToCS:icl */
2063                 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2064
2065                 /*
2066                  * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
2067                  *
2068                  * This covers 4 registers which are next to one another:
2069                  *   - PS_INVOCATION_COUNT
2070                  *   - PS_INVOCATION_COUNT_UDW
2071                  *   - PS_DEPTH_COUNT
2072                  *   - PS_DEPTH_COUNT_UDW
2073                  */
2074                 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2075                                   RING_FORCE_TO_NONPRIV_ACCESS_RD |
2076                                   RING_FORCE_TO_NONPRIV_RANGE_4);
2077                 break;
2078
2079         case VIDEO_DECODE_CLASS:
2080                 /* hucStatusRegOffset */
2081                 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
2082                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
2083                 /* hucUKernelHdrInfoRegOffset */
2084                 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
2085                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
2086                 /* hucStatus2RegOffset */
2087                 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
2088                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
2089                 break;
2090
2091         default:
2092                 break;
2093         }
2094 }
2095
2096 static void tgl_whitelist_build(struct intel_engine_cs *engine)
2097 {
2098         struct i915_wa_list *w = &engine->whitelist;
2099
2100         allow_read_ctx_timestamp(engine);
2101
2102         switch (engine->class) {
2103         case RENDER_CLASS:
2104                 /*
2105                  * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
2106                  * Wa_1408556865:tgl
2107                  *
2108                  * This covers 4 registers which are next to one another:
2109                  *   - PS_INVOCATION_COUNT
2110                  *   - PS_INVOCATION_COUNT_UDW
2111                  *   - PS_DEPTH_COUNT
2112                  *   - PS_DEPTH_COUNT_UDW
2113                  */
2114                 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2115                                   RING_FORCE_TO_NONPRIV_ACCESS_RD |
2116                                   RING_FORCE_TO_NONPRIV_RANGE_4);
2117
2118                 /*
2119                  * Wa_1808121037:tgl
2120                  * Wa_14012131227:dg1
2121                  * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
2122                  */
2123                 whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
2124
2125                 /* Wa_1806527549:tgl */
2126                 whitelist_reg(w, HIZ_CHICKEN);
2127                 break;
2128         default:
2129                 break;
2130         }
2131 }
2132
2133 static void dg1_whitelist_build(struct intel_engine_cs *engine)
2134 {
2135         struct i915_wa_list *w = &engine->whitelist;
2136
2137         tgl_whitelist_build(engine);
2138
2139         /* GEN:BUG:1409280441:dg1 */
2140         if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) &&
2141             (engine->class == RENDER_CLASS ||
2142              engine->class == COPY_ENGINE_CLASS))
2143                 whitelist_reg_ext(w, RING_ID(engine->mmio_base),
2144                                   RING_FORCE_TO_NONPRIV_ACCESS_RD);
2145 }
2146
2147 static void xehpsdv_whitelist_build(struct intel_engine_cs *engine)
2148 {
2149         allow_read_ctx_timestamp(engine);
2150 }
2151
2152 static void dg2_whitelist_build(struct intel_engine_cs *engine)
2153 {
2154         struct i915_wa_list *w = &engine->whitelist;
2155
2156         allow_read_ctx_timestamp(engine);
2157
2158         switch (engine->class) {
2159         case RENDER_CLASS:
2160                 /*
2161                  * Wa_1507100340:dg2_g10
2162                  *
2163                  * This covers 4 registers which are next to one another:
2164                  *   - PS_INVOCATION_COUNT
2165                  *   - PS_INVOCATION_COUNT_UDW
2166                  *   - PS_DEPTH_COUNT
2167                  *   - PS_DEPTH_COUNT_UDW
2168                  */
2169                 if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
2170                         whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2171                                           RING_FORCE_TO_NONPRIV_ACCESS_RD |
2172                                           RING_FORCE_TO_NONPRIV_RANGE_4);
2173
2174                 break;
2175         case COMPUTE_CLASS:
2176                 /* Wa_16011157294:dg2_g10 */
2177                 if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
2178                         whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
2179                 break;
2180         default:
2181                 break;
2182         }
2183 }
2184
2185 static void blacklist_trtt(struct intel_engine_cs *engine)
2186 {
2187         struct i915_wa_list *w = &engine->whitelist;
2188
2189         /*
2190          * Prevent read/write access to [0x4400, 0x4600) which covers
2191          * the TRTT range across all engines. Note that normally userspace
2192          * cannot access the other engines' trtt control, but for simplicity
2193          * we cover the entire range on each engine.
2194          */
2195         whitelist_reg_ext(w, _MMIO(0x4400),
2196                           RING_FORCE_TO_NONPRIV_DENY |
2197                           RING_FORCE_TO_NONPRIV_RANGE_64);
2198         whitelist_reg_ext(w, _MMIO(0x4500),
2199                           RING_FORCE_TO_NONPRIV_DENY |
2200                           RING_FORCE_TO_NONPRIV_RANGE_64);
2201 }
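
/*
 * Back-of-the-envelope check for the two entries above, assuming the usual
 * 4-byte register stride implied by RING_FORCE_TO_NONPRIV_RANGE_64: each
 * entry denies 64 consecutive registers, i.e. 64 * 4 = 0x100 bytes, so
 * 0x4400..0x44ff and 0x4500..0x45ff together cover exactly the
 * [0x4400, 0x4600) TRTT range described in the comment.
 */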
2202
2203 static void pvc_whitelist_build(struct intel_engine_cs *engine)
2204 {
2205         allow_read_ctx_timestamp(engine);
2206
2207         /* Wa_16014440446:pvc */
2208         blacklist_trtt(engine);
2209 }
2210
2211 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
2212 {
2213         struct drm_i915_private *i915 = engine->i915;
2214         struct i915_wa_list *w = &engine->whitelist;
2215
2216         wa_init_start(w, engine->gt, "whitelist", engine->name);
2217
2218         if (IS_METEORLAKE(i915))
2219                 ; /* noop; none at this time */
2220         else if (IS_PONTEVECCHIO(i915))
2221                 pvc_whitelist_build(engine);
2222         else if (IS_DG2(i915))
2223                 dg2_whitelist_build(engine);
2224         else if (IS_XEHPSDV(i915))
2225                 xehpsdv_whitelist_build(engine);
2226         else if (IS_DG1(i915))
2227                 dg1_whitelist_build(engine);
2228         else if (GRAPHICS_VER(i915) == 12)
2229                 tgl_whitelist_build(engine);
2230         else if (GRAPHICS_VER(i915) == 11)
2231                 icl_whitelist_build(engine);
2232         else if (IS_COMETLAKE(i915))
2233                 cml_whitelist_build(engine);
2234         else if (IS_COFFEELAKE(i915))
2235                 cfl_whitelist_build(engine);
2236         else if (IS_GEMINILAKE(i915))
2237                 glk_whitelist_build(engine);
2238         else if (IS_KABYLAKE(i915))
2239                 kbl_whitelist_build(engine);
2240         else if (IS_BROXTON(i915))
2241                 bxt_whitelist_build(engine);
2242         else if (IS_SKYLAKE(i915))
2243                 skl_whitelist_build(engine);
2244         else if (GRAPHICS_VER(i915) <= 8)
2245                 ;
2246         else
2247                 MISSING_CASE(GRAPHICS_VER(i915));
2248
2249         wa_init_finish(w);
2250 }
2251
2252 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
2253 {
2254         const struct i915_wa_list *wal = &engine->whitelist;
2255         struct intel_uncore *uncore = engine->uncore;
2256         const u32 base = engine->mmio_base;
2257         struct i915_wa *wa;
2258         unsigned int i;
2259
2260         if (!wal->count)
2261                 return;
2262
2263         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
2264                 intel_uncore_write(uncore,
2265                                    RING_FORCE_TO_NONPRIV(base, i),
2266                                    i915_mmio_reg_offset(wa->reg));
2267
2268         /* And clear the rest just in case of garbage */
2269         for (; i < RING_MAX_NONPRIV_SLOTS; i++)
2270                 intel_uncore_write(uncore,
2271                                    RING_FORCE_TO_NONPRIV(base, i),
2272                                    i915_mmio_reg_offset(RING_NOPID(base)));
2273 }
2274
2275 /*
2276  * engine_fake_wa_init() is a placeholder to program registers that
2277  * are not part of an official workaround defined by the hardware
2278  * team.
2279  * Adding the programming of those registers to a workaround list lets
2280  * us reuse the wa framework for proper application and verification.
2281  */
2282 static void
2283 engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2284 {
2285         u8 mocs_w, mocs_r;
2286
2287         /*
2288          * RING_CMD_CCTL specifies the default MOCS entry that will be used
2289          * by the command streamer when executing commands that don't have
2290          * a way to explicitly specify a MOCS setting.  The default should
2291          * usually reference whichever MOCS entry corresponds to uncached
2292          * behavior, although use of a WB cached entry is recommended by the
2293          * spec in certain circumstances on specific platforms.
2294          */
2295         if (GRAPHICS_VER(engine->i915) >= 12) {
2296                 mocs_r = engine->gt->mocs.uc_index;
2297                 mocs_w = engine->gt->mocs.uc_index;
2298
2299                 if (HAS_L3_CCS_READ(engine->i915) &&
2300                     engine->class == COMPUTE_CLASS) {
2301                         mocs_r = engine->gt->mocs.wb_index;
2302
2303                         /*
2304                          * Even on the few platforms where MOCS 0 is a
2305                          * legitimate table entry, it's never the correct
2306                          * setting to use here; we can assume the MOCS init
2307                          * just forgot to initialize wb_index.
2308                          */
2309                         drm_WARN_ON(&engine->i915->drm, mocs_r == 0);
2310                 }
2311
2312                 wa_masked_field_set(wal,
2313                                     RING_CMD_CCTL(engine->mmio_base),
2314                                     CMD_CCTL_MOCS_MASK,
2315                                     CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
2316         }
2317 }
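
/*
 * Example of the selection above (MOCS indices invented for illustration):
 * if a platform's MOCS table used index 3 for uncached and index 2 for WB,
 * a COMPUTE_CLASS engine on a part with HAS_L3_CCS_READ() would end up
 * with mocs_w = 3 and mocs_r = 2, while every other engine on that part
 * would use 3 for both reads and writes.
 */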
2318
2319 static bool needs_wa_1308578152(struct intel_engine_cs *engine)
2320 {
2321         return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
2322                 GEN_DSS_PER_GSLICE;
2323 }
2324
2325 static void
2326 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2327 {
2328         struct drm_i915_private *i915 = engine->i915;
2329
2330         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
2331             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
2332                 /* Wa_22014600077 */
2333                 wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
2334                                  ENABLE_EU_COUNT_FOR_TDL_FLUSH);
2335         }
2336
2337         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
2338             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
2339             IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
2340             IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
2341                 /* Wa_1509727124 */
2342                 wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
2343                                  SC_DISABLE_POWER_OPTIMIZATION_EBB);
2344         }
2345
2346         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
2347             IS_DG2_G11(i915) || IS_DG2_G12(i915) ||
2348             IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) {
2349                 /* Wa_22012856258 */
2350                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
2351                                  GEN12_DISABLE_READ_SUPPRESSION);
2352         }
2353
2354         if (IS_DG2(i915)) {
2355                 /* Wa_1509235366:dg2 */
2356                 wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
2357                             GLOBAL_INVALIDATION_MODE);
2358         }
2359
2360         if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
2361                 /* Wa_14013392000:dg2_g11 */
2362                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
2363         }
2364
2365         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
2366             IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
2367                 /* Wa_14012419201:dg2 */
2368                 wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
2369                                  GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
2370         }
2371
2372         /* Wa_1308578152:dg2_g10 when first gslice is fused off */
2373         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
2374             needs_wa_1308578152(engine)) {
2375                 wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
2376                               GEN12_REPLAY_MODE_GRANULARITY);
2377         }
2378
2379         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
2380             IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
2381                 /*
2382                  * Wa_22010960976:dg2
2383                  * Wa_14013347512:dg2
2384                  */
2385                 wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
2386                                   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
2387         }
2388
2389         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
2390                 /*
2391                  * Wa_1608949956:dg2_g10
2392                  * Wa_14010198302:dg2_g10
2393                  */
2394                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
2395                                  MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
2396         }
2397
2398         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
2399                 /* Wa_22010430635:dg2 */
2400                 wa_mcr_masked_en(wal,
2401                                  GEN9_ROW_CHICKEN4,
2402                                  GEN12_DISABLE_GRF_CLEAR);
2403
2404                 /* Wa_14010648519:dg2 */
2405                 wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
2406         }
2407
2408         /* Wa_14013202645:dg2 */
2409         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
2410             IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
2411                 wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
2412
2413         /* Wa_22012532006:dg2 */
2414         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
2415             IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
2416                 wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
2417                                  DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
2418
2419         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
2420                 /* Wa_14010680813:dg2_g10 */
2421                 wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
2422                             EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
2423         }
2424
2425         if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
2426             IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
2427                 /* Wa_14012362059:dg2 */
2428                 wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
2429         }
2430
2431         if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
2432             IS_DG2_G10(i915)) {
2433                 /* Wa_22014600077:dg2 */
2434                 wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
2435                            _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
2436                            0 /* Wa_14012342262 write-only reg, so skip verification */,
2437                            true);
2438         }
2439
2440         if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
2441             IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
2442                 /*
2443                  * Wa_1607138336:tgl[a0],dg1[a0]
2444                  * Wa_1607063988:tgl[a0],dg1[a0]
2445                  */
2446                 wa_write_or(wal,
2447                             GEN9_CTX_PREEMPT_REG,
2448                             GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
2449         }
2450
2451         if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
2452                 /*
2453                  * Wa_1606679103:tgl
2454                  * (see also Wa_1606682166:icl)
2455                  */
2456                 wa_write_or(wal,
2457                             GEN7_SARCHKMD,
2458                             GEN7_DISABLE_SAMPLER_PREFETCH);
2459         }
2460
2461         if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
2462             IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2463                 /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
2464                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
2465
2466                 /*
2467                  * Wa_1407928979:tgl A*
2468                  * Wa_18011464164:tgl[B0+],dg1[B0+]
2469                  * Wa_22010931296:tgl[B0+],dg1[B0+]
2470                  * Wa_14010919138:rkl,dg1,adl-s,adl-p
2471                  */
2472                 wa_write_or(wal, GEN7_FF_THREAD_MODE,
2473                             GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2474         }
2475
2476         if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
2477             IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2478                 /*
2479                  * Wa_1606700617:tgl,dg1,adl-p
2480                  * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
2481                  * Wa_14010826681:tgl,dg1,rkl,adl-p
2482                  * Wa_18019627453:dg2
2483                  */
2484                 wa_masked_en(wal,
2485                              GEN9_CS_DEBUG_MODE1,
2486                              FF_DOP_CLOCK_GATE_DISABLE);
2487         }
2488
2489         if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
2490             IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
2491             IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2492                 /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
2493                 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
2494                                  GEN12_PUSH_CONST_DEREF_HOLD_DIS);
2495
2496                 /*
2497                  * Wa_1409085225:tgl
2498                  * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
2499                  */
2500                 wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
2501         }
2502
2503         if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
2504             IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
2505                 /*
2506                  * Wa_1607030317:tgl
2507                  * Wa_1607186500:tgl
2508                  * Wa_1607297627:tgl,rkl,dg1[a0],adlp
2509                  *
2510                  * On TGL and RKL there are multiple entries for this WA in the
2511                  * BSpec; some indicate this is an A0-only WA, others indicate
2512                  * it applies to all steppings so we trust the "all steppings."
2513                  * For DG1 this only applies to A0.
2514                  */
2515                 wa_masked_en(wal,
2516                              RING_PSMI_CTL(RENDER_RING_BASE),
2517                              GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
2518                              GEN8_RC_SEMA_IDLE_MSG_DISABLE);
2519         }
2520
2521         if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
2522             IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
2523                 /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
2524                 wa_mcr_masked_en(wal,
2525                                  GEN10_SAMPLER_MODE,
2526                                  ENABLE_SMALLPL);
2527         }
2528
2529         if (GRAPHICS_VER(i915) == 11) {
2530                 /* This is not a Wa. Enable for better image quality */
2531                 wa_masked_en(wal,
2532                              _3D_CHICKEN3,
2533                              _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
2534
2535                 /*
2536                  * Wa_1405543622:icl
2537                  * Formerly known as WaGAPZPriorityScheme
2538                  */
2539                 wa_write_or(wal,
2540                             GEN8_GARBCNTL,
2541                             GEN11_ARBITRATION_PRIO_ORDER_MASK);
2542
2543                 /*
2544                  * Wa_1604223664:icl
2545                  * Formerly known as WaL3BankAddressHashing
2546                  */
2547                 wa_write_clr_set(wal,
2548                                  GEN8_GARBCNTL,
2549                                  GEN11_HASH_CTRL_EXCL_MASK,
2550                                  GEN11_HASH_CTRL_EXCL_BIT0);
2551                 wa_write_clr_set(wal,
2552                                  GEN11_GLBLINVL,
2553                                  GEN11_BANK_HASH_ADDR_EXCL_MASK,
2554                                  GEN11_BANK_HASH_ADDR_EXCL_BIT0);
2555
2556                 /*
2557                  * Wa_1405733216:icl
2558                  * Formerly known as WaDisableCleanEvicts
2559                  */
2560                 wa_mcr_write_or(wal,
2561                                 GEN8_L3SQCREG4,
2562                                 GEN11_LQSC_CLEAN_EVICT_DISABLE);
2563
2564                 /* Wa_1606682166:icl */
2565                 wa_write_or(wal,
2566                             GEN7_SARCHKMD,
2567                             GEN7_DISABLE_SAMPLER_PREFETCH);
2568
2569                 /* Wa_1409178092:icl */
2570                 wa_mcr_write_clr_set(wal,
2571                                      GEN11_SCRATCH2,
2572                                      GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
2573                                      0);
2574
2575                 /* WaEnable32PlaneMode:icl */
2576                 wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
2577                              GEN11_ENABLE_32_PLANE_MODE);
2578
2579                 /*
2580                  * Wa_1408615072:icl,ehl  (vsunit)
2581                  * Wa_1407596294:icl,ehl  (hsunit)
2582                  */
2583                 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
2584                             VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
2585
2586                 /*
2587                  * Wa_1408767742:icl[a2..forever],ehl[all]
2588                  * Wa_1605460711:icl[a0..c0]
2589                  */
2590                 wa_write_or(wal,
2591                             GEN7_FF_THREAD_MODE,
2592                             GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2593
2594                 /* Wa_22010271021 */
2595                 wa_masked_en(wal,
2596                              GEN9_CS_DEBUG_MODE1,
2597                              FF_DOP_CLOCK_GATE_DISABLE);
2598         }
2599
2600         /*
2601          * Intel platforms that support fine-grained preemption (i.e., gen9 and
2602          * beyond) allow the kernel-mode driver to choose between two different
2603          * options for controlling preemption granularity and behavior.
2604          *
2605          * Option 1 (hardware default):
2606          *   Preemption settings are controlled in a global manner via
2607          *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
2608          *   and settings chosen by the kernel-mode driver will apply to all
2609          *   userspace clients.
2610          *
2611          * Option 2:
2612          *   Preemption settings are controlled on a per-context basis via
2613          *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
2614          *   context switch and is writable by userspace (e.g., via
2615          *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
2616          *   which allows different userspace drivers/clients to select
2617          *   different settings, or to change those settings on the fly in
2618          *   response to runtime needs.  This option was known by name
2619          *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
2620          *   that name is somewhat misleading as other non-granularity
2621          *   preemption settings are also impacted by this decision.
2622          *
2623          * On Linux, our policy has always been to let userspace drivers
2624          * control preemption granularity/settings (Option 2).  This was
2625          * originally mandatory on gen9 to prevent ABI breakage (old gen9
2626          * userspace developed before object-level preemption was enabled would
2627          * not behave well if i915 were to go with Option 1 and enable that
2628          * preemption in a global manner).  On gen9 each context would have
2629          * object-level preemption disabled by default (see
2630          * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
2631          * userspace drivers could opt-in to object-level preemption as they
2632          * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
2633          * even though it is no longer necessary for ABI compatibility when
2634          * enabling a new platform, it does ensure that userspace will be able
2635          * to implement any workarounds that show up requiring temporary
2636          * adjustments to preemption behavior at runtime.
2637          *
2638          * Notes/Workarounds:
2639          *  - Wa_14015141709:  On DG2 and early steppings of MTL,
2640          *      CS_CHICKEN1[0] does not disable object-level preemption as
2641          *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
2642          *      using Option 1).  Effectively this means userspace is unable
2643          *      to disable object-level preemption on these platforms/steppings
2644          *      despite the setting here.
2645          *
2646          *  - Wa_16013994831:  May require that userspace program
2647          *      CS_CHICKEN1[10] when certain runtime conditions are true.
2648          *      Userspace requires Option 2 to be in effect for their update of
2649          *      CS_CHICKEN1[10] to be effective.
2650          *
2651          * Other workarounds may appear in the future that will also require
2652          * Option 2 behavior to allow proper userspace implementation.
2653          */
2654         if (GRAPHICS_VER(i915) >= 9)
2655                 wa_masked_en(wal,
2656                              GEN7_FF_SLICE_CS_CHICKEN1,
2657                              GEN9_FFSC_PERCTX_PREEMPT_CTRL);
2658
2659         if (IS_SKYLAKE(i915) ||
2660             IS_KABYLAKE(i915) ||
2661             IS_COFFEELAKE(i915) ||
2662             IS_COMETLAKE(i915)) {
2663                 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
2664                 wa_write_or(wal,
2665                             GEN8_GARBCNTL,
2666                             GEN9_GAPS_TSV_CREDIT_DISABLE);
2667         }
2668
2669         if (IS_BROXTON(i915)) {
2670                 /* WaDisablePooledEuLoadBalancingFix:bxt */
2671                 wa_masked_en(wal,
2672                              FF_SLICE_CS_CHICKEN2,
2673                              GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
2674         }
2675
2676         if (GRAPHICS_VER(i915) == 9) {
2677                 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
2678                 wa_masked_en(wal,
2679                              GEN9_CSFE_CHICKEN1_RCS,
2680                              GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
2681
2682                 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
2683                 wa_mcr_write_or(wal,
2684                                 BDW_SCRATCH1,
2685                                 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
2686
2687                 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
2688                 if (IS_GEN9_LP(i915))
2689                         wa_mcr_write_clr_set(wal,
2690                                              GEN8_L3SQCREG1,
2691                                              L3_PRIO_CREDITS_MASK,
2692                                              L3_GENERAL_PRIO_CREDITS(62) |
2693                                              L3_HIGH_PRIO_CREDITS(2));
2694
2695                 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
2696                 wa_mcr_write_or(wal,
2697                                 GEN8_L3SQCREG4,
2698                                 GEN8_LQSC_FLUSH_COHERENT_LINES);
2699
2700                 /* Disable atomics in L3 to prevent unrecoverable hangs */
2701                 wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
2702                                  GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2703                 wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
2704                                      GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2705                 wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
2706                                      EVICTION_PERF_FIX_ENABLE, 0);
2707         }
2708
2709         if (IS_HASWELL(i915)) {
2710                 /* WaSampleCChickenBitEnable:hsw */
2711                 wa_masked_en(wal,
2712                              HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
2713
2714                 wa_masked_dis(wal,
2715                               CACHE_MODE_0_GEN7,
2716                               /* enable HiZ Raw Stall Optimization */
2717                               HIZ_RAW_STALL_OPT_DISABLE);
2718         }
2719
2720         if (IS_VALLEYVIEW(i915)) {
2721                 /* WaDisableEarlyCull:vlv */
2722                 wa_masked_en(wal,
2723                              _3D_CHICKEN3,
2724                              _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
2725
2726                 /*
2727                  * WaVSThreadDispatchOverride:ivb,vlv
2728                  *
2729                  * This actually overrides the dispatch
2730                  * mode for all thread types.
2731                  */
2732                 wa_write_clr_set(wal,
2733                                  GEN7_FF_THREAD_MODE,
2734                                  GEN7_FF_SCHED_MASK,
2735                                  GEN7_FF_TS_SCHED_HW |
2736                                  GEN7_FF_VS_SCHED_HW |
2737                                  GEN7_FF_DS_SCHED_HW);
2738
2739                 /* WaPsdDispatchEnable:vlv */
2740                 /* WaDisablePSDDualDispatchEnable:vlv */
2741                 wa_masked_en(wal,
2742                              GEN7_HALF_SLICE_CHICKEN1,
2743                              GEN7_MAX_PS_THREAD_DEP |
2744                              GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
2745         }
2746
2747         if (IS_IVYBRIDGE(i915)) {
2748                 /* WaDisableEarlyCull:ivb */
2749                 wa_masked_en(wal,
2750                              _3D_CHICKEN3,
2751                              _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
2752
2753                 if (0) { /* causes HiZ corruption on ivb:gt1 */
2754                         /* enable HiZ Raw Stall Optimization */
2755                         wa_masked_dis(wal,
2756                                       CACHE_MODE_0_GEN7,
2757                                       HIZ_RAW_STALL_OPT_DISABLE);
2758                 }
2759
2760                 /*
2761                  * WaVSThreadDispatchOverride:ivb,vlv
2762                  *
2763                  * This actually overrides the dispatch
2764                  * mode for all thread types.
2765                  */
2766                 wa_write_clr_set(wal,
2767                                  GEN7_FF_THREAD_MODE,
2768                                  GEN7_FF_SCHED_MASK,
2769                                  GEN7_FF_TS_SCHED_HW |
2770                                  GEN7_FF_VS_SCHED_HW |
2771                                  GEN7_FF_DS_SCHED_HW);
2772
2773                 /* WaDisablePSDDualDispatchEnable:ivb */
2774                 if (IS_IVB_GT1(i915))
2775                         wa_masked_en(wal,
2776                                      GEN7_HALF_SLICE_CHICKEN1,
2777                                      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
2778         }
2779
2780         if (GRAPHICS_VER(i915) == 7) {
2781                 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
2782                 wa_masked_en(wal,
2783                              RING_MODE_GEN7(RENDER_RING_BASE),
2784                              GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
2785
2786                 /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
2787                 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
2788
2789                 /*
2790                  * BSpec says this must be set, even though
2791                  * WaDisable4x2SubspanOptimization:ivb,hsw
2792                  * WaDisable4x2SubspanOptimization isn't listed for VLV.
2793                  */
2794                 wa_masked_en(wal,
2795                              CACHE_MODE_1,
2796                              PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
2797
2798                 /*
2799                  * BSpec recommends 8x4 when MSAA is used,
2800                  * however in practice 16x4 seems fastest.
2801                  *
2802                  * Note that PS/WM thread counts depend on the WIZ hashing
2803                  * disable bit, which we don't touch here, but it's good
2804                  * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2805                  */
2806                 wa_masked_field_set(wal,
2807                                     GEN7_GT_MODE,
2808                                     GEN6_WIZ_HASHING_MASK,
2809                                     GEN6_WIZ_HASHING_16x4);
2810         }
2811
2812         if (IS_GRAPHICS_VER(i915, 6, 7))
2813                 /*
2814                  * We need to disable the AsyncFlip performance optimisations in
2815                  * order to use MI_WAIT_FOR_EVENT within the CS. It should
2816                  * already be programmed to '1' on all products.
2817                  *
2818                  * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
2819                  */
2820                 wa_masked_en(wal,
2821                              RING_MI_MODE(RENDER_RING_BASE),
2822                              ASYNC_FLIP_PERF_DISABLE);
2823
2824         if (GRAPHICS_VER(i915) == 6) {
2825                 /*
2826                  * Required for the hardware to program scanline values for
2827                  * waiting.
2828                  * WaEnableFlushTlbInvalidationMode:snb
2829                  */
2830                 wa_masked_en(wal,
2831                              GFX_MODE,
2832                              GFX_TLB_INVALIDATE_EXPLICIT);
2833
2834                 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
2835                 wa_masked_en(wal,
2836                              _3D_CHICKEN,
2837                              _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
2838
2839                 wa_masked_en(wal,
2840                              _3D_CHICKEN3,
2841                              /* WaStripsFansDisableFastClipPerformanceFix:snb */
2842                              _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
2843                              /*
2844                               * Bspec says:
2845                               * "This bit must be set if 3DSTATE_CLIP clip mode is set
2846                               * to normal and 3DSTATE_SF number of SF output attributes
2847                               * is more than 16."
2848                               */
2849                              _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
2850
2851                 /*
2852                  * BSpec recommends 8x4 when MSAA is used,
2853                  * however in practice 16x4 seems fastest.
2854                  *
2855                  * Note that PS/WM thread counts depend on the WIZ hashing
2856                  * disable bit, which we don't touch here, but it's good
2857                  * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2858                  */
2859                 wa_masked_field_set(wal,
2860                                     GEN6_GT_MODE,
2861                                     GEN6_WIZ_HASHING_MASK,
2862                                     GEN6_WIZ_HASHING_16x4);
2863
2864                 /* WaDisable_RenderCache_OperationalFlush:snb */
2865                 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
2866
2867                 /*
2868                  * From the Sandybridge PRM, volume 1 part 3, page 24:
2869                  * "If this bit is set, STCunit will have LRA as replacement
2870                  *  policy. [...] This bit must be reset. LRA replacement
2871                  *  policy is not supported."
2872                  */
2873                 wa_masked_dis(wal,
2874                               CACHE_MODE_0,
2875                               CM0_STC_EVICT_DISABLE_LRA_SNB);
2876         }
2877
2878         if (IS_GRAPHICS_VER(i915, 4, 6))
2879                 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
2880                 wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
2881                        0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
2882                        /* XXX bit doesn't stick on Broadwater */
2883                        IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
2884
2885         if (GRAPHICS_VER(i915) == 4)
2886                 /*
2887                  * Disable CONSTANT_BUFFER before it is loaded from the context
2888                  * image. Once it is loaded, it is executed and the stored
2889                  * address may no longer be valid, leading to a GPU hang.
2890                  *
2891                  * This imposes the requirement that userspace reload their
2892                  * CONSTANT_BUFFER on every batch, fortunately a requirement
2893                  * they are already accustomed to from before contexts were
2894                  * enabled.
2895                  */
2896                 wa_add(wal, ECOSKPD(RENDER_RING_BASE),
2897                        0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
2898                        0 /* XXX bit doesn't stick on Broadwater */,
2899                        true);
2900 }
2901
2902 static void
2903 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2904 {
2905         struct drm_i915_private *i915 = engine->i915;
2906
2907         /* WaKBLVECSSemaphoreWaitPoll:kbl */
2908         if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
2909                 wa_write(wal,
2910                          RING_SEMA_WAIT_POLL(engine->mmio_base),
2911                          1);
2912         }
2913 }
2914
2915 static void
2916 ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2917 {
2918         if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
2919                 /* Wa_14014999345:pvc */
2920                 wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
2921         }
2922 }
2923
2924 /*
2925  * The bspec performance guide has recommended MMIO tuning settings.  These
2926  * aren't truly "workarounds" but we want to program them with the same
2927  * workaround infrastructure to ensure that they're automatically added to
2928  * the GuC save/restore lists, re-applied at the right times, and checked for
2929  * any conflicting programming requested by real workarounds.
2930  *
2931  * Programming settings should be added here only if their registers are not
2932  * part of an engine's register state context.  If a register is part of a
2933  * context, then any tuning settings should be programmed in an appropriate
2934  * function invoked by __intel_engine_init_ctx_wa().
2935  */
2936 static void
2937 add_render_compute_tuning_settings(struct drm_i915_private *i915,
2938                                    struct i915_wa_list *wal)
2939 {
2940         if (IS_PONTEVECCHIO(i915)) {
2941                 wa_write(wal, XEHPC_L3SCRUB,
2942                          SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
2943                 wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
2944         }
2945
2946         if (IS_DG2(i915)) {
2947                 wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
2948                 wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
2949         }
2950
2951         /*
2952          * This tuning setting proves beneficial only on ATS-M designs; the
2953          * default "age based" setting is optimal on regular DG2 and other
2954          * platforms.
2955          */
2956         if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
2957                 wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
2958                                         THREAD_EX_ARB_MODE_RR_AFTER_DEP);
2959
2960         if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
2961                 wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
2962 }
2963
2964 /*
2965  * The workarounds in this function apply to shared registers in
2966  * the general render reset domain that aren't tied to a
2967  * specific engine.  Since all render+compute engines get reset
2968  * together, and the contents of these registers are lost during
2969  * the shared render domain reset, we'll define such workarounds
2970  * here and then add them to just a single RCS or CCS engine's
2971  * workaround list (whichever engine has the I915_ENGINE_FIRST_RENDER_COMPUTE flag).
2972  */
2973 static void
2974 general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2975 {
2976         struct drm_i915_private *i915 = engine->i915;
2977
2978         add_render_compute_tuning_settings(i915, wal);
2979
2980         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
2981             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
2982             IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
2983             IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
2984                 /* Wa_22013037850 */
2985                 wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
2986                                 DISABLE_128B_EVICTION_COMMAND_UDW);
2987         }
2988
2989         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
2990             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
2991             IS_PONTEVECCHIO(i915) ||
2992             IS_DG2(i915)) {
2993                 /* Wa_18018781329 */
2994                 wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
2995                 wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
2996                 wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
2997                 wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
2998
2999                 /* Wa_22014226127 */
3000                 wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
3001         }
3002
3003         if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
3004             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
3005             IS_DG2(i915)) {
3006                 /* Wa_18017747507 */
3007                 wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
3008         }
3009
3010         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
3011             IS_DG2_G11(i915)) {
3012                 /*
3013                  * Wa_22012826095:dg2
3014                  * Wa_22013059131:dg2
3015                  */
3016                 wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
3017                                      MAXREQS_PER_BANK,
3018                                      REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
3019
3020                 /* Wa_22013059131:dg2 */
3021                 wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
3022                                 FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
3023         }
3024
3025         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
3026                 /*
3027                  * Wa_14010918519:dg2_g10
3028                  *
3029                  * LSC_CHICKEN_BIT_0 always reads back as 0 on this stepping,
3030                  * so ignoring verification.
3031                  */
3032                 wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
3033                            FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
3034                            0, false);
3035         }
3036
3037         if (IS_PONTEVECCHIO(i915)) {
3038                 /* Wa_16016694945 */
3039                 wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
3040         }
3041
3042         if (IS_XEHPSDV(i915)) {
3043                 /* Wa_1409954639 */
3044                 wa_mcr_masked_en(wal,
3045                                  GEN8_ROW_CHICKEN,
3046                                  SYSTOLIC_DOP_CLOCK_GATING_DIS);
3047
3048                 /* Wa_1607196519 */
3049                 wa_mcr_masked_en(wal,
3050                                  GEN9_ROW_CHICKEN4,
3051                                  GEN12_DISABLE_GRF_CLEAR);
3052
3053                 /* Wa_14010670810:xehpsdv */
3054                 wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
3055
3056                 /* Wa_14010449647:xehpsdv */
3057                 wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
3058                                  GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
3059
3060                 /* Wa_18011725039:xehpsdv */
3061                 if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
3062                         wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
3063                         wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
3064                 }
3065
3066                 /* Wa_14012362059:xehpsdv */
3067                 wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
3068
3069                 /* Wa_14014368820:xehpsdv */
3070                 wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
3071                                 GLOBAL_INVALIDATION_MODE);
3072         }
3073
3074         if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
3075                 /* Wa_14015227452:dg2,pvc */
3076                 wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
3077
3078                 /* Wa_16015675438:dg2,pvc */
3079                 wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
3080         }
3081
3082         if (IS_DG2(i915)) {
3083                 /*
3084                  * Wa_16011620976:dg2_g11
3085                  * Wa_22015475538:dg2
3086                  */
3087                 wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
3088         }
3089
3090         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915))
3091                 /*
3092                  * Wa_22012654132
3093                  *
3094                  * Note that register 0xE420 is write-only and cannot be read
3095                  * back for verification on DG2 (due to Wa_14012342262), so
3096                  * we need to explicitly skip the readback.
3097                  */
3098                 wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
3099                            _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
3100                            0 /* write-only, so skip validation */,
3101                            true);
3102 }
3103
3104 static void
3105 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
3106 {
3107         if (GRAPHICS_VER(engine->i915) < 4)
3108                 return;
3109
3110         engine_fake_wa_init(engine, wal);
3111
3112         /*
3113          * These are common workarounds that just need to be applied
3114          * to a single RCS/CCS engine's workaround list since
3115          * they're reset as part of the general render domain reset.
3116          */
3117         if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
3118                 general_render_compute_wa_init(engine, wal);
3119
3120         if (engine->class == COMPUTE_CLASS)
3121                 ccs_engine_wa_init(engine, wal);
3122         else if (engine->class == RENDER_CLASS)
3123                 rcs_engine_wa_init(engine, wal);
3124         else
3125                 xcs_engine_wa_init(engine, wal);
3126 }
3127
3128 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
3129 {
3130         struct i915_wa_list *wal = &engine->wa_list;
3131
3132         wa_init_start(wal, engine->gt, "engine", engine->name);
3133         engine_init_workarounds(engine, wal);
3134         wa_init_finish(wal);
3135 }
3136
3137 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
3138 {
3139         wa_list_apply(&engine->wa_list);
3140 }
3141
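     /*
      * Per-platform ranges of registers that sit behind the MCR steering
      * control; see mcr_range() below for why these are excluded from
      * readback verification.
      */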
3142 static const struct i915_range mcr_ranges_gen8[] = {
3143         { .start = 0x5500, .end = 0x55ff },
3144         { .start = 0x7000, .end = 0x7fff },
3145         { .start = 0x9400, .end = 0x97ff },
3146         { .start = 0xb000, .end = 0xb3ff },
3147         { .start = 0xe000, .end = 0xe7ff },
3148         {},
3149 };
3150
3151 static const struct i915_range mcr_ranges_gen12[] = {
3152         { .start =  0x8150, .end =  0x815f },
3153         { .start =  0x9520, .end =  0x955f },
3154         { .start =  0xb100, .end =  0xb3ff },
3155         { .start =  0xde80, .end =  0xe8ff },
3156         { .start = 0x24a00, .end = 0x24a7f },
3157         {},
3158 };
3159
3160 static const struct i915_range mcr_ranges_xehp[] = {
3161         { .start =  0x4000, .end =  0x4aff },
3162         { .start =  0x5200, .end =  0x52ff },
3163         { .start =  0x5400, .end =  0x7fff },
3164         { .start =  0x8140, .end =  0x815f },
3165         { .start =  0x8c80, .end =  0x8dff },
3166         { .start =  0x94d0, .end =  0x955f },
3167         { .start =  0x9680, .end =  0x96ff },
3168         { .start =  0xb000, .end =  0xb3ff },
3169         { .start =  0xc800, .end =  0xcfff },
3170         { .start =  0xd800, .end =  0xd8ff },
3171         { .start =  0xdc00, .end =  0xffff },
3172         { .start = 0x17000, .end = 0x17fff },
3173         { .start = 0x24a00, .end = 0x24a7f },
3174         {},
3175 };
3176
3177 static bool mcr_range(struct drm_i915_private *i915, u32 offset)
3178 {
3179         const struct i915_range *mcr_ranges;
3180         int i;
3181
3182         if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
3183                 mcr_ranges = mcr_ranges_xehp;
3184         else if (GRAPHICS_VER(i915) >= 12)
3185                 mcr_ranges = mcr_ranges_gen12;
3186         else if (GRAPHICS_VER(i915) >= 8)
3187                 mcr_ranges = mcr_ranges_gen8;
3188         else
3189                 return false;
3190
3191         /*
3192          * Registers in these ranges are affected by the MCR selector
3193          * which only controls CPU initiated MMIO. Routing does not
3194          * work for CS access so we cannot verify them on this path.
3195          */
3196         for (i = 0; mcr_ranges[i].start; i++)
3197                 if (offset >= mcr_ranges[i].start &&
3198                     offset <= mcr_ranges[i].end)
3199                         return true;
3200
3201         return false;
3202 }
3203
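     /*
      * Emit an MI_STORE_REGISTER_MEM for each workaround register that can
      * be read back from the CS, dumping the current value of each into the
      * scratch buffer @vma so it can be checked once the request completes.
      */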
3204 static int
3205 wa_list_srm(struct i915_request *rq,
3206             const struct i915_wa_list *wal,
3207             struct i915_vma *vma)
3208 {
3209         struct drm_i915_private *i915 = rq->engine->i915;
3210         unsigned int i, count = 0;
3211         const struct i915_wa *wa;
3212         u32 srm, *cs;
3213
3214         srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
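             /*
              * Gen8+ MI_STORE_REGISTER_MEM takes a 64-bit address, making the
              * command one dword longer; bump the length field to match (the
              * trailing 0 emitted below then serves as the upper address
              * dword, and is just an MI_NOOP pad on older gens).
              */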
3215         if (GRAPHICS_VER(i915) >= 8)
3216                 srm++;
3217
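             /*
              * Only registers outside the MCR-steered ranges can be verified
              * via a CS read, so size the batch for those alone.
              */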
3218         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3219                 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
3220                         count++;
3221         }
3222
3223         cs = intel_ring_begin(rq, 4 * count);
3224         if (IS_ERR(cs))
3225                 return PTR_ERR(cs);
3226
3227         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3228                 u32 offset = i915_mmio_reg_offset(wa->reg);
3229
3230                 if (mcr_range(i915, offset))
3231                         continue;
3232
3233                 *cs++ = srm;
3234                 *cs++ = offset;
3235                 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
3236                 *cs++ = 0;
3237         }
3238         intel_ring_advance(rq, cs);
3239
3240         return 0;
3241 }
3242
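     /*
      * Submit a request that stores the current value of every readable
      * workaround register into a scratch buffer, then compare each value
      * against what the workaround list expects.
      */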
3243 static int engine_wa_list_verify(struct intel_context *ce,
3244                                  const struct i915_wa_list * const wal,
3245                                  const char *from)
3246 {
3247         const struct i915_wa *wa;
3248         struct i915_request *rq;
3249         struct i915_vma *vma;
3250         struct i915_gem_ww_ctx ww;
3251         unsigned int i;
3252         u32 *results;
3253         int err;
3254
3255         if (!wal->count)
3256                 return 0;
3257
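             /* One u32 result slot per workaround entry, indexed like wal->list. */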
3258         vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
3259                                            wal->count * sizeof(u32));
3260         if (IS_ERR(vma))
3261                 return PTR_ERR(vma);
3262
3263         intel_engine_pm_get(ce->engine);
3264         i915_gem_ww_ctx_init(&ww, false);
3265 retry:
3266         err = i915_gem_object_lock(vma->obj, &ww);
3267         if (err == 0)
3268                 err = intel_context_pin_ww(ce, &ww);
3269         if (err)
3270                 goto err_pm;
3271
3272         err = i915_vma_pin_ww(vma, &ww, 0, 0,
3273                            i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
3274         if (err)
3275                 goto err_unpin;
3276
3277         rq = i915_request_create(ce);
3278         if (IS_ERR(rq)) {
3279                 err = PTR_ERR(rq);
3280                 goto err_vma;
3281         }
3282
3283         err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
3284         if (err == 0)
3285                 err = wa_list_srm(rq, wal, vma);
3286
3287         i915_request_get(rq);
3288         if (err)
3289                 i915_request_set_error_once(rq, err);
3290         i915_request_add(rq);
3291
3292         if (err)
3293                 goto err_rq;
3294
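             /* Allow the readback batch up to 200ms to complete before bailing. */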
3295         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3296                 err = -ETIME;
3297                 goto err_rq;
3298         }
3299
3300         results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
3301         if (IS_ERR(results)) {
3302                 err = PTR_ERR(results);
3303                 goto err_rq;
3304         }
3305
3306         err = 0;
3307         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3308                 if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
3309                         continue;
3310
3311                 if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
3312                         err = -ENXIO;
3313         }
3314
3315         i915_gem_object_unpin_map(vma->obj);
3316
3317 err_rq:
3318         i915_request_put(rq);
3319 err_vma:
3320         i915_vma_unpin(vma);
3321 err_unpin:
3322         intel_context_unpin(ce);
3323 err_pm:
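             /*
              * -EDEADLK from the ww transaction indicates a lock-order
              * conflict; back off and retry the whole pin/submit sequence.
              */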
3324         if (err == -EDEADLK) {
3325                 err = i915_gem_ww_ctx_backoff(&ww);
3326                 if (!err)
3327                         goto retry;
3328         }
3329         i915_gem_ww_ctx_fini(&ww);
3330         intel_engine_pm_put(ce->engine);
3331         i915_vma_put(vma);
3332         return err;
3333 }
3334
3335 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
3336                                     const char *from)
3337 {
3338         return engine_wa_list_verify(engine->kernel_context,
3339                                      &engine->wa_list,
3340                                      from);
3341 }
3342
3343 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3344 #include "selftest_workarounds.c"
3345 #endif