2 * SPDX-License-Identifier: MIT
4 * Copyright © 2014-2018 Intel Corporation
8 #include "intel_context.h"
9 #include "intel_engine_pm.h"
10 #include "intel_gpu_commands.h"
12 #include "intel_ring.h"
13 #include "intel_workarounds.h"
16 * DOC: Hardware workarounds
18 * This file is intended as a central place to implement most [1]_ of the
19 * required workarounds for hardware to work as originally intended. They fall
20 * in five basic categories depending on how/when they are applied:
22 * - Workarounds that touch registers that are saved/restored to/from the HW
23 * context image. The list is emitted (via Load Register Immediate commands)
24 * every time a new context is created.
25 * - GT workarounds. The list of these WAs is applied whenever these registers
26 * revert to default values (on GPU reset, suspend/resume [2]_, etc..).
27 * - Display workarounds. The list is applied during display clock-gating
29 * - Workarounds that whitelist a privileged register, so that UMDs can manage
30 * them directly. This is just a special case of an MMIO workaround (as we
31 * write the list of these to/be-whitelisted registers to some special HW
33 * - Workaround batchbuffers, that get executed automatically by the hardware
34 * on every HW context restore.
36 * .. [1] Please notice that there are other WAs that, due to their nature,
37 * cannot be applied from a central place. Those are peppered around the rest
38 * of the code, as needed.
40 * .. [2] Technically, some registers are powercontext saved & restored, so they
41 * survive a suspend/resume. In practice, writing them again is not too
42 * costly and simplifies things. We can revisit this in the future.
47 * Keep things in this file ordered by WA type, as per the above (context, GT,
48 * display, register whitelist, batchbuffer). Then, inside each type, keep the
51 * - Infrastructure functions and macros
52 * - WAs per platform in standard gen/chrono order
53 * - Public functions to init or apply the given workaround type.
57 * KBL revision ID ordering is bizarre; higher revision ID's map to lower
58 * steppings in some cases. So rather than test against the revision ID
59 * directly, let's map that into our own range of increasing ID's that we
60 * can test against in a regular manner.
63 const struct i915_rev_steppings kbl_revids[] = {
64 [0] = { .gt_stepping = KBL_REVID_A0, .disp_stepping = KBL_REVID_A0 },
65 [1] = { .gt_stepping = KBL_REVID_B0, .disp_stepping = KBL_REVID_B0 },
66 [2] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B0 },
67 [3] = { .gt_stepping = KBL_REVID_D0, .disp_stepping = KBL_REVID_B0 },
68 [4] = { .gt_stepping = KBL_REVID_F0, .disp_stepping = KBL_REVID_C0 },
69 [5] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B1 },
70 [6] = { .gt_stepping = KBL_REVID_D1, .disp_stepping = KBL_REVID_B1 },
71 [7] = { .gt_stepping = KBL_REVID_G0, .disp_stepping = KBL_REVID_C0 },
74 const struct i915_rev_steppings tgl_uy_revid_step_tbl[] = {
75 [0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A0 },
76 [1] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_C0 },
77 [2] = { .gt_stepping = STEP_B1, .disp_stepping = STEP_C0 },
78 [3] = { .gt_stepping = STEP_C0, .disp_stepping = STEP_D0 },
81 /* The same GT stepping between tgl_uy_revids and tgl_revids doesn't mean the same HW */
82 const struct i915_rev_steppings tgl_revid_step_tbl[] = {
83 [0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_B0 },
84 [1] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_D0 },
87 const struct i915_rev_steppings adls_revid_step_tbl[] = {
88 [0x0] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A0 },
89 [0x1] = { .gt_stepping = STEP_A0, .disp_stepping = STEP_A2 },
90 [0x4] = { .gt_stepping = STEP_B0, .disp_stepping = STEP_B0 },
91 [0x8] = { .gt_stepping = STEP_C0, .disp_stepping = STEP_B0 },
92 [0xC] = { .gt_stepping = STEP_D0, .disp_stepping = STEP_C0 },
95 static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
98 wal->engine_name = engine_name;
101 #define WA_LIST_CHUNK (1 << 4)
103 static void wa_init_finish(struct i915_wa_list *wal)
105 /* Trim unused entries. */
106 if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
107 struct i915_wa *list = kmemdup(wal->list,
108 wal->count * sizeof(*list),
120 DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
121 wal->wa_count, wal->name, wal->engine_name);
124 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
126 unsigned int addr = i915_mmio_reg_offset(wa->reg);
127 unsigned int start = 0, end = wal->count;
128 const unsigned int grow = WA_LIST_CHUNK;
131 GEM_BUG_ON(!is_power_of_2(grow));
133 if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
134 struct i915_wa *list;
136 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
139 DRM_ERROR("No space for workaround init!\n");
144 memcpy(list, wal->list, sizeof(*wa) * wal->count);
151 while (start < end) {
152 unsigned int mid = start + (end - start) / 2;
154 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
156 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
159 wa_ = &wal->list[mid];
161 if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
162 DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
163 i915_mmio_reg_offset(wa_->reg),
166 wa_->set &= ~wa->clr;
172 wa_->read |= wa->read;
178 wa_ = &wal->list[wal->count++];
181 while (wa_-- > wal->list) {
182 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
183 i915_mmio_reg_offset(wa_[1].reg));
184 if (i915_mmio_reg_offset(wa_[1].reg) >
185 i915_mmio_reg_offset(wa_[0].reg))
188 swap(wa_[1], wa_[0]);
192 static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
193 u32 clear, u32 set, u32 read_mask)
195 struct i915_wa wa = {
206 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
208 wa_add(wal, reg, clear, set, clear);
212 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
214 wa_write_clr_set(wal, reg, ~0, set);
218 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
220 wa_write_clr_set(wal, reg, set, set);
224 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
226 wa_write_clr_set(wal, reg, clr, 0);
230 * WA operations on "masked register". A masked register has the upper 16 bits
231 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
232 * portion of the register without a rmw: you simply write in the upper 16 bits
233 * the mask of bits you are going to modify.
235 * The wa_masked_* family of functions already does the necessary operations to
236 * calculate the mask based on the parameters passed, so user only has to
237 * provide the lower 16 bits of that register.
241 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
243 wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
247 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
249 wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
253 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
256 wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask);
259 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
260 struct i915_wa_list *wal)
262 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
265 static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
266 struct i915_wa_list *wal)
268 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
271 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
272 struct i915_wa_list *wal)
274 wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
276 /* WaDisableAsyncFlipPerfMode:bdw,chv */
277 wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE);
279 /* WaDisablePartialInstShootdown:bdw,chv */
280 wa_masked_en(wal, GEN8_ROW_CHICKEN,
281 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
283 /* Use Force Non-Coherent whenever executing a 3D context. This is a
284 * workaround for for a possible hang in the unlikely event a TLB
285 * invalidation occurs during a PSD flush.
287 /* WaForceEnableNonCoherent:bdw,chv */
288 /* WaHdcDisableFetchWhenMasked:bdw,chv */
289 wa_masked_en(wal, HDC_CHICKEN0,
290 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
291 HDC_FORCE_NON_COHERENT);
293 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
294 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
295 * polygons in the same 8x4 pixel/sample area to be processed without
296 * stalling waiting for the earlier ones to write to Hierarchical Z
299 * This optimization is off by default for BDW and CHV; turn it on.
301 wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
303 /* Wa4x4STCOptimizationDisable:bdw,chv */
304 wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
307 * BSpec recommends 8x4 when MSAA is used,
308 * however in practice 16x4 seems fastest.
310 * Note that PS/WM thread counts depend on the WIZ hashing
311 * disable bit, which we don't touch here, but it's good
312 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
314 wa_masked_field_set(wal, GEN7_GT_MODE,
315 GEN6_WIZ_HASHING_MASK,
316 GEN6_WIZ_HASHING_16x4);
319 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
320 struct i915_wa_list *wal)
322 struct drm_i915_private *i915 = engine->i915;
324 gen8_ctx_workarounds_init(engine, wal);
326 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
327 wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
329 /* WaDisableDopClockGating:bdw
331 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
332 * to disable EUTC clock gating.
334 wa_masked_en(wal, GEN7_ROW_CHICKEN2,
335 DOP_CLOCK_GATING_DISABLE);
337 wa_masked_en(wal, HALF_SLICE_CHICKEN3,
338 GEN8_SAMPLER_POWER_BYPASS_DIS);
340 wa_masked_en(wal, HDC_CHICKEN0,
341 /* WaForceContextSaveRestoreNonCoherent:bdw */
342 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
343 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
344 (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
347 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
348 struct i915_wa_list *wal)
350 gen8_ctx_workarounds_init(engine, wal);
352 /* WaDisableThreadStallDopClockGating:chv */
353 wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
355 /* Improve HiZ throughput on CHV. */
356 wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
359 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
360 struct i915_wa_list *wal)
362 struct drm_i915_private *i915 = engine->i915;
365 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
367 * Must match Display Engine. See
368 * WaCompressedResourceDisplayNewHashMode.
370 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
371 GEN9_PBE_COMPRESSED_HASH_SELECTION);
372 wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
373 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
376 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
377 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
378 wa_masked_en(wal, GEN8_ROW_CHICKEN,
379 FLOW_CONTROL_ENABLE |
380 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
382 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
383 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
384 wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
385 GEN9_ENABLE_YV12_BUGFIX |
386 GEN9_ENABLE_GPGPU_PREEMPTION);
388 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
389 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
390 wa_masked_en(wal, CACHE_MODE_1,
391 GEN8_4x4_STC_OPTIMIZATION_DISABLE |
392 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
394 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
395 wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
396 GEN9_CCS_TLB_PREFETCH_ENABLE);
398 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
399 wa_masked_en(wal, HDC_CHICKEN0,
400 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
401 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
403 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
404 * both tied to WaForceContextSaveRestoreNonCoherent
405 * in some hsds for skl. We keep the tie for all gen9. The
406 * documentation is a bit hazy and so we want to get common behaviour,
407 * even though there is no clear evidence we would need both on kbl/bxt.
408 * This area has been source of system hangs so we play it safe
409 * and mimic the skl regardless of what bspec says.
411 * Use Force Non-Coherent whenever executing a 3D context. This
412 * is a workaround for a possible hang in the unlikely event
413 * a TLB invalidation occurs during a PSD flush.
416 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
417 wa_masked_en(wal, HDC_CHICKEN0,
418 HDC_FORCE_NON_COHERENT);
420 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
421 if (IS_SKYLAKE(i915) ||
423 IS_COFFEELAKE(i915) ||
425 wa_masked_en(wal, HALF_SLICE_CHICKEN3,
426 GEN8_SAMPLER_POWER_BYPASS_DIS);
428 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
429 wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
432 * Supporting preemption with fine-granularity requires changes in the
433 * batch buffer programming. Since we can't break old userspace, we
434 * need to set our default preemption level to safe value. Userspace is
435 * still able to use more fine-grained preemption levels, since in
436 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
437 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
438 * not real HW workarounds, but merely a way to start using preemption
439 * while maintaining old contract with userspace.
442 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
443 wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
445 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
446 wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
447 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
448 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
450 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
451 if (IS_GEN9_LP(i915))
452 wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
455 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
456 struct i915_wa_list *wal)
458 struct intel_gt *gt = engine->gt;
459 u8 vals[3] = { 0, 0, 0 };
462 for (i = 0; i < 3; i++) {
466 * Only consider slices where one, and only one, subslice has 7
469 if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
473 * subslice_7eu[i] != 0 (because of the check above) and
474 * ss_max == 4 (maximum number of subslices possible per slice)
478 ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
482 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
485 /* Tune IZ hashing. See intel_device_info_runtime_init() */
486 wa_masked_field_set(wal, GEN7_GT_MODE,
487 GEN9_IZ_HASHING_MASK(2) |
488 GEN9_IZ_HASHING_MASK(1) |
489 GEN9_IZ_HASHING_MASK(0),
490 GEN9_IZ_HASHING(2, vals[2]) |
491 GEN9_IZ_HASHING(1, vals[1]) |
492 GEN9_IZ_HASHING(0, vals[0]));
495 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
496 struct i915_wa_list *wal)
498 gen9_ctx_workarounds_init(engine, wal);
499 skl_tune_iz_hashing(engine, wal);
502 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
503 struct i915_wa_list *wal)
505 gen9_ctx_workarounds_init(engine, wal);
507 /* WaDisableThreadStallDopClockGating:bxt */
508 wa_masked_en(wal, GEN8_ROW_CHICKEN,
509 STALL_DOP_GATING_DISABLE);
511 /* WaToEnableHwFixForPushConstHWBug:bxt */
512 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
513 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
516 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
517 struct i915_wa_list *wal)
519 struct drm_i915_private *i915 = engine->i915;
521 gen9_ctx_workarounds_init(engine, wal);
523 /* WaToEnableHwFixForPushConstHWBug:kbl */
524 if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
525 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
526 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
528 /* WaDisableSbeCacheDispatchPortSharing:kbl */
529 wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
530 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
533 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
534 struct i915_wa_list *wal)
536 gen9_ctx_workarounds_init(engine, wal);
538 /* WaToEnableHwFixForPushConstHWBug:glk */
539 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
540 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
543 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
544 struct i915_wa_list *wal)
546 gen9_ctx_workarounds_init(engine, wal);
548 /* WaToEnableHwFixForPushConstHWBug:cfl */
549 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
550 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
552 /* WaDisableSbeCacheDispatchPortSharing:cfl */
553 wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
554 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
557 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
558 struct i915_wa_list *wal)
560 /* WaForceContextSaveRestoreNonCoherent:cnl */
561 wa_masked_en(wal, CNL_HDC_CHICKEN0,
562 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
564 /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
565 wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
566 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
568 /* WaPushConstantDereferenceHoldDisable:cnl */
569 wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
571 /* FtrEnableFastAnisoL1BankingFix:cnl */
572 wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
574 /* WaDisable3DMidCmdPreemption:cnl */
575 wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
577 /* WaDisableGPGPUMidCmdPreemption:cnl */
578 wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
579 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
580 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
582 /* WaDisableEarlyEOT:cnl */
583 wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
586 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
587 struct i915_wa_list *wal)
589 struct drm_i915_private *i915 = engine->i915;
591 /* WaDisableBankHangMode:icl */
594 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
597 /* Wa_1604370585:icl (pre-prod)
598 * Formerly known as WaPushConstantDereferenceHoldDisable
600 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
601 wa_masked_en(wal, GEN7_ROW_CHICKEN2,
602 PUSH_CONSTANT_DEREF_DISABLE);
604 /* WaForceEnableNonCoherent:icl
605 * This is not the same workaround as in early Gen9 platforms, where
606 * lacking this could cause system hangs, but coherency performance
607 * overhead is high and only a few compute workloads really need it
608 * (the register is whitelisted in hardware now, so UMDs can opt in
609 * for coherency if they have a good reason).
611 wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
613 /* Wa_2006611047:icl (pre-prod)
614 * Formerly known as WaDisableImprovedTdlClkGating
616 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
617 wa_masked_en(wal, GEN7_ROW_CHICKEN2,
618 GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
620 /* Wa_2006665173:icl (pre-prod) */
621 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
622 wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
623 GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
625 /* WaEnableFloatBlendOptimization:icl */
626 wa_write_clr_set(wal,
628 0, /* write-only, so skip validation */
629 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
631 /* WaDisableGPGPUMidThreadPreemption:icl */
632 wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
633 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
634 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
636 /* allow headerless messages for preemptible GPGPU context */
637 wa_masked_en(wal, GEN10_SAMPLER_MODE,
638 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
640 /* Wa_1604278689:icl,ehl */
641 wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
642 wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
643 0, /* write-only register; skip validation */
646 /* Wa_1406306137:icl,ehl */
647 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
650 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
651 struct i915_wa_list *wal)
664 wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
665 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
667 /* WaDisableGPGPUMidThreadPreemption:gen12 */
668 wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
669 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
670 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
673 static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
674 struct i915_wa_list *wal)
676 gen12_ctx_workarounds_init(engine, wal);
679 * Wa_1604555607:tgl,rkl
681 * Note that the implementation of this workaround is further modified
682 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
683 * FF_MODE2 register will return the wrong value when read. The default
684 * value for this register is zero for all fields and there are no bit
685 * masks. So instead of doing a RMW we should just write the GS Timer
686 * and TDS timer values for Wa_1604555607 and Wa_16011163337.
690 FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK,
691 FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128,
695 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
696 struct i915_wa_list *wal)
698 gen12_ctx_workarounds_init(engine, wal);
701 wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
702 DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
705 wa_masked_en(wal, HIZ_CHICKEN,
706 DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
711 * Like in tgl_ctx_workarounds_init(), read verification is ignored due
716 FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0);
720 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
721 struct i915_wa_list *wal,
724 struct drm_i915_private *i915 = engine->i915;
726 if (engine->class != RENDER_CLASS)
729 wa_init_start(wal, name, engine->name);
732 dg1_ctx_workarounds_init(engine, wal);
733 else if (IS_ALDERLAKE_S(i915) || IS_ROCKETLAKE(i915) ||
735 tgl_ctx_workarounds_init(engine, wal);
736 else if (IS_GEN(i915, 12))
737 gen12_ctx_workarounds_init(engine, wal);
738 else if (IS_GEN(i915, 11))
739 icl_ctx_workarounds_init(engine, wal);
740 else if (IS_CANNONLAKE(i915))
741 cnl_ctx_workarounds_init(engine, wal);
742 else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
743 cfl_ctx_workarounds_init(engine, wal);
744 else if (IS_GEMINILAKE(i915))
745 glk_ctx_workarounds_init(engine, wal);
746 else if (IS_KABYLAKE(i915))
747 kbl_ctx_workarounds_init(engine, wal);
748 else if (IS_BROXTON(i915))
749 bxt_ctx_workarounds_init(engine, wal);
750 else if (IS_SKYLAKE(i915))
751 skl_ctx_workarounds_init(engine, wal);
752 else if (IS_CHERRYVIEW(i915))
753 chv_ctx_workarounds_init(engine, wal);
754 else if (IS_BROADWELL(i915))
755 bdw_ctx_workarounds_init(engine, wal);
756 else if (IS_GEN(i915, 7))
757 gen7_ctx_workarounds_init(engine, wal);
758 else if (IS_GEN(i915, 6))
759 gen6_ctx_workarounds_init(engine, wal);
760 else if (INTEL_GEN(i915) < 8)
763 MISSING_CASE(INTEL_GEN(i915));
768 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
770 __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
773 int intel_engine_emit_ctx_wa(struct i915_request *rq)
775 struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
784 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
788 cs = intel_ring_begin(rq, (wal->count * 2 + 2));
792 *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
793 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
794 *cs++ = i915_mmio_reg_offset(wa->reg);
799 intel_ring_advance(rq, cs);
801 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
809 gen4_gt_workarounds_init(struct drm_i915_private *i915,
810 struct i915_wa_list *wal)
812 /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
813 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
817 g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
819 gen4_gt_workarounds_init(i915, wal);
821 /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
822 wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
826 ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
828 g4x_gt_workarounds_init(i915, wal);
830 wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
834 snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
839 ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
841 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
843 GEN7_COMMON_SLICE_CHICKEN1,
844 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
846 /* WaApplyL3ControlAndL3ChickenMode:ivb */
847 wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
848 wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
850 /* WaForceL3Serialization:ivb */
851 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
855 vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
857 /* WaForceL3Serialization:vlv */
858 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
861 * WaIncreaseL3CreditsForVLVB0:vlv
862 * This is the hardware default actually.
864 wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
868 hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
870 /* L3 caching of data atomics doesn't work -- disable it. */
871 wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
875 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
876 0 /* XXX does this reg exist? */);
878 /* WaVSRefCountFullforceMissDisable:hsw */
879 wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
883 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
885 /* WaDisableKillLogic:bxt,skl,kbl */
886 if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
892 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
894 * Must match Display Engine. See
895 * WaCompressedResourceDisplayNewHashMode.
899 MMCD_PCLA | MMCD_HOTSPOT_EN);
902 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
905 BDW_DISABLE_HDC_INVALIDATION);
909 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
911 gen9_gt_workarounds_init(i915, wal);
913 /* WaDisableGafsUnitClkGating:skl */
916 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
918 /* WaInPlaceDecompressionHang:skl */
919 if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
921 GEN9_GAMT_ECO_REG_RW_IA,
922 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
926 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
928 gen9_gt_workarounds_init(i915, wal);
930 /* WaInPlaceDecompressionHang:bxt */
932 GEN9_GAMT_ECO_REG_RW_IA,
933 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
937 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
939 gen9_gt_workarounds_init(i915, wal);
941 /* WaDisableDynamicCreditSharing:kbl */
942 if (IS_KBL_GT_REVID(i915, 0, KBL_REVID_B0))
945 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
947 /* WaDisableGafsUnitClkGating:kbl */
950 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
952 /* WaInPlaceDecompressionHang:kbl */
954 GEN9_GAMT_ECO_REG_RW_IA,
955 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
959 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
961 gen9_gt_workarounds_init(i915, wal);
965 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
967 gen9_gt_workarounds_init(i915, wal);
969 /* WaDisableGafsUnitClkGating:cfl */
972 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
974 /* WaInPlaceDecompressionHang:cfl */
976 GEN9_GAMT_ECO_REG_RW_IA,
977 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
981 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
983 const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
984 unsigned int slice, subslice;
985 u32 l3_en, mcr, mcr_mask;
987 GEM_BUG_ON(INTEL_GEN(i915) < 10);
990 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
991 * L3Banks could be fused off in single slice scenario. If that is
992 * the case, we might need to program MCR select to a valid L3Bank
993 * by default, to make sure we correctly read certain registers
994 * later on (in the range 0xB100 - 0xB3FF).
996 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
997 * Before any MMIO read into slice/subslice specific registers, MCR
998 * packet control register needs to be programmed to point to any
999 * enabled s/ss pair. Otherwise, incorrect values will be returned.
1000 * This means each subsequent MMIO read will be forwarded to an
1001 * specific s/ss combination, but this is OK since these registers
1002 * are consistent across s/ss in almost all cases. In the rare
1003 * occasions, such as INSTDONE, where this value is dependent
1004 * on s/ss combo, the read should be done with read_subslice_reg.
1006 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
1007 * to which subslice, or to which L3 bank, the respective mmio reads
1008 * will go, we have to find a common index which works for both
1011 * Case where we cannot find a common index fortunately should not
1012 * happen in production hardware, so we only emit a warning instead of
1013 * implementing something more complex that requires checking the range
1014 * of every MMIO read.
1017 if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
1019 intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
1022 drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
1023 l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
1028 slice = fls(sseu->slice_mask) - 1;
1029 subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
1031 drm_warn(&i915->drm,
1032 "No common index found between subslice mask %x and L3 bank mask %x!\n",
1033 intel_sseu_get_subslices(sseu, slice), l3_en);
1034 subslice = fls(l3_en);
1035 drm_WARN_ON(&i915->drm, !subslice);
1039 if (INTEL_GEN(i915) >= 11) {
1040 mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
1041 mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
1043 mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
1044 mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
1047 drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
1049 wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
1053 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1055 wa_init_mcr(i915, wal);
1057 /* WaInPlaceDecompressionHang:cnl */
1059 GEN9_GAMT_ECO_REG_RW_IA,
1060 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1064 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1066 wa_init_mcr(i915, wal);
1068 /* WaInPlaceDecompressionHang:icl */
1070 GEN9_GAMT_ECO_REG_RW_IA,
1071 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1073 /* WaModifyGamTlbPartitioning:icl */
1074 wa_write_clr_set(wal,
1075 GEN11_GACB_PERF_CTRL,
1076 GEN11_HASH_CTRL_MASK,
1077 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
1079 /* Wa_1405766107:icl
1080 * Formerly known as WaCL2SFHalfMaxAlloc
1084 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
1085 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
1088 * Formerly known as WaDisCtxReload
1091 GEN8_GAMW_ECO_DEV_RW_IA,
1092 GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
1094 /* Wa_1405779004:icl (pre-prod) */
1095 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
1097 SLICE_UNIT_LEVEL_CLKGATE,
1098 MSCUNIT_CLKGATE_DIS);
1100 /* Wa_1406838659:icl (pre-prod) */
1101 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1103 INF_UNIT_LEVEL_CLKGATE,
1106 /* Wa_1406463099:icl
1107 * Formerly known as WaGamTlbPendError
1111 GAMT_CHKN_DISABLE_L3_COH_PIPE);
1113 /* Wa_1607087056:icl,ehl,jsl */
1114 if (IS_ICELAKE(i915) ||
1115 IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
1117 SLICE_UNIT_LEVEL_CLKGATE,
1118 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1123 gen12_gt_workarounds_init(struct drm_i915_private *i915,
1124 struct i915_wa_list *wal)
1126 wa_init_mcr(i915, wal);
/*
 * tgl_gt_workarounds_init - Tigerlake GT workarounds, layered on top of the
 * common Gen12 list. All three entries below are gated to the A0 stepping
 * of U/Y-series GT parts only.
 * NOTE(review): the wa_write_or() call line for the first two entries is
 * missing from this extract - verify against the full file.
 */
1130 tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1132 gen12_gt_workarounds_init(i915, wal);
1134 /* Wa_1409420604:tgl */
1135 if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0))
1137 SUBSLICE_UNIT_LEVEL_CLKGATE2,
1138 CPSSUNIT_CLKGATE_DIS);
1140 /* Wa_1607087056:tgl also know as BUG:1409180338 */
1141 if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0))
1143 SLICE_UNIT_LEVEL_CLKGATE,
1144 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1146 /* Wa_1408615072:tgl[a0] */
1147 if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0))
1148 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1149 VSUNIT_CLKGATE_DIS_TGL);
/*
 * dg1_gt_workarounds_init - DG1 GT workarounds, layered on top of the
 * common Gen12 list. Wa_1607087056 is A0-only; the other two apply to all
 * DG1 steppings.
 */
1153 dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1155 gen12_gt_workarounds_init(i915, wal);
1157 /* Wa_1607087056:dg1 */
1158 if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0))
1160 SLICE_UNIT_LEVEL_CLKGATE,
1161 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1163 /* Wa_1409420604:dg1 */
1166 SUBSLICE_UNIT_LEVEL_CLKGATE2,
1167 CPSSUNIT_CLKGATE_DIS);
1169 /* Wa_1408615072:dg1 */
1170 /* Empirical testing shows this register is unaffected by engine reset. */
1172 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1173 VSUNIT_CLKGATE_DIS_TGL);
/*
 * gt_init_workarounds - dispatch to the per-platform GT workaround builder.
 * Ordered newest platform first; each branch fills @wal for exactly one
 * platform/generation. Falls through to MISSING_CASE() for unknown gens so
 * new platforms fail loudly rather than silently getting no workarounds.
 * NOTE(review): the first IS_DG1() condition line appears to be missing
 * from this extract - verify against the full file.
 */
1177 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
1180 dg1_gt_workarounds_init(i915, wal);
1181 else if (IS_TIGERLAKE(i915))
1182 tgl_gt_workarounds_init(i915, wal);
1183 else if (IS_GEN(i915, 12))
1184 gen12_gt_workarounds_init(i915, wal);
1185 else if (IS_GEN(i915, 11))
1186 icl_gt_workarounds_init(i915, wal);
1187 else if (IS_CANNONLAKE(i915))
1188 cnl_gt_workarounds_init(i915, wal);
1189 else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
1190 cfl_gt_workarounds_init(i915, wal);
1191 else if (IS_GEMINILAKE(i915))
1192 glk_gt_workarounds_init(i915, wal);
1193 else if (IS_KABYLAKE(i915))
1194 kbl_gt_workarounds_init(i915, wal);
1195 else if (IS_BROXTON(i915))
1196 bxt_gt_workarounds_init(i915, wal);
1197 else if (IS_SKYLAKE(i915))
1198 skl_gt_workarounds_init(i915, wal);
1199 else if (IS_HASWELL(i915))
1200 hsw_gt_workarounds_init(i915, wal);
1201 else if (IS_VALLEYVIEW(i915))
1202 vlv_gt_workarounds_init(i915, wal);
1203 else if (IS_IVYBRIDGE(i915))
1204 ivb_gt_workarounds_init(i915, wal);
1205 else if (IS_GEN(i915, 6))
1206 snb_gt_workarounds_init(i915, wal);
1207 else if (IS_GEN(i915, 5))
1208 ilk_gt_workarounds_init(i915, wal);
1209 else if (IS_G4X(i915))
1210 g4x_gt_workarounds_init(i915, wal);
1211 else if (IS_GEN(i915, 4))
1212 gen4_gt_workarounds_init(i915, wal);
/* Gens <= 8 without a dedicated builder intentionally get no GT WAs. */
1213 else if (INTEL_GEN(i915) <= 8)
1216 MISSING_CASE(INTEL_GEN(i915));
/*
 * intel_gt_init_workarounds - public entry point: build the device-global
 * GT workaround list once at driver init. Application happens later via
 * intel_gt_apply_workarounds() on reset/resume paths.
 */
1219 void intel_gt_init_workarounds(struct drm_i915_private *i915)
1221 struct i915_wa_list *wal = &i915->gt_wa_list;
1223 wa_init_start(wal, "GT", "global");
1224 gt_init_workarounds(i915, wal);
1225 wa_init_finish(wal);
/*
 * wal_get_fw_for_rmw - compute the union of forcewake domains needed to
 * read-modify-write every register in @wal, so one forcewake_get covers the
 * whole application loop.
 * NOTE(review): the return statement and the forcewake_for_reg() argument
 * tail are missing from this extract - verify against the full file.
 */
1228 static enum forcewake_domains
1229 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1231 enum forcewake_domains fw = 0;
1235 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1236 fw |= intel_uncore_forcewake_for_reg(uncore,
/*
 * wa_verify - check that the bits we care about (wa->read mask) in the
 * current register value @cur match the expected wa->set value; log an
 * error identifying the list (@name) and call site (@from) on mismatch.
 * NOTE(review): the boolean return statements are missing from this
 * extract; presumably returns false on mismatch - confirm upstream.
 */
1245 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
1247 if ((cur ^ wa->set) & wa->read) {
1248 DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1249 name, from, i915_mmio_reg_offset(wa->reg),
1250 cur, cur & wa->read, wa->set & wa->read);
/*
 * wa_list_apply - write every workaround in @wal to the hardware.
 * Takes the uncore spinlock with IRQs disabled and grabs all required
 * forcewake domains once up front (wal_get_fw_for_rmw) so the per-register
 * writes can use the cheaper _fw accessors. Under CONFIG_DRM_I915_DEBUG_GEM
 * each write is immediately read back and verified.
 * NOTE(review): the early-return for an empty list and the rmw/write
 * branch condition are missing from this extract - verify upstream.
 */
1259 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1261 enum forcewake_domains fw;
1262 unsigned long flags;
1269 fw = wal_get_fw_for_rmw(uncore, wal);
1271 spin_lock_irqsave(&uncore->lock, flags);
1272 intel_uncore_forcewake_get__locked(uncore, fw);
1274 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1276 intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set);
1278 intel_uncore_write_fw(uncore, wa->reg, wa->set);
1279 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1281 intel_uncore_read_fw(uncore, wa->reg),
1282 wal->name, "application");
1285 intel_uncore_forcewake_put__locked(uncore, fw);
1286 spin_unlock_irqrestore(&uncore->lock, flags);
/* Public wrapper: apply the device-global GT workaround list to @gt. */
1289 void intel_gt_apply_workarounds(struct intel_gt *gt)
1291 wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
/*
 * wa_list_verify - read back every register in @wal via MMIO and confirm
 * the workaround bits are still set (wa_verify per entry).
 * NOTE(review): the accumulation of the per-entry result and the return
 * statement are missing from this extract - verify upstream.
 */
1294 static bool wa_list_verify(struct intel_uncore *uncore,
1295 const struct i915_wa_list *wal,
1302 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1304 intel_uncore_read(uncore, wa->reg),
/* Public wrapper: verify the global GT list; @from names the caller for logs. */
1310 bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1312 return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
/*
 * is_nonpriv_flags_valid - sanity-check whitelist entry flags: no bits
 * outside the valid mask, and the access field must not be the one invalid
 * encoding of its 2-bit enum.
 * NOTE(review): the return statements are missing from this extract.
 */
1316 static bool is_nonpriv_flags_valid(u32 flags)
1318 /* Check only valid flag bits are set */
1319 if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1322 /* NB: Only 3 out of 4 enum values are valid for access field */
1323 if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1324 RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
/*
 * whitelist_reg_ext - add @reg to the engine whitelist with access @flags
 * encoded into the low bits of the register offset (the HW packs access
 * mode and range into the RING_FORCE_TO_NONPRIV slot value). Bails with a
 * debug warning if the slot table is full or the flags are invalid.
 */
1331 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1333 struct i915_wa wa = {
1337 if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1340 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1343 wa.reg.reg |= flags;
/* Convenience wrapper: whitelist @reg with full read/write UMD access. */
1348 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1350 whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
/*
 * gen9_whitelist_build - whitelist entries shared by all Gen9 platforms
 * (skl/bxt/glk/kbl/cfl); per-platform builders call this then append.
 */
1353 static void gen9_whitelist_build(struct i915_wa_list *w)
1355 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1356 whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1358 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1359 whitelist_reg(w, GEN8_CS_CHICKEN1);
1361 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1362 whitelist_reg(w, GEN8_HDC_CHICKEN1);
1364 /* WaSendPushConstantsFromMMIO:skl,bxt */
1365 whitelist_reg(w, COMMON_SLICE_CHICKEN2);
/* Skylake whitelist: Gen9 common entries plus one SKL-specific register.
 * Render engine only; other engine classes get no whitelist here. */
1368 static void skl_whitelist_build(struct intel_engine_cs *engine)
1370 struct i915_wa_list *w = &engine->whitelist;
1372 if (engine->class != RENDER_CLASS)
1375 gen9_whitelist_build(w);
1377 /* WaDisableLSQCROPERFforOCL:skl */
1378 whitelist_reg(w, GEN8_L3SQCREG4);
/* Broxton whitelist: just the Gen9 common entries, render engine only. */
1381 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1383 if (engine->class != RENDER_CLASS)
1386 gen9_whitelist_build(&engine->whitelist);
/* Kabylake whitelist: Gen9 common entries plus L3SQCREG4 (same extra
 * register as SKL). Render engine only. */
1389 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1391 struct i915_wa_list *w = &engine->whitelist;
1393 if (engine->class != RENDER_CLASS)
1396 gen9_whitelist_build(w);
1398 /* WaDisableLSQCROPERFforOCL:kbl */
1399 whitelist_reg(w, GEN8_L3SQCREG4);
/* Geminilake whitelist: Gen9 common entries plus the barrier-mode chicken
 * register userspace needs to program. Render engine only. */
1402 static void glk_whitelist_build(struct intel_engine_cs *engine)
1404 struct i915_wa_list *w = &engine->whitelist;
1406 if (engine->class != RENDER_CLASS)
1409 gen9_whitelist_build(w);
1411 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1412 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
/*
 * Coffeelake whitelist: Gen9 common entries plus read-only UMD access to
 * the 4-register PS invocation/depth counter block (RANGE_4 covers all
 * four consecutive registers with one slot). Render engine only.
 */
1415 static void cfl_whitelist_build(struct intel_engine_cs *engine)
1417 struct i915_wa_list *w = &engine->whitelist;
1419 if (engine->class != RENDER_CLASS)
1422 gen9_whitelist_build(w);
1425 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1427 * This covers 4 register which are next to one another :
1428 * - PS_INVOCATION_COUNT
1429 * - PS_INVOCATION_COUNT_UDW
1431 * - PS_DEPTH_COUNT_UDW
1433 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1434 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1435 RING_FORCE_TO_NONPRIV_RANGE_4);
/*
 * Cometlake whitelist: non-render engines get read access to the context
 * timestamp register; all engines then inherit the CFL list (which itself
 * only adds entries on the render engine).
 */
1438 static void cml_whitelist_build(struct intel_engine_cs *engine)
1440 struct i915_wa_list *w = &engine->whitelist;
1442 if (engine->class != RENDER_CLASS)
1443 whitelist_reg_ext(w,
1444 RING_CTX_TIMESTAMP(engine->mmio_base),
1445 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1447 cfl_whitelist_build(engine);
/* Cannonlake whitelist: single entry, render engine only (does NOT pull in
 * the Gen9 common list). */
1450 static void cnl_whitelist_build(struct intel_engine_cs *engine)
1452 struct i915_wa_list *w = &engine->whitelist;
1454 if (engine->class != RENDER_CLASS)
1457 /* WaEnablePreemptionGranularityControlByUMD:cnl */
1458 whitelist_reg(w, GEN8_CS_CHICKEN1);
/*
 * icl_whitelist_build - Icelake whitelist, switched on engine class:
 * render gets chicken/sampler/PS-counter registers; video-decode gets
 * read-only HuC status registers (hardcoded offsets relative to
 * engine->mmio_base); everything else gets read access to the context
 * timestamp only.
 * NOTE(review): the RENDER_CLASS case label and break statements are
 * missing from this extract - verify the switch structure upstream.
 */
1461 static void icl_whitelist_build(struct intel_engine_cs *engine)
1463 struct i915_wa_list *w = &engine->whitelist;
1465 switch (engine->class) {
1467 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1468 whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1470 /* WaAllowUMDToModifySamplerMode:icl */
1471 whitelist_reg(w, GEN10_SAMPLER_MODE);
1473 /* WaEnableStateCacheRedirectToCS:icl */
1474 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1477 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1479 * This covers 4 register which are next to one another :
1480 * - PS_INVOCATION_COUNT
1481 * - PS_INVOCATION_COUNT_UDW
1483 * - PS_DEPTH_COUNT_UDW
1485 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1486 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1487 RING_FORCE_TO_NONPRIV_RANGE_4);
1490 case VIDEO_DECODE_CLASS:
1491 /* hucStatusRegOffset */
1492 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1493 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1494 /* hucUKernelHdrInfoRegOffset */
1495 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1496 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1497 /* hucStatus2RegOffset */
1498 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1499 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1500 whitelist_reg_ext(w,
1501 RING_CTX_TIMESTAMP(engine->mmio_base),
1502 RING_FORCE_TO_NONPRIV_ACCESS_RD);
/* default: non-render, non-VD engines only expose the context timestamp. */
1506 whitelist_reg_ext(w,
1507 RING_CTX_TIMESTAMP(engine->mmio_base),
1508 RING_FORCE_TO_NONPRIV_ACCESS_RD);
/*
 * tgl_whitelist_build - Tigerlake whitelist: render gets the PS counter
 * block plus two TGL-specific chicken registers; every other engine class
 * gets read access to the context timestamp.
 * NOTE(review): the RENDER_CLASS case label and break statements are
 * missing from this extract - verify the switch structure upstream.
 */
1513 static void tgl_whitelist_build(struct intel_engine_cs *engine)
1515 struct i915_wa_list *w = &engine->whitelist;
1517 switch (engine->class) {
1520 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
1523 * This covers 4 registers which are next to one another :
1524 * - PS_INVOCATION_COUNT
1525 * - PS_INVOCATION_COUNT_UDW
1527 * - PS_DEPTH_COUNT_UDW
1529 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1530 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1531 RING_FORCE_TO_NONPRIV_RANGE_4);
1533 /* Wa_1808121037:tgl */
1534 whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
1536 /* Wa_1806527549:tgl */
1537 whitelist_reg(w, HIZ_CHICKEN);
1540 whitelist_reg_ext(w,
1541 RING_CTX_TIMESTAMP(engine->mmio_base),
1542 RING_FORCE_TO_NONPRIV_ACCESS_RD);
/*
 * dg1_whitelist_build - DG1 whitelist: inherits the full TGL list, then on
 * A0 silicon additionally exposes RING_ID read-only on render and copy
 * engines (GEN:BUG:1409280441).
 */
1547 static void dg1_whitelist_build(struct intel_engine_cs *engine)
1549 struct i915_wa_list *w = &engine->whitelist;
1551 tgl_whitelist_build(engine);
1553 /* GEN:BUG:1409280441:dg1 */
1554 if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) &&
1555 (engine->class == RENDER_CLASS ||
1556 engine->class == COPY_ENGINE_CLASS))
1557 whitelist_reg_ext(w, RING_ID(engine->mmio_base),
1558 RING_FORCE_TO_NONPRIV_ACCESS_RD);
/*
 * intel_engine_init_whitelist - public entry point: build the per-engine
 * register whitelist, dispatching to the per-platform builder (newest
 * first). Platforms <= Gen8 intentionally have no whitelist.
 * NOTE(review): the IS_DG1() condition line and wa_init_finish() call
 * appear to be missing from this extract - verify upstream.
 */
1561 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1563 struct drm_i915_private *i915 = engine->i915;
1564 struct i915_wa_list *w = &engine->whitelist;
1566 wa_init_start(w, "whitelist", engine->name);
1569 dg1_whitelist_build(engine);
1570 else if (IS_GEN(i915, 12))
1571 tgl_whitelist_build(engine);
1572 else if (IS_GEN(i915, 11))
1573 icl_whitelist_build(engine);
1574 else if (IS_CANNONLAKE(i915))
1575 cnl_whitelist_build(engine);
1576 else if (IS_COMETLAKE(i915))
1577 cml_whitelist_build(engine);
1578 else if (IS_COFFEELAKE(i915))
1579 cfl_whitelist_build(engine);
1580 else if (IS_GEMINILAKE(i915))
1581 glk_whitelist_build(engine);
1582 else if (IS_KABYLAKE(i915))
1583 kbl_whitelist_build(engine);
1584 else if (IS_BROXTON(i915))
1585 bxt_whitelist_build(engine);
1586 else if (IS_SKYLAKE(i915))
1587 skl_whitelist_build(engine);
1588 else if (INTEL_GEN(i915) <= 8)
1591 MISSING_CASE(INTEL_GEN(i915));
/*
 * intel_engine_apply_whitelist - program the engine's whitelist into the
 * RING_FORCE_TO_NONPRIV slots. Each slot value is the register offset (with
 * access flags already OR-ed in by whitelist_reg_ext). Unused slots are
 * pointed at the harmless RING_NOPID register so stale entries cannot leak
 * privileged access.
 */
1596 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1598 const struct i915_wa_list *wal = &engine->whitelist;
1599 struct intel_uncore *uncore = engine->uncore;
1600 const u32 base = engine->mmio_base;
1607 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1608 intel_uncore_write(uncore,
1609 RING_FORCE_TO_NONPRIV(base, i),
1610 i915_mmio_reg_offset(wa->reg));
1612 /* And clear the rest just in case of garbage */
1613 for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1614 intel_uncore_write(uncore,
1615 RING_FORCE_TO_NONPRIV(base, i),
1616 i915_mmio_reg_offset(RING_NOPID(base)));
/*
 * rcs_engine_wa_init - render-engine workarounds, ordered newest platform
 * first (dg1/tgl/rkl/adl-s down to Gen4). These live in the engine wa_list
 * and are re-applied after engine reset.
 * NOTE(review): this extract is missing many wa_masked_en()/wa_write_or()
 * call lines and several closing braces (only argument lines survive in
 * places) - verify every entry against the full upstream file before
 * modifying anything here.
 */
1620 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1622 struct drm_i915_private *i915 = engine->i915;
1624 if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
1625 IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) {
1627 * Wa_1607138336:tgl[a0],dg1[a0]
1628 * Wa_1607063988:tgl[a0],dg1[a0]
1631 GEN9_CTX_PREEMPT_REG,
1632 GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
1635 if (IS_TGL_UY_GT_STEPPING(i915, STEP_A0, STEP_A0)) {
1638 * (see also Wa_1606682166:icl)
1642 GEN7_DISABLE_SAMPLER_PREFETCH);
1645 if (IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
1646 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1647 /* Wa_1606931601:tgl,rkl,dg1,adl-s */
1648 wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
1651 * Wa_1407928979:tgl A*
1652 * Wa_18011464164:tgl[B0+],dg1[B0+]
1653 * Wa_22010931296:tgl[B0+],dg1[B0+]
1654 * Wa_14010919138:rkl,dg1,adl-s
1656 wa_write_or(wal, GEN7_FF_THREAD_MODE,
1657 GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1660 * Wa_1606700617:tgl,dg1
1661 * Wa_22010271021:tgl,rkl,dg1, adl-s
1664 GEN9_CS_DEBUG_MODE1,
1665 FF_DOP_CLOCK_GATE_DISABLE);
1668 if (IS_ALDERLAKE_S(i915) || IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
1669 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1670 /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s */
1671 wa_masked_en(wal, GEN7_ROW_CHICKEN2,
1672 GEN12_PUSH_CONST_DEREF_HOLD_DIS);
1676 * Wa_14010229206:tgl,rkl,dg1[a0],adl-s
1678 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
1682 if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
1683 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1687 * Wa_1607297627:tgl,rkl,dg1[a0]
1689 * On TGL and RKL there are multiple entries for this WA in the
1690 * BSpec; some indicate this is an A0-only WA, others indicate
1691 * it applies to all steppings so we trust the "all steppings."
1692 * For DG1 this only applies to A0.
1695 GEN6_RC_SLEEP_PSMI_CONTROL,
1696 GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
1697 GEN8_RC_SEMA_IDLE_MSG_DISABLE);
1700 if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1701 /* Wa_1406941453:tgl,rkl,dg1 */
1707 if (IS_GEN(i915, 11)) {
1708 /* This is not an Wa. Enable for better image quality */
1711 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1713 /* WaPipelineFlushCoherentLines:icl */
1716 GEN8_LQSC_FLUSH_COHERENT_LINES);
1720 * Formerly known as WaGAPZPriorityScheme
1724 GEN11_ARBITRATION_PRIO_ORDER_MASK);
1728 * Formerly known as WaL3BankAddressHashing
1730 wa_write_clr_set(wal,
1732 GEN11_HASH_CTRL_EXCL_MASK,
1733 GEN11_HASH_CTRL_EXCL_BIT0);
1734 wa_write_clr_set(wal,
1736 GEN11_BANK_HASH_ADDR_EXCL_MASK,
1737 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1741 * Formerly known as WaDisableCleanEvicts
1745 GEN11_LQSC_CLEAN_EVICT_DISABLE);
1747 /* WaForwardProgressSoftReset:icl */
1749 GEN10_SCRATCH_LNCF2,
1750 PMFLUSHDONE_LNICRSDROP |
1751 PMFLUSH_GAPL3UNBLOCK |
1752 PMFLUSHDONE_LNEBLK);
1754 /* Wa_1406609255:icl (pre-prod) */
1755 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1758 GEN7_DISABLE_DEMAND_PREFETCH);
1760 /* Wa_1606682166:icl */
1763 GEN7_DISABLE_SAMPLER_PREFETCH);
1765 /* Wa_1409178092:icl */
1766 wa_write_clr_set(wal,
1768 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
1771 /* WaEnable32PlaneMode:icl */
1772 wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
1773 GEN11_ENABLE_32_PLANE_MODE);
1776 * Wa_1408615072:icl,ehl (vsunit)
1777 * Wa_1407596294:icl,ehl (hsunit)
1779 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1780 VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
1782 /* Wa_1407352427:icl,ehl */
1783 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1784 PSDUNIT_CLKGATE_DIS);
1786 /* Wa_1406680159:icl,ehl */
1788 SUBSLICE_UNIT_LEVEL_CLKGATE,
1789 GWUNIT_CLKGATE_DIS);
1792 * Wa_1408767742:icl[a2..forever],ehl[all]
1793 * Wa_1605460711:icl[a0..c0]
1796 GEN7_FF_THREAD_MODE,
1797 GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1799 /* Wa_22010271021:ehl */
1800 if (IS_JSL_EHL(i915))
1802 GEN9_CS_DEBUG_MODE1,
1803 FF_DOP_CLOCK_GATE_DISABLE);
1806 if (IS_GEN_RANGE(i915, 9, 12)) {
1807 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
1809 GEN7_FF_SLICE_CS_CHICKEN1,
1810 GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1813 if (IS_SKYLAKE(i915) ||
1814 IS_KABYLAKE(i915) ||
1815 IS_COFFEELAKE(i915) ||
1816 IS_COMETLAKE(i915)) {
1817 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1820 GEN9_GAPS_TSV_CREDIT_DISABLE);
1823 if (IS_BROXTON(i915)) {
1824 /* WaDisablePooledEuLoadBalancingFix:bxt */
1826 FF_SLICE_CS_CHICKEN2,
1827 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1830 if (IS_GEN(i915, 9)) {
1831 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1833 GEN9_CSFE_CHICKEN1_RCS,
1834 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1836 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1839 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1841 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1842 if (IS_GEN9_LP(i915))
1843 wa_write_clr_set(wal,
1845 L3_PRIO_CREDITS_MASK,
1846 L3_GENERAL_PRIO_CREDITS(62) |
1847 L3_HIGH_PRIO_CREDITS(2));
1849 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1852 GEN8_LQSC_FLUSH_COHERENT_LINES);
1854 /* Disable atomics in L3 to prevent unrecoverable hangs */
1855 wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
1856 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
1857 wa_write_clr_set(wal, GEN8_L3SQCREG4,
1858 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
1859 wa_write_clr_set(wal, GEN9_SCRATCH1,
1860 EVICTION_PERF_FIX_ENABLE, 0);
1863 if (IS_HASWELL(i915)) {
1864 /* WaSampleCChickenBitEnable:hsw */
1866 HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
1870 /* enable HiZ Raw Stall Optimization */
1871 HIZ_RAW_STALL_OPT_DISABLE);
1873 /* WaDisable4x2SubspanOptimization:hsw */
1874 wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
1877 if (IS_VALLEYVIEW(i915)) {
1878 /* WaDisableEarlyCull:vlv */
1881 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
1884 * WaVSThreadDispatchOverride:ivb,vlv
1886 * This actually overrides the dispatch
1887 * mode for all thread types.
1889 wa_write_clr_set(wal,
1890 GEN7_FF_THREAD_MODE,
1892 GEN7_FF_TS_SCHED_HW |
1893 GEN7_FF_VS_SCHED_HW |
1894 GEN7_FF_DS_SCHED_HW);
1896 /* WaPsdDispatchEnable:vlv */
1897 /* WaDisablePSDDualDispatchEnable:vlv */
1899 GEN7_HALF_SLICE_CHICKEN1,
1900 GEN7_MAX_PS_THREAD_DEP |
1901 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
1904 if (IS_IVYBRIDGE(i915)) {
1905 /* WaDisableEarlyCull:ivb */
1908 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
/* Deliberately disabled: documented to corrupt HiZ on ivb:gt1. */
1910 if (0) { /* causes HiZ corruption on ivb:gt1 */
1911 /* enable HiZ Raw Stall Optimization */
1914 HIZ_RAW_STALL_OPT_DISABLE);
1918 * WaVSThreadDispatchOverride:ivb,vlv
1920 * This actually overrides the dispatch
1921 * mode for all thread types.
1923 wa_write_clr_set(wal,
1924 GEN7_FF_THREAD_MODE,
1926 GEN7_FF_TS_SCHED_HW |
1927 GEN7_FF_VS_SCHED_HW |
1928 GEN7_FF_DS_SCHED_HW);
1930 /* WaDisablePSDDualDispatchEnable:ivb */
1931 if (IS_IVB_GT1(i915))
1933 GEN7_HALF_SLICE_CHICKEN1,
1934 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
1937 if (IS_GEN(i915, 7)) {
1938 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1941 GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
1943 /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
1944 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
1947 * BSpec says this must be set, even though
1948 * WaDisable4x2SubspanOptimization:ivb,hsw
1949 * WaDisable4x2SubspanOptimization isn't listed for VLV.
1953 PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
1956 * BSpec recommends 8x4 when MSAA is used,
1957 * however in practice 16x4 seems fastest.
1959 * Note that PS/WM thread counts depend on the WIZ hashing
1960 * disable bit, which we don't touch here, but it's good
1961 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
1963 wa_add(wal, GEN7_GT_MODE, 0,
1964 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
1965 GEN6_WIZ_HASHING_16x4),
1966 GEN6_WIZ_HASHING_16x4);
1969 if (IS_GEN_RANGE(i915, 6, 7))
1971 * We need to disable the AsyncFlip performance optimisations in
1972 * order to use MI_WAIT_FOR_EVENT within the CS. It should
1973 * already be programmed to '1' on all products.
1975 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1979 ASYNC_FLIP_PERF_DISABLE);
1981 if (IS_GEN(i915, 6)) {
1983 * Required for the hardware to program scanline values for
1985 * WaEnableFlushTlbInvalidationMode:snb
1989 GFX_TLB_INVALIDATE_EXPLICIT);
1991 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
1994 _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
1998 /* WaStripsFansDisableFastClipPerformanceFix:snb */
1999 _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
2002 * "This bit must be set if 3DSTATE_CLIP clip mode is set
2003 * to normal and 3DSTATE_SF number of SF output attributes
2006 _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
2009 * BSpec recommends 8x4 when MSAA is used,
2010 * however in practice 16x4 seems fastest.
2012 * Note that PS/WM thread counts depend on the WIZ hashing
2013 * disable bit, which we don't touch here, but it's good
2014 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2018 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
2019 GEN6_WIZ_HASHING_16x4);
2021 /* WaDisable_RenderCache_OperationalFlush:snb */
2022 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
2025 * From the Sandybridge PRM, volume 1 part 3, page 24:
2026 * "If this bit is set, STCunit will have LRA as replacement
2027 * policy. [...] This bit must be reset. LRA replacement
2028 * policy is not supported."
2032 CM0_STC_EVICT_DISABLE_LRA_SNB);
2035 if (IS_GEN_RANGE(i915, 4, 6))
2036 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
2037 wa_add(wal, MI_MODE,
2038 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
2039 /* XXX bit doesn't stick on Broadwater */
2040 IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
2042 if (IS_GEN(i915, 4))
2044 * Disable CONSTANT_BUFFER before it is loaded from the context
2045 * image. For as it is loaded, it is executed and the stored
2046 * address may no longer be valid, leading to a GPU hang.
2048 * This imposes the requirement that userspace reload their
2049 * CONSTANT_BUFFER on every batch, fortunately a requirement
2050 * they are already accustomed to from before contexts were
2053 wa_add(wal, ECOSKPD,
2054 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
2055 0 /* XXX bit doesn't stick on Broadwater */);
/*
 * xcs_engine_wa_init - workarounds for non-render engines; currently only
 * a KBL VECS semaphore-wait-poll entry gated to revisions A0..E0.
 * NOTE(review): the wa_write() call line and its value argument are
 * missing from this extract - verify upstream.
 */
2059 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2061 struct drm_i915_private *i915 = engine->i915;
2063 /* WaKBLVECSSemaphoreWaitPoll:kbl */
2064 if (IS_KBL_GT_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
2066 RING_SEMA_WAIT_POLL(engine->mmio_base),
/*
 * engine_init_workarounds - route to the render- or non-render-engine
 * builder. Pre-Gen4 is only reachable from selftests (mock engines), hence
 * the I915_SELFTEST_ONLY guard.
 */
2072 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2074 if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4))
2077 if (engine->class == RENDER_CLASS)
2078 rcs_engine_wa_init(engine, wal);
2080 xcs_engine_wa_init(engine, wal);
/*
 * intel_engine_init_workarounds - public entry point: build the per-engine
 * workaround list. No engine workarounds exist before Gen4, so bail early.
 */
2083 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
2085 struct i915_wa_list *wal = &engine->wa_list;
2087 if (INTEL_GEN(engine->i915) < 4)
2090 wa_init_start(wal, "engine", engine->name);
2091 engine_init_workarounds(engine, wal);
2092 wa_init_finish(wal);
/* Public wrapper: apply this engine's workaround list to the hardware. */
2095 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
2097 wa_list_apply(engine->uncore, &engine->wa_list);
/*
 * Register ranges steered by the MCR selector on Gen8-Gen11 parts.
 * Terminated by the implicit zeroed entry (mcr_range() scans until
 * .start == 0).
 */
2105 static const struct mcr_range mcr_ranges_gen8[] = {
2106 { .start = 0x5500, .end = 0x55ff },
2107 { .start = 0x7000, .end = 0x7fff },
2108 { .start = 0x9400, .end = 0x97ff },
2109 { .start = 0xb000, .end = 0xb3ff },
2110 { .start = 0xe000, .end = 0xe7ff },
/* MCR-steered register ranges for Gen12+; same sentinel convention as the
 * Gen8 table above. */
2114 static const struct mcr_range mcr_ranges_gen12[] = {
2115 { .start = 0x8150, .end = 0x815f },
2116 { .start = 0x9520, .end = 0x955f },
2117 { .start = 0xb100, .end = 0xb3ff },
2118 { .start = 0xde80, .end = 0xe8ff },
2119 { .start = 0x24a00, .end = 0x24a7f },
/*
 * mcr_range - is @offset inside an MCR-steered multicast range for this
 * platform? Such registers cannot be verified from the command streamer
 * path (the MCR selector only affects CPU MMIO), so callers skip them.
 * NOTE(review): the pre-Gen8 early return and the boolean return
 * statements are missing from this extract - verify upstream.
 */
2123 static bool mcr_range(struct drm_i915_private *i915, u32 offset)
2125 const struct mcr_range *mcr_ranges;
2128 if (INTEL_GEN(i915) >= 12)
2129 mcr_ranges = mcr_ranges_gen12;
2130 else if (INTEL_GEN(i915) >= 8)
2131 mcr_ranges = mcr_ranges_gen8;
2136 * Registers in these ranges are affected by the MCR selector
2137 * which only controls CPU initiated MMIO. Routing does not
2138 * work for CS access so we cannot verify them on this path.
2140 for (i = 0; mcr_ranges[i].start; i++)
2141 if (offset >= mcr_ranges[i].start &&
2142 offset <= mcr_ranges[i].end)
/*
 * wa_list_srm - emit MI_STORE_REGISTER_MEM commands into @rq that dump
 * every (non-MCR) workaround register into @vma, slot i at offset
 * i * sizeof(u32). The first pass counts verifiable registers to size
 * intel_ring_begin(); Gen8+ SRM takes an extra dword for the 64-bit
 * address.
 * NOTE(review): the continue statements, SRM emission lines and return
 * are missing from this extract - verify upstream.
 */
2149 wa_list_srm(struct i915_request *rq,
2150 const struct i915_wa_list *wal,
2151 struct i915_vma *vma)
2153 struct drm_i915_private *i915 = rq->engine->i915;
2154 unsigned int i, count = 0;
2155 const struct i915_wa *wa;
2158 srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
2159 if (INTEL_GEN(i915) >= 8)
2162 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2163 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
2167 cs = intel_ring_begin(rq, 4 * count);
2171 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2172 u32 offset = i915_mmio_reg_offset(wa->reg);
2174 if (mcr_range(i915, offset))
2179 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
2182 intel_ring_advance(rq, cs);
/*
 * engine_wa_list_verify - verify @wal from the GPU's point of view: submit
 * a request that SRMs each register into a scratch buffer, wait for it,
 * then wa_verify() each stored value. Uses the ww-context retry pattern
 * (i915_gem_ww_ctx_backoff on -EDEADLK) for object locking, and holds an
 * engine-pm wakeref for the duration. MCR-steered registers are skipped
 * (see mcr_range()).
 * NOTE(review): several error-path labels/gotos and intermediate lines are
 * missing from this extract - verify the unwind ordering upstream.
 */
2187 static int engine_wa_list_verify(struct intel_context *ce,
2188 const struct i915_wa_list * const wal,
2191 const struct i915_wa *wa;
2192 struct i915_request *rq;
2193 struct i915_vma *vma;
2194 struct i915_gem_ww_ctx ww;
2202 vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
2203 wal->count * sizeof(u32));
2205 return PTR_ERR(vma);
2207 intel_engine_pm_get(ce->engine);
2208 i915_gem_ww_ctx_init(&ww, false);
2210 err = i915_gem_object_lock(vma->obj, &ww);
2212 err = intel_context_pin_ww(ce, &ww);
2216 rq = i915_request_create(ce);
2222 err = i915_request_await_object(rq, vma->obj, true);
2224 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
2226 err = wa_list_srm(rq, wal, vma);
2228 i915_request_get(rq);
2230 i915_request_set_error_once(rq, err);
2231 i915_request_add(rq);
2236 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2241 results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
2242 if (IS_ERR(results)) {
2243 err = PTR_ERR(results);
2248 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2249 if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
2252 if (!wa_verify(wa, results[i], wal->name, from))
2256 i915_gem_object_unpin_map(vma->obj);
2259 i915_request_put(rq);
2261 intel_context_unpin(ce);
/* ww retry loop: on deadlock, back off and re-attempt the locking. */
2263 if (err == -EDEADLK) {
2264 err = i915_gem_ww_ctx_backoff(&ww);
2268 i915_gem_ww_ctx_fini(&ww);
2269 intel_engine_pm_put(ce->engine);
2270 i915_vma_unpin(vma);
/*
 * Public wrapper: verify this engine's workaround list via the kernel
 * context. NOTE(review): the trailing arguments and closing brace are
 * missing from this extract - verify upstream.
 */
2275 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
2278 return engine_wa_list_verify(engine->kernel_context,
2283 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2284 #include "selftest_workarounds.c"