From 73c7a8a871dc9aa6b7876c1a30bdbe0f899eb4f6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 26 Aug 2022 14:27:18 -0700 Subject: [PATCH] drm/i915/ats-m: Add thread execution tuning setting On client DG2 platforms, optimal performance is achieved with the hardware's default "age based" thread execution setting. However on ATS-M, switching this to "round robin after dependencies" provides better performance. We'll add a new "tuning" feature flag to the ATS-M device info to enable/disable this setting. Bspec: 68331 Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20220826212718.409948-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++ drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + 4 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 94f9ddc..d414785 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1110,6 +1110,8 @@ #define GEN12_DISABLE_TDL_PUSH REG_BIT(9) #define GEN11_DIS_PICK_2ND_EU REG_BIT(7) #define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) +#define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) +#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 69a0c6a..6d2003d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2700,6 +2700,15 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, 0 /* write-only, so skip validation */, true); } + + /* + * This tuning setting proves beneficial only on ATS-M designs; the + * default "age based" setting is optimal on regular DG2 and other + * platforms. + */ + if (INTEL_INFO(i915)->tuning_thread_rr_after_dep) + wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, + THREAD_EX_ARB_MODE_RR_AFTER_DEP); } /* diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 3e3e95c..7b03843 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1083,6 +1083,7 @@ static const struct intel_device_info ats_m_info = { DG2_FEATURES, .display = { 0 }, .require_force_probe = 1, + .tuning_thread_rr_after_dep = 1, }; #define XE_HPC_FEATURES \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 677fb68..9cd912c 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -172,6 +172,7 @@ enum intel_ppgtt_type { func(has_runtime_pm); \ func(has_snoop); \ func(has_coherent_ggtt); \ + func(tuning_thread_rr_after_dep); \ func(unfenced_needs_alignment); \ func(hws_needs_physical); -- 2.7.4