From 6b332fa20f671265638d1d62496f9607c5f6e92f Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Mon, 4 Apr 2016 18:50:56 +0100 Subject: [PATCH] drm/i915/guc: reset GuC and retry on firmware load failure Due to timing issues in the HW, some of the status bits required for GuC authentication occasionally don't get set; when that happens, the GuC cannot be initialized and we will be left with a wedged GPU. The W/A suggested is to perform a soft reset of the GuC and attempt to reload the F/W again for few times before giving up. As the failure is dependent on timing, tests performed by triggering manual full gpu reset (i915_wedged) showed that we could sometimes hit this after several thousand iterations, but sometimes tests ran even longer without any issues. Reset and reload mechanism proved helpful when we indeed hit f/w load failure, so it is better to include this to improve driver stability. This change implements the following WAs, WaEnableuKernelHeaderValidFix:skl,bxt WaEnableGuCBootHashCheckNotSet:skl,bxt Signed-off-by: Arun Siluvery Signed-off-by: Dave Gordon Reviewed-by: Alex Dai Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_guc_reg.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_guc_loader.c | 49 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_uncore.c | 19 +++++++++++++ 5 files changed, 69 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6443745..466b8b6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2747,6 +2747,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, extern int intel_gpu_reset(struct drm_device *dev, u32 engine_mask); extern bool intel_has_gpu_reset(struct drm_device *dev); extern int i915_reset(struct drm_device *dev); +extern int intel_guc_reset(struct drm_i915_private *dev_priv); extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h index e4ba582..94ceee5 100644 --- a/drivers/gpu/drm/i915/i915_guc_reg.h +++ b/drivers/gpu/drm/i915/i915_guc_reg.h @@ -27,6 +27,7 @@ /* Definitions of GuC H/W registers, bits, etc */ #define GUC_STATUS _MMIO(0xc000) +#define GS_MIA_IN_RESET (1 << 0) #define GS_BOOTROM_SHIFT 1 #define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) #define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 683274f..30fea34 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -165,6 +165,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN6_GRDOM_MEDIA (1 << 2) #define GEN6_GRDOM_BLT (1 << 3) #define GEN6_GRDOM_VECS (1 << 4) +#define GEN9_GRDOM_GUC (1 << 5) #define GEN8_GRDOM_MEDIA2 (1 << 7) #define RING_PP_DIR_BASE(ring) _MMIO((ring)->mmio_base+0x228) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index b4976f9..d84c560 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -353,6 +353,24 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) return ret; } +static int i915_reset_guc(struct drm_i915_private *dev_priv) +{ + int ret; + u32 guc_status; + + ret = intel_guc_reset(dev_priv); + if (ret) { + DRM_ERROR("GuC reset failed, ret = %d\n", ret); + return ret; + } + + guc_status = I915_READ(GUC_STATUS); + WARN(!(guc_status & GS_MIA_IN_RESET), + "GuC status: 0x%x, MIA core expected to be in reset\n", guc_status); + + return ret; +} + /** * intel_guc_ucode_load() - load GuC uCode into the device * @dev: drm device @@ -417,9 +435,36 @@ int intel_guc_ucode_load(struct drm_device *dev) if (err) goto fail; + /* + * WaEnableuKernelHeaderValidFix:skl,bxt + * For BXT, this is only upto B0 but below WA is required for later + * steppings also so this is extended as well. + */ + /* WaEnableGuCBootHashCheckNotSet:skl,bxt */ err = guc_ucode_xfer(dev_priv); - if (err) - goto fail; + if (err) { + int retries = 3; + + DRM_ERROR("GuC fw load failed, err=%d, attempting reset and retry\n", err); + + while (retries--) { + err = i915_reset_guc(dev_priv); + if (err) + break; + + err = guc_ucode_xfer(dev_priv); + if (!err) { + DRM_DEBUG_DRIVER("GuC fw reload succeeded after reset\n"); + break; + } + DRM_DEBUG_DRIVER("GuC fw reload retries left: %d\n", retries); + } + + if (err) { + DRM_ERROR("GuC fw reload attempt failed, ret=%d\n", err); + goto fail; + } + } guc_fw->guc_fw_load_status = GUC_FIRMWARE_SUCCESS; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ac1c545..fbc1d21 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1673,6 +1673,25 @@ bool intel_has_gpu_reset(struct drm_device *dev) return intel_get_gpu_reset(dev) != NULL; } +int intel_guc_reset(struct drm_i915_private *dev_priv) +{ + int ret; + unsigned long irqflags; + + if (!i915.enable_guc_submission) + return -EINVAL; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + + ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + return ret; +} + bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv) { return check_for_unclaimed_mmio(dev_priv); -- 2.7.4