From 56c7f0e2026328e56106b54cb0e2fe3a7e70ce4f Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 27 Jul 2022 19:20:23 -0700 Subject: [PATCH] drm/i915/guc: Fix capture size warning and bump the size There was a size check to warn if the GuC error state capture buffer allocation would be too small to fit a reasonable amount of capture data for the current platform. Unfortunately, the test was done too early in the boot sequence and was actually testing 'if(-ENODEV > size)'. Move the check to be later. The check is only used to print a warning message, so it doesn't really matter how early or late it is done. Note that it is not possible to dynamically size the buffer because the allocation needs to be done before the engine information is available (at least, it would be in the intended two-phase GuC init process). Now that the check works, it is reporting size too small for newer platforms. The check includes a 3x oversample multiplier to allow for multiple error captures to be bufferd by GuC before i915 has a chance to read them out. This is less important than simply being big enough to fit the first capture. So a) bump the default size to be large enough for one capture minimum and b) make the warning only if one capture won't fit, instead use a notice for the 3x size. Note that the size estimate is a worst case scenario. Actual captures will likely be smaller. Lastly, use drm_warn istead of DRM_WARN as the former provides more infmration and the latter is deprecated. Signed-off-by: John Harrison Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20220728022028.2190627-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 40 +++++++++++++++++++------- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h | 1 - drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 4 --- drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 4 +-- 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 75257bd..b54b788 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -600,10 +600,8 @@ intel_guc_capture_getnullheader(struct intel_guc *guc, return 0; } -#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3 - -int -intel_guc_capture_output_min_size_est(struct intel_guc *guc) +static int +guc_capture_output_min_size_est(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; @@ -623,13 +621,8 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc) * For each engine instance, there would be 1 x guc_state_capture_group_t output * followed by 3 x guc_state_capture_t lists. The latter is how the register * dumps are split across different register types (where the '3' are global vs class - * vs instance). Finally, let's multiply the whole thing by 3x (just so we are - * not limited to just 1 round of data in a worst case full register dump log) - * - * NOTE: intel_guc_log that allocates the log buffer would round this size up to - * a power of two. + * vs instance). */ - for_each_engine(engine, gt, id) { worst_min_size += sizeof(struct guc_state_capture_group_header_t) + (3 * sizeof(struct guc_state_capture_header_t)); @@ -649,7 +642,30 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc) worst_min_size += (num_regs * sizeof(struct guc_mmio_reg)); - return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER); + return worst_min_size; +} + +/* + * Add on a 3x multiplier to allow for multiple back-to-back captures occurring + * before the i915 can read the data out and process it + */ +#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3 + +static void check_guc_capture_size(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int min_size = guc_capture_output_min_size_est(guc); + int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER; + + if (min_size < 0) + drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n", + min_size); + else if (min_size > CAPTURE_BUFFER_SIZE) + drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n", + CAPTURE_BUFFER_SIZE, min_size); + else if (spare_size > CAPTURE_BUFFER_SIZE) + drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n", + CAPTURE_BUFFER_SIZE, spare_size, min_size); } /* @@ -1580,5 +1596,7 @@ int intel_guc_capture_init(struct intel_guc *guc) INIT_LIST_HEAD(&guc->capture->outlist); INIT_LIST_HEAD(&guc->capture->cachelist); + check_guc_capture_size(guc); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index d3d7bd0..fbd3713 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -21,7 +21,6 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m, void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee, struct intel_context *ce); void intel_guc_capture_process(struct intel_guc *guc); -int intel_guc_capture_output_min_size_est(struct intel_guc *guc); int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid, void **outptr); int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index e52ac17..cc8f3d1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -487,10 +487,6 @@ int intel_guc_log_create(struct intel_guc_log *log) GEM_BUG_ON(log->vma); - if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE) - DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n", - CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc)); - guc_log_size = intel_guc_log_size(log); vma = intel_guc_allocate_vma(guc, guc_log_size); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 18007e6..dc97154 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -22,11 +22,11 @@ struct intel_guc; #elif defined(CONFIG_DRM_I915_DEBUG_GEM) #define CRASH_BUFFER_SIZE SZ_1M #define DEBUG_BUFFER_SIZE SZ_2M -#define CAPTURE_BUFFER_SIZE SZ_1M +#define CAPTURE_BUFFER_SIZE SZ_4M #else #define CRASH_BUFFER_SIZE SZ_8K #define DEBUG_BUFFER_SIZE SZ_64K -#define CAPTURE_BUFFER_SIZE SZ_16K +#define CAPTURE_BUFFER_SIZE SZ_2M #endif /* -- 2.7.4