drm/i915/gt: Try to more gracefully quiesce the system before resets

author Chris Wilson <chris@chris-wilson.co.uk>

Wed, 23 Oct 2019 13:31:04 +0000 (14:31 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Wed, 23 Oct 2019 22:52:10 +0000 (23:52 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Wed, 23 Oct 2019 13:31:04 +0000 (14:31 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 23 Oct 2019 22:52:10 +0000 (23:52 +0100)
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile

index 48df8889a88a7fecc31a5e0979c39761c697d57d..3a3881d5e44ba1388594fc30b71e98b1b542de77 100644 (file)
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -25,3 +25,14 @@ config DRM_I915_SPIN_REQUEST
           May be 0 to disable the initial spin. In practice, we estimate
           the cost of enabling the interrupt (if currently disabled) to be
           a few microseconds.
+
+config DRM_I915_STOP_TIMEOUT
+       int "How long to wait for an engine to quiesce gracefully before reset (ms)"
+       default 100 # milliseconds
+       help
+         By stopping submission and sleeping for a short time before resetting
+         the GPU, we allow the innocent contexts also on the system to quiesce.
+         It is then less likely for a hanging context to cause collateral
+         damage as the system is reset in order to recover. The corollary is
+         that the reset itself may take longer and so be more disruptive to
+         interactive or low latency workloads.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c

index 0e20713603ec1f6d3f1b4368d7f3ac1c667b7d10..e4203eb4413924730c9156e0dd9384b2c478023c 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -308,6 +308,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
         engine->instance = info->instance;
         __sprint_engine_name(engine);
  
+       engine->props.stop_timeout_ms =
+               CONFIG_DRM_I915_STOP_TIMEOUT;
+
         /*
          * To be overridden by the backend on setup. However to facilitate
          * cleanup on error during setup, we always provide the destroy vfunc.
@@ -875,6 +878,21 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
         return bbaddr;
  }
  
+static unsigned long stop_timeout(const struct intel_engine_cs *engine)
+{
+       if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
+               return 0;
+
+       /*
+        * If we are doing a normal GPU reset, we can take our time and allow
+        * the engine to quiesce. We've stopped submission to the engine, and
+        * if we wait long enough an innocent context should complete and
+        * leave the engine idle. So they should not be caught unaware by
+        * the forthcoming GPU reset (which usually follows the stop_cs)!
+        */
+       return READ_ONCE(engine->props.stop_timeout_ms);
+}
+
  int intel_engine_stop_cs(struct intel_engine_cs *engine)
  {
         struct intel_uncore *uncore = engine->uncore;
@@ -892,7 +910,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
         err = 0;
         if (__intel_wait_for_register_fw(uncore,
                                          mode, MODE_IDLE, MODE_IDLE,
-                                        1000, 0,
+                                        1000, stop_timeout(engine),
                                          NULL)) {
                 GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
                 err = -ETIMEDOUT;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h

index 3451be034caf4c3b6396dea6249c1c0f577ac249..87d5c4ef3ae7754ac79363077a2c2aba83047cef 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -542,6 +542,10 @@ struct intel_engine_cs {
                  */
                 ktime_t total;
         } stats;
+
+       struct {
+               unsigned long stop_timeout_ms;
+       } props;
  };
  
  static inline bool
author	Chris Wilson <chris@chris-wilson.co.uk>
	Wed, 23 Oct 2019 13:31:04 +0000 (14:31 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Wed, 23 Oct 2019 22:52:10 +0000 (23:52 +0100)
drivers/gpu/drm/i915/Kconfig.profile		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_engine_cs.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_engine_types.h		patch \| blob \| history