May be 0 to disable the initial spin. In practice, we estimate
the cost of enabling the interrupt (if currently disabled) to be
a few microseconds.
+
+config DRM_I915_STOP_TIMEOUT
+ int "How long to wait for an engine to quiesce gracefully before reset (ms)"
+ default 100 # milliseconds
+ help
+ By stopping submission and sleeping for a short time before resetting
+ the GPU, we allow the innocent contexts also on the system to quiesce.
+ It is then less likely for a hanging context to cause collateral
+ damage as the system is reset in order to recover. The corollary is
+ that the reset itself may take longer and so be more disruptive to
+ interactive or low latency workloads.
engine->instance = info->instance;
__sprint_engine_name(engine);
+ engine->props.stop_timeout_ms =
+ CONFIG_DRM_I915_STOP_TIMEOUT;
+
/*
* To be overridden by the backend on setup. However to facilitate
* cleanup on error during setup, we always provide the destroy vfunc.
return bbaddr;
}
+static unsigned long stop_timeout(const struct intel_engine_cs *engine)
+{
+ if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
+ return 0;
+
+ /*
+ * If we are doing a normal GPU reset, we can take our time and allow
+ * the engine to quiesce. We've stopped submission to the engine, and
+ * if we wait long enough an innocent context should complete and
+ * leave the engine idle. So they should not be caught unaware by
+ * the forthcoming GPU reset (which usually follows the stop_cs)!
+ */
+ return READ_ONCE(engine->props.stop_timeout_ms);
+}
+
int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
struct intel_uncore *uncore = engine->uncore;
err = 0;
if (__intel_wait_for_register_fw(uncore,
mode, MODE_IDLE, MODE_IDLE,
- 1000, 0,
+ 1000, stop_timeout(engine),
NULL)) {
GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
err = -ETIMEDOUT;