drm/i915/selftest: Fix workarounds selftest for GuC submission
author Rahul Kumar Singh <rahul.kumar.singh@intel.com>
Tue, 27 Jul 2021 00:23:42 +0000 (17:23 -0700)
committer John Harrison <John.C.Harrison@Intel.com>
Wed, 28 Jul 2021 00:32:18 +0000 (17:32 -0700)
When GuC submission is enabled, the GuC controls engine resets. Rather
than explicitly triggering a reset, the driver must submit a hanging
context to GuC and wait for the reset to occur.

Signed-off-by: Rahul Kumar Singh <rahul.kumar.singh@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-28-matthew.brost@intel.com
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/selftest_workarounds.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c [new file with mode: 0644]
drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h [new file with mode: 0644]

index 10b3bb6..ab76799 100644 (file)
@@ -280,6 +280,7 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
 i915-$(CONFIG_DRM_I915_SELFTEST) += \
        gem/selftests/i915_gem_client_blt.o \
        gem/selftests/igt_gem_utils.o \
+       selftests/intel_scheduler_helpers.o \
        selftests/i915_random.o \
        selftests/i915_selftest.o \
        selftests/igt_atomic.o \
index 260cce1..ed91bcf 100644 (file)
@@ -443,6 +443,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_IS_VIRTUAL       BIT(5)
 #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
 #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
        unsigned int flags;
 
        /*
index 7a38ce4..ba7ee69 100644 (file)
@@ -12,6 +12,7 @@
 #include "selftests/igt_flush_test.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_spinner.h"
+#include "selftests/intel_scheduler_helpers.h"
 #include "selftests/mock_drm.h"
 
 #include "gem/selftests/igt_gem_utils.h"
@@ -261,28 +262,34 @@ static int do_engine_reset(struct intel_engine_cs *engine)
        return intel_engine_reset(engine, "live_workarounds");
 }
 
+/*
+ * Reset callback passed to check_whitelist_across_reset() for engines
+ * that use GuC submission. It takes no action on @engine and always
+ * returns 0.
+ */
+static int do_guc_reset(struct intel_engine_cs *engine)
+{
+       /* Currently a no-op as the reset is handled by GuC */
+       return 0;
+}
+
 static int
 switch_to_scratch_context(struct intel_engine_cs *engine,
-                         struct igt_spinner *spin)
+                         struct igt_spinner *spin,
+                         struct i915_request **rq)
 {
        struct intel_context *ce;
-       struct i915_request *rq;
        int err = 0;
 
        ce = intel_context_create(engine);
        if (IS_ERR(ce))
                return PTR_ERR(ce);
 
-       rq = igt_spinner_create_request(spin, ce, MI_NOOP);
+       *rq = igt_spinner_create_request(spin, ce, MI_NOOP);
        intel_context_put(ce);
 
-       if (IS_ERR(rq)) {
+       if (IS_ERR(*rq)) {
                spin = NULL;
-               err = PTR_ERR(rq);
+               err = PTR_ERR(*rq);
                goto err;
        }
 
-       err = request_add_spin(rq, spin);
+       err = request_add_spin(*rq, spin);
 err:
        if (err && spin)
                igt_spinner_end(spin);
@@ -296,6 +303,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
 {
        struct intel_context *ce, *tmp;
        struct igt_spinner spin;
+       struct i915_request *rq;
        intel_wakeref_t wakeref;
        int err;
 
@@ -316,13 +324,24 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
                goto out_spin;
        }
 
-       err = switch_to_scratch_context(engine, &spin);
+       err = switch_to_scratch_context(engine, &spin, &rq);
        if (err)
                goto out_spin;
 
+       /* Ensure the spinner hasn't aborted */
+       if (i915_request_completed(rq)) {
+               pr_err("%s spinner failed to start\n", name);
+               err = -ETIMEDOUT;
+               goto out_spin;
+       }
+
        with_intel_runtime_pm(engine->uncore->rpm, wakeref)
                err = reset(engine);
 
+       /* Ensure the reset happens and kills the engine */
+       if (err == 0)
+               err = intel_selftest_wait_for_rq(rq);
+
        igt_spinner_end(&spin);
 
        if (err) {
@@ -787,9 +806,27 @@ static int live_reset_whitelist(void *arg)
                        continue;
 
                if (intel_has_reset_engine(gt)) {
-                       err = check_whitelist_across_reset(engine,
-                                                          do_engine_reset,
-                                                          "engine");
+                       if (intel_engine_uses_guc(engine)) {
+                               struct intel_selftest_saved_policy saved;
+                               int err2;
+
+                               err = intel_selftest_modify_policy(engine, &saved);
+                               if (err)
+                                       goto out;
+
+                               err = check_whitelist_across_reset(engine,
+                                                                  do_guc_reset,
+                                                                  "guc");
+
+                               err2 = intel_selftest_restore_policy(engine, &saved);
+                               if (err == 0)
+                                       err = err2;
+                       } else {
+                               err = check_whitelist_across_reset(engine,
+                                                                  do_engine_reset,
+                                                                  "engine");
+                       }
+
                        if (err)
                                goto out;
                }
@@ -1235,31 +1272,40 @@ live_engine_reset_workarounds(void *arg)
        reference_lists_init(gt, lists);
 
        for_each_engine(engine, gt, id) {
+               struct intel_selftest_saved_policy saved;
+               bool using_guc = intel_engine_uses_guc(engine);
                bool ok;
+               int ret2;
 
                pr_info("Verifying after %s reset...\n", engine->name);
+               ret = intel_selftest_modify_policy(engine, &saved);
+               if (ret)
+                       break;
+
                ce = intel_context_create(engine);
                if (IS_ERR(ce)) {
                        ret = PTR_ERR(ce);
-                       break;
+                       goto restore;
                }
 
-               ok = verify_wa_lists(gt, lists, "before reset");
-               if (!ok) {
-                       ret = -ESRCH;
-                       goto err;
-               }
+               if (!using_guc) {
+                       ok = verify_wa_lists(gt, lists, "before reset");
+                       if (!ok) {
+                               ret = -ESRCH;
+                               goto err;
+                       }
 
-               ret = intel_engine_reset(engine, "live_workarounds:idle");
-               if (ret) {
-                       pr_err("%s: Reset failed while idle\n", engine->name);
-                       goto err;
-               }
+                       ret = intel_engine_reset(engine, "live_workarounds:idle");
+                       if (ret) {
+                               pr_err("%s: Reset failed while idle\n", engine->name);
+                               goto err;
+                       }
 
-               ok = verify_wa_lists(gt, lists, "after idle reset");
-               if (!ok) {
-                       ret = -ESRCH;
-                       goto err;
+                       ok = verify_wa_lists(gt, lists, "after idle reset");
+                       if (!ok) {
+                               ret = -ESRCH;
+                               goto err;
+                       }
                }
 
                ret = igt_spinner_init(&spin, engine->gt);
@@ -1280,25 +1326,41 @@ live_engine_reset_workarounds(void *arg)
                        goto err;
                }
 
-               ret = intel_engine_reset(engine, "live_workarounds:active");
-               if (ret) {
-                       pr_err("%s: Reset failed on an active spinner\n",
-                              engine->name);
-                       igt_spinner_fini(&spin);
-                       goto err;
+               /* Ensure the spinner hasn't aborted */
+               if (i915_request_completed(rq)) {
+                       ret = -ETIMEDOUT;
+                       goto skip;
+               }
+
+               if (!using_guc) {
+                       ret = intel_engine_reset(engine, "live_workarounds:active");
+                       if (ret) {
+                               pr_err("%s: Reset failed on an active spinner\n",
+                                      engine->name);
+                               igt_spinner_fini(&spin);
+                               goto err;
+                       }
                }
 
+               /* Ensure the reset happens and kills the engine */
+               if (ret == 0)
+                       ret = intel_selftest_wait_for_rq(rq);
+
+skip:
                igt_spinner_end(&spin);
                igt_spinner_fini(&spin);
 
                ok = verify_wa_lists(gt, lists, "after busy reset");
-               if (!ok) {
+               if (!ok)
                        ret = -ESRCH;
-                       goto err;
-               }
 
 err:
                intel_context_put(ce);
+
+restore:
+               ret2 = intel_selftest_restore_policy(engine, &saved);
+               if (ret == 0)
+                       ret = ret2;
                if (ret)
                        break;
        }
index ee4f1f9..3ff42d6 100644 (file)
@@ -1252,6 +1252,9 @@ static void guc_context_policy_init(struct intel_engine_cs *engine,
 {
        desc->policy_flags = 0;
 
+       if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+               desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE;
+
        /* NB: For both of these, zero means disabled. */
        desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
        desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c
new file mode 100644 (file)
index 0000000..5cdee13
--- /dev/null
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+//#include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
+#include "i915_drv.h"
+#include "i915_selftest.h"
+
+#include "selftests/intel_scheduler_helpers.h"
+
+#define REDUCED_TIMESLICE      5
+#define REDUCED_PREEMPT                10
+#define WAIT_FOR_RESET_TIME    1000
+
+/*
+ * Record the engine's current reset/scheduling settings in @saved, then
+ * override them with the values the reset selftests need. When the engine
+ * uses GuC submission, the GuC global policies are updated as well.
+ *
+ * Returns 0 on success. If the GuC policy update fails, the saved settings
+ * are written back and the update's error code is returned.
+ */
+int intel_selftest_modify_policy(struct intel_engine_cs *engine,
+                                struct intel_selftest_saved_policy *saved)
+{
+       int ret = 0;
+
+       /* Snapshot every value that is overridden below */
+       saved->reset = engine->i915->params.reset;
+       saved->flags = engine->flags;
+       saved->timeslice = engine->props.timeslice_duration_ms;
+       saved->preempt_timeout = engine->props.preempt_timeout_ms;
+
+       /*
+        * Forced pre-emption when the time slice expires, combined with
+        * an engine reset when the pre-emption times out, is what lets
+        * the GuC notice and reset a single hanging context. The
+        * timeslice and pre-emption timeout are shortened so that the
+        * test completes quickly.
+        */
+       engine->i915->params.reset = 2;
+       engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION;
+       engine->props.timeslice_duration_ms = REDUCED_TIMESLICE;
+       engine->props.preempt_timeout_ms = REDUCED_PREEMPT;
+
+       /* The global policy update is only issued under GuC submission */
+       if (intel_engine_uses_guc(engine)) {
+               ret = intel_guc_global_policies_update(&engine->gt->uc.guc);
+               if (ret)
+                       intel_selftest_restore_policy(engine, saved);
+       }
+
+       return ret;
+}
+
+/*
+ * Write the settings held in @saved back to the engine (and to the
+ * i915 reset parameter). When the engine uses GuC submission, the GuC
+ * global policies are updated afterwards.
+ *
+ * Returns 0 without GuC submission, otherwise the result of the GuC
+ * global policy update.
+ */
+int intel_selftest_restore_policy(struct intel_engine_cs *engine,
+                                 struct intel_selftest_saved_policy *saved)
+{
+       int ret = 0;
+
+       /* Put back exactly what intel_selftest_modify_policy() recorded */
+       engine->i915->params.reset = saved->reset;
+       engine->flags = saved->flags;
+       engine->props.timeslice_duration_ms = saved->timeslice;
+       engine->props.preempt_timeout_ms = saved->preempt_timeout;
+
+       if (intel_engine_uses_guc(engine))
+               ret = intel_guc_global_policies_update(&engine->gt->uc.guc);
+
+       return ret;
+}
+
+/*
+ * Wait, for a bounded time, for @rq to complete.
+ *
+ * i915_request_wait() takes its timeout in jiffies, so
+ * WAIT_FOR_RESET_TIME is interpreted as milliseconds and converted here.
+ * Passing the raw constant would make the real wait depend on CONFIG_HZ.
+ *
+ * Returns 0 if the wait did not report an error, otherwise the negative
+ * error code from i915_request_wait().
+ */
+int intel_selftest_wait_for_rq(struct i915_request *rq)
+{
+       long ret;
+
+       ret = i915_request_wait(rq, 0, msecs_to_jiffies(WAIT_FOR_RESET_TIME));
+
+       /* A non-negative result is time remaining; callers only need 0 */
+       return ret < 0 ? ret : 0;
+}
diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h
new file mode 100644 (file)
index 0000000..79605b1
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _INTEL_SELFTEST_SCHEDULER_HELPERS_H_
+#define _INTEL_SELFTEST_SCHEDULER_HELPERS_H_
+
+#include <linux/types.h>
+
+struct i915_request;
+struct intel_engine_cs;
+
+/*
+ * Settings captured by intel_selftest_modify_policy() so that
+ * intel_selftest_restore_policy() can write them back afterwards.
+ */
+struct intel_selftest_saved_policy {
+       /* copy of engine->flags */
+       u32 flags;
+       /* copy of engine->i915->params.reset */
+       u32 reset;
+       /* copy of engine->props.timeslice_duration_ms */
+       u64 timeslice;
+       /* copy of engine->props.preempt_timeout_ms */
+       u64 preempt_timeout;
+};
+
+/*
+ * Save the engine's current reset/scheduling settings in @saved and
+ * apply the selftest overrides. Returns 0 or a negative error code.
+ */
+int intel_selftest_modify_policy(struct intel_engine_cs *engine,
+                                struct intel_selftest_saved_policy *saved);
+/*
+ * Write the settings held in @saved back to the engine. Returns 0 or a
+ * negative error code.
+ */
+int intel_selftest_restore_policy(struct intel_engine_cs *engine,
+                                 struct intel_selftest_saved_policy *saved);
+/*
+ * Wait a bounded time on @rq. Returns 0, or the negative error code
+ * reported by the wait.
+ */
+int intel_selftest_wait_for_rq(struct i915_request *rq);
+
+#endif