habanalabs: support hard-reset scheduling during soft-reset
author Ofir Bitton <obitton@habana.ai>
Tue, 23 Nov 2021 14:34:28 +0000 (16:34 +0200)
committer Oded Gabbay <ogabbay@kernel.org>
Sun, 26 Dec 2021 12:42:31 +0000 (14:42 +0200)
As a hard-reset can be requested during a soft-reset, the driver must
allow it, or else critical events received during the soft-reset will
be ignored.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h

index 84621ad..733338a 100644 (file)
@@ -978,7 +978,7 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
 int hl_device_reset(struct hl_device *hdev, u32 flags)
 {
        bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
-                                                               reset_upon_device_release = false;
+                       reset_upon_device_release = false, schedule_hard_reset = false;
        u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
        struct hl_ctx *ctx;
        int i, rc;
@@ -1031,6 +1031,9 @@ do_reset:
                /* Block future CS/VM/JOB completion operations */
                spin_lock(&hdev->reset_info.lock);
                if (hdev->reset_info.in_reset) {
+                       /* We only allow scheduling of a hard reset during soft reset */
+                       if (hard_reset && hdev->reset_info.is_in_soft_reset)
+                               hdev->reset_info.hard_reset_schedule_flags = flags;
                        spin_unlock(&hdev->reset_info.lock);
                        return 0;
                }
@@ -1193,7 +1196,6 @@ kill_processes:
         * is required for the initialization itself
         */
        hdev->disabled = false;
-       hdev->reset_info.is_in_soft_reset = false;
 
        rc = hdev->asic_funcs->hw_init(hdev);
        if (rc) {
@@ -1243,7 +1245,20 @@ kill_processes:
                }
        }
 
-       hdev->reset_info.in_reset = 0;
+       spin_lock(&hdev->reset_info.lock);
+       hdev->reset_info.is_in_soft_reset = false;
+
+       /* Schedule hard reset only if requested and if not already in hard reset.
+        * We keep 'in_reset' enabled, so no other reset can go in during the hard
+        * reset schedule
+        */
+       if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags)
+               schedule_hard_reset = true;
+       else
+               hdev->reset_info.in_reset = 0;
+
+       spin_unlock(&hdev->reset_info.lock);
+
        hdev->reset_info.needs_reset = false;
 
        dev_notice(hdev->dev, "Successfully finished resetting the device\n");
@@ -1261,6 +1276,16 @@ kill_processes:
                hdev->reset_info.soft_reset_cnt++;
        }
 
+       if (schedule_hard_reset) {
+               dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n");
+               flags = hdev->reset_info.hard_reset_schedule_flags;
+               hdev->reset_info.hard_reset_schedule_flags = 0;
+               hdev->disabled = true;
+               hard_reset = true;
+               handle_reset_trigger(hdev, flags);
+               goto again;
+       }
+
        return 0;
 
 out_err:
index 37a3a46..cb710fd 100644 (file)
@@ -2460,6 +2460,8 @@ struct last_error_session_info {
  * @lock: lock to protect critical reset flows.
  * @soft_reset_cnt: number of soft reset since the driver was loaded.
  * @hard_reset_cnt: number of hard reset since the driver was loaded.
+ * @hard_reset_schedule_flags: flags of a hard reset that was requested during
+ *                             a soft reset and is scheduled to run after it.
  * @in_reset: is device in reset flow.
  * @is_in_soft_reset: Device is currently in soft reset process.
  * @needs_reset: true if reset_on_lockup is false and device should be reset
@@ -2478,6 +2480,7 @@ struct hl_reset_info {
        spinlock_t      lock;
        u32             soft_reset_cnt;
        u32             hard_reset_cnt;
+       u32             hard_reset_schedule_flags;
        u8              in_reset;
        u8              is_in_soft_reset;
        u8              needs_reset;