From e09498b078f85bbad8eccb665786bea847af93d4 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Sat, 9 May 2020 12:18:01 +0300 Subject: [PATCH] habanalabs: add dedicated define for hard reset Gaudi requires longer waiting during reset due to closing of network ports. Add this explanation to the relevant comment in the code and add a dedicated define for this reset timeout period, instead of multiplying another define. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 5 +++-- drivers/misc/habanalabs/habanalabs.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index c89157d..f618cff 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -1326,11 +1326,12 @@ void hl_device_fini(struct hl_device *hdev) * This function is competing with the reset function, so try to * take the reset atomic and if we are already in middle of reset, * wait until reset function is finished. Reset function is designed - * to always finish (could take up to a few seconds in worst case). + * to always finish. However, in Gaudi, because of all the network + * ports, the hard reset could take between 10-30 seconds */ timeout = ktime_add_us(ktime_get(), - HL_PENDING_RESET_PER_SEC * 1000 * 1000 * 4); + HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000); rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); while (rc) { usleep_range(50, 200); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index cfb306d..d774108 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -25,6 +25,8 @@ #define HL_PENDING_RESET_PER_SEC 30 +#define HL_HARD_RESET_MAX_TIMEOUT 120 + #define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */ #define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */ -- 2.7.4