habanalabs: add status of reset after device release
author Oded Gabbay <ogabbay@kernel.org>
Thu, 7 Jul 2022 08:42:15 +0000 (11:42 +0300)
committer Oded Gabbay <ogabbay@kernel.org>
Tue, 12 Jul 2022 06:09:31 +0000 (09:09 +0300)
The user might want to know that the device is in reset after device
release, which, unlike a regular reset, is not an erroneous event.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs_drv.c
include/uapi/misc/habanalabs.h

index 5bc291c..19c0490 100644 (file)
@@ -271,16 +271,20 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
        enum hl_device_status status;
 
-       if (hdev->reset_info.in_reset)
-               status = HL_DEVICE_STATUS_IN_RESET;
-       else if (hdev->reset_info.needs_reset)
+       if (hdev->reset_info.in_reset) {
+               if (hdev->reset_info.is_in_soft_reset)
+                       status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
+               else
+                       status = HL_DEVICE_STATUS_IN_RESET;
+       } else if (hdev->reset_info.needs_reset) {
                status = HL_DEVICE_STATUS_NEEDS_RESET;
-       else if (hdev->disabled)
+       } else if (hdev->disabled) {
                status = HL_DEVICE_STATUS_MALFUNCTION;
-       else if (!hdev->init_done)
+       } else if (!hdev->init_done) {
                status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
-       else
+       } else {
                status = HL_DEVICE_STATUS_OPERATIONAL;
+       }
 
        return status;
 }
@@ -296,6 +300,7 @@ bool hl_device_operational(struct hl_device *hdev,
 
        switch (current_status) {
        case HL_DEVICE_STATUS_IN_RESET:
+       case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
        case HL_DEVICE_STATUS_MALFUNCTION:
        case HL_DEVICE_STATUS_NEEDS_RESET:
                return false;
index d900bae..f733ead 100644 (file)
@@ -165,7 +165,8 @@ int hl_device_open(struct inode *inode, struct file *filp)
                        "Can't open %s because it is %s\n",
                        dev_name(hdev->dev), hdev->status[status]);
 
-               if (status == HL_DEVICE_STATUS_IN_RESET)
+               if (status == HL_DEVICE_STATUS_IN_RESET ||
+                                       status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE)
                        rc = -EAGAIN;
                else
                        rc = -EPERM;
@@ -395,6 +396,9 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
        strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
        strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
                                        "in device creation", HL_STR_MAX);
+       strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
+                                       "in reset after device release", HL_STR_MAX);
+
 
        /* First, we must find out which ASIC are we handling. This is needed
         * to configure the behavior of the driver (kernel parameters)
index 8c6ab71..5d06d5c 100644 (file)
@@ -684,6 +684,8 @@ enum hl_goya_dma_direction {
  * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled.
  * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in
  *                                       progress.
+ * @HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: Device is currently in a reset that was
+ *                                                  triggered because the user released the device.
  * @HL_DEVICE_STATUS_LAST: Last status.
  */
 enum hl_device_status {
@@ -692,7 +694,8 @@ enum hl_device_status {
        HL_DEVICE_STATUS_MALFUNCTION,
        HL_DEVICE_STATUS_NEEDS_RESET,
        HL_DEVICE_STATUS_IN_DEVICE_CREATION,
-       HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
+       HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE,
+       HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE
 };
 
 enum hl_server_type {