habanalabs: add new return code to device fd open
author Ofir Bitton <obitton@habana.ai>
Mon, 28 Feb 2022 14:08:20 +0000 (16:08 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 22 May 2022 18:57:34 +0000 (20:57 +0200)
To be more informative during device open, add a new return code,
-EAGAIN, which indicates that the device is still going through
resource reclaiming and hence cannot be used yet.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c

index dc9341a..3eb392b 100644 (file)
@@ -107,6 +107,8 @@ static void hpriv_release(struct kref *ref)
        hdev->is_compute_ctx_active = false;
        mutex_unlock(&hdev->fpriv_list_lock);
 
+       hdev->compute_ctx_in_release = 0;
+
        kfree(hpriv);
 }
 
@@ -150,6 +152,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
        hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
        hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
 
+       hdev->compute_ctx_in_release = 1;
+
        if (!hl_hpriv_put(hpriv))
                dev_notice(hdev->dev,
                        "User process closed FD but device still in use\n");
index 5647977..0079f43 100644 (file)
@@ -2710,6 +2710,7 @@ struct hl_reset_info {
  *                        cases where Linux was not loaded to device CPU
  * @supports_wait_for_multi_cs: true if wait for multi CS is supported
  * @is_compute_ctx_active: Whether there is an active compute context executing.
+ * @compute_ctx_in_release: true if the current compute context is being released.
  */
 struct hl_device {
        struct pci_dev                  *pdev;
@@ -2828,6 +2829,7 @@ struct hl_device {
        u8                              supports_wait_for_multi_cs;
        u8                              stream_master_qid_arr_size;
        u8                              is_compute_ctx_active;
+       u8                              compute_ctx_in_release;
 
        /* Parameters for bring-up */
        u64                             nic_ports_mask;
index ca404ed..e870c32 100644 (file)
@@ -150,7 +150,20 @@ int hl_device_open(struct inode *inode, struct file *filp)
                dev_err_ratelimited(hdev->dev,
                        "Can't open %s because it is %s\n",
                        dev_name(hdev->dev), hdev->status[status]);
-               rc = -EPERM;
+
+               if (status == HL_DEVICE_STATUS_IN_RESET)
+                       rc = -EAGAIN;
+               else
+                       rc = -EPERM;
+
+               goto out_err;
+       }
+
+       if (hdev->compute_ctx_in_release) {
+               dev_dbg_ratelimited(hdev->dev,
+                       "Can't open %s because another user is still releasing it\n",
+                       dev_name(hdev->dev));
+               rc = -EAGAIN;
                goto out_err;
        }