habanalabs: use enum for CB allocation options
author Tal Cohen <talcohen@habana.ai>
Wed, 3 Jun 2020 06:25:27 +0000 (09:25 +0300)
committer Oded Gabbay <ogabbay@kernel.org>
Mon, 30 Nov 2020 08:47:29 +0000 (10:47 +0200)
In the future there will be situations where queues can accept either
kernel allocated CBs or user allocated CBs, depending on different
states.

Therefore, instead of using a boolean variable to indicate whether a CB
is kernel or user allocated, we need to use a bitmask, which will allow
combining the two options.

Add a flag to the uapi so the user will be able to indicate whether
the CB was allocated by kernel or by user. Of course the driver
validates that.

Signed-off-by: Tal Cohen <talcohen@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/command_submission.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/goya/goya.c
include/uapi/misc/habanalabs.h

index 0e37aad..cd3422b 100644 (file)
@@ -568,9 +568,36 @@ static int validate_queue_index(struct hl_device *hdev,
                return -EINVAL;
        }
 
-       *queue_type = hw_queue_prop->type;
-       *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
+       /* When hw queue type isn't QUEUE_TYPE_HW,
+        * USER_ALLOC_CB flag shall be treated as "don't care".
+        */
+       if (hw_queue_prop->type == QUEUE_TYPE_HW) {
+               if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
+                       if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
+                               dev_err(hdev->dev,
+                                       "Queue index %d doesn't support user CB\n",
+                                       chunk->queue_index);
+                               return -EINVAL;
+                       }
 
+                       *is_kernel_allocated_cb = false;
+               } else {
+                       if (!(hw_queue_prop->cb_alloc_flags &
+                                       CB_ALLOC_KERNEL)) {
+                               dev_err(hdev->dev,
+                                       "Queue index %d doesn't support kernel CB\n",
+                                       chunk->queue_index);
+                               return -EINVAL;
+                       }
+
+                       *is_kernel_allocated_cb = true;
+               }
+       } else {
+               *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
+                                               & CB_ALLOC_KERNEL);
+       }
+
+       *queue_type = hw_queue_prop->type;
        return 0;
 }
 
index b5a3493..0823798 100644 (file)
@@ -207,6 +207,17 @@ struct hl_outbound_pci_region {
 };
 
 /*
+ * enum queue_cb_alloc_flags - Indicates queue support for CBs that are
+ * allocated by Kernel or by User. The values are bit flags.
+ * @CB_ALLOC_KERNEL: queue supports CBs that are allocated by Kernel
+ * @CB_ALLOC_USER: queue supports CBs that are allocated by User
+ */
+enum queue_cb_alloc_flags {
+       CB_ALLOC_KERNEL = 0x1,
+       CB_ALLOC_USER   = 0x2
+};
+
+/*
  * struct hl_hw_sob - H/W SOB info.
  * @hdev: habanalabs device structure.
  * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
@@ -223,16 +234,18 @@ struct hl_hw_sob {
 /**
  * struct hw_queue_properties - queue information.
  * @type: queue type.
+ * @cb_alloc_flags: bitmap which indicates if the hw queue supports a CB
+ *                  that is allocated by the Kernel driver and therefore,
+ *                  a CB handle can be provided for jobs on this queue.
+ *                  Otherwise, a CB address must be provided.
  * @driver_only: true if only the driver is allowed to send a job to this queue,
  *               false otherwise.
- * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
- *                      queue, false otherwise (a CB address must be provided).
  * @supports_sync_stream: True if queue supports sync stream
  */
 struct hw_queue_properties {
        enum hl_queue_type      type;
+       enum queue_cb_alloc_flags cb_alloc_flags;
        u8                      driver_only;
-       u8                      requires_kernel_cb;
        u8                      supports_sync_stream;
 };
 
index 2dd9b73..9393e34 100644 (file)
@@ -381,23 +381,28 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
                if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
                        prop->hw_queues_props[i].driver_only = 0;
-                       prop->hw_queues_props[i].requires_kernel_cb = 1;
                        prop->hw_queues_props[i].supports_sync_stream = 1;
+                       prop->hw_queues_props[i].cb_alloc_flags =
+                               CB_ALLOC_KERNEL;
                        num_sync_stream_queues++;
                } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
                        prop->hw_queues_props[i].driver_only = 1;
-                       prop->hw_queues_props[i].requires_kernel_cb = 0;
                        prop->hw_queues_props[i].supports_sync_stream = 0;
+                       prop->hw_queues_props[i].cb_alloc_flags =
+                               CB_ALLOC_KERNEL;
                } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
                        prop->hw_queues_props[i].driver_only = 0;
-                       prop->hw_queues_props[i].requires_kernel_cb = 0;
+                       prop->hw_queues_props[i].supports_sync_stream = 0;
+                       prop->hw_queues_props[i].cb_alloc_flags =
+                               CB_ALLOC_USER;
                } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
                        prop->hw_queues_props[i].driver_only = 0;
-                       prop->hw_queues_props[i].requires_kernel_cb = 0;
                        prop->hw_queues_props[i].supports_sync_stream = 0;
+                       prop->hw_queues_props[i].cb_alloc_flags =
+                               CB_ALLOC_USER;
                }
        }
 
index d873f61..74c4427 100644 (file)
@@ -373,20 +373,20 @@ int goya_get_fixed_properties(struct hl_device *hdev)
        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
                prop->hw_queues_props[i].driver_only = 0;
-               prop->hw_queues_props[i].requires_kernel_cb = 1;
+               prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
        }
 
        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
                prop->hw_queues_props[i].driver_only = 1;
-               prop->hw_queues_props[i].requires_kernel_cb = 0;
+               prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
        }
 
        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
                        NUMBER_OF_INT_HW_QUEUES; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
                prop->hw_queues_props[i].driver_only = 0;
-               prop->hw_queues_props[i].requires_kernel_cb = 0;
+               prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
        }
 
        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
index 9705b8a..5753157 100644 (file)
@@ -490,6 +490,22 @@ union hl_cb_args {
        struct hl_cb_out out;
 };
 
+/* HL_CS_CHUNK_FLAGS_ values
+ *
+ * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB:
+ *      Indicates if the CB was allocated and mapped by userspace.
+ *      User allocated CB is a command buffer allocated by the user, via malloc
+ *      (or similar). After allocating the CB, the user invokes "memory ioctl"
+ *      to map the user memory into a device virtual address. The user provides
+ *      this address via the cb_handle field. The interface provides the
+ *      ability to create large CBs, which aren't limited to
+ *      "HL_MAX_CB_SIZE". Therefore, it increases the PCI-DMA queues
+ *      throughput. This CB allocation method also reduces the use of the
+ *      Linux DMA-able memory pool, which is limited and used by other Linux
+ *      sub-systems.
+ */
+#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1
+
 /*
  * This structure size must always be fixed to 64-bytes for backward
  * compatibility