drm/amdkfd: Add SDMA user-mode queues support to QCM
author Ben Goz <ben.goz@amd.com>
Sat, 3 Jan 2015 20:12:32 +0000 (22:12 +0200)
committer Oded Gabbay <oded.gabbay@amd.com>
Fri, 9 Jan 2015 20:26:05 +0000 (22:26 +0200)
This patch adds support for SDMA user-mode queues to the QCM - the Queue
management system that manages queues-per-device and queues-per-process.

v2: Remove calls to interface function that initializes sdma engines.

v3: Use the new names of some of the defines.

Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index fb94f1a..7ead080 100644 (file)
@@ -46,9 +46,24 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
                                        struct queue *q,
                                        struct qcm_process_device *qpd);
+
 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
 static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
 
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+                                       struct queue *q,
+                                       struct qcm_process_device *qpd);
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+                               unsigned int sdma_queue_id);
+
+/*
+ * Translate a KFD queue type into the MQD type that is passed to
+ * get_mqd_manager(): SDMA queues use the CIK SDMA MQD type, every other
+ * queue type uses the CIK CP MQD type.
+ */
+static inline
+enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
+{
+       return (type == KFD_QUEUE_TYPE_SDMA) ?
+                       KFD_MQD_TYPE_CIK_SDMA : KFD_MQD_TYPE_CIK_CP;
+}
 
 static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
 {
@@ -189,7 +204,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
        *allocated_vmid = qpd->vmid;
        q->properties.vmid = qpd->vmid;
 
-       retval = create_compute_queue_nocpsch(dqm, q, qpd);
+       if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+               retval = create_compute_queue_nocpsch(dqm, q, qpd);
+       if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+               retval = create_sdma_queue_nocpsch(dqm, q, qpd);
 
        if (retval != 0) {
                if (list_empty(&qpd->queues_list)) {
@@ -202,7 +220,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 
        list_add(&q->list, &qpd->queues_list);
        dqm->queue_count++;
-
+       if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+               dqm->sdma_queue_count++;
        mutex_unlock(&dqm->lock);
        return 0;
 }
@@ -279,8 +298,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
                                struct queue *q)
 {
        int retval;
-       struct mqd_manager *mqd;
-
+       struct mqd_manager *mqd, *mqd_sdma;
        BUG_ON(!dqm || !q || !q->mqd || !qpd);
 
        retval = 0;
@@ -294,6 +312,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
                goto out;
        }
 
+       mqd_sdma = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA);
+       if (mqd_sdma == NULL) {
+               mutex_unlock(&dqm->lock);
+               return -ENOMEM;
+       }
+
        retval = mqd->destroy_mqd(mqd, q->mqd,
                                KFD_PREEMPT_TYPE_WAVEFRONT,
                                QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
@@ -302,7 +326,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
        if (retval != 0)
                goto out;
 
-       deallocate_hqd(dqm, q);
+       if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+               deallocate_hqd(dqm, q);
+       else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+               dqm->sdma_queue_count--;
+               deallocate_sdma_queue(dqm, q->sdma_id);
+       }
 
        mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
@@ -323,7 +352,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
        BUG_ON(!dqm || !q || !q->mqd);
 
        mutex_lock(&dqm->lock);
-       mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+       mqd = dqm->get_mqd_manager(dqm, q->properties.type);
        if (mqd == NULL) {
                mutex_unlock(&dqm->lock);
                return -ENOMEM;
@@ -526,7 +555,6 @@ static int init_pipelines(struct device_queue_manager *dqm,
        return 0;
 }
 
-
 static int init_scheduler(struct device_queue_manager *dqm)
 {
        int retval;
@@ -556,6 +584,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
        mutex_init(&dqm->lock);
        INIT_LIST_HEAD(&dqm->queues);
        dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+       dqm->sdma_queue_count = 0;
        dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
                                        sizeof(unsigned int), GFP_KERNEL);
        if (!dqm->allocated_queues) {
@@ -567,6 +596,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
                dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
 
        dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
+       dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
 
        init_scheduler(dqm);
        return 0;
@@ -598,6 +628,77 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
        return 0;
 }
 
+/*
+ * Reserve the lowest-numbered free SDMA queue slot.
+ *
+ * A set bit in dqm->sdma_bitmap marks a free slot. On success the slot
+ * number is stored in *sdma_queue_id, its bit is cleared and 0 is returned.
+ * Returns -ENOMEM when none of the CIK_SDMA_QUEUES slots is free.
+ *
+ * The bitmap is a plain unsigned int, so it is manipulated with ordinary
+ * integer operations instead of the unsigned-long based bitmap helpers,
+ * which would access memory beyond the field on 64-bit builds.
+ *
+ * NOTE(review): the update is not atomic; this assumes the caller holds
+ * dqm->lock - confirm against the call sites.
+ */
+static int allocate_sdma_queue(struct device_queue_manager *dqm,
+                               unsigned int *sdma_queue_id)
+{
+       unsigned int free_slots;
+       int bit;
+
+       /* Only the low CIK_SDMA_QUEUES bits describe real slots. */
+       free_slots = dqm->sdma_bitmap & ((1U << CIK_SDMA_QUEUES) - 1);
+       if (free_slots == 0)
+               return -ENOMEM;
+
+       /* ffs() is 1-based; free_slots is non-zero here, so bit >= 0. */
+       bit = ffs(free_slots) - 1;
+       dqm->sdma_bitmap &= ~(1U << bit);
+       *sdma_queue_id = bit;
+
+       return 0;
+}
+
+/*
+ * Return an SDMA queue slot to the free pool by setting its bit in
+ * dqm->sdma_bitmap. Slot numbers outside [0, CIK_SDMA_QUEUES) are ignored.
+ *
+ * sdma_queue_id is unsigned, so only the upper bound needs checking. The
+ * bitmap is a plain unsigned int and is therefore updated with an ordinary
+ * OR rather than through an unsigned long pointer cast.
+ *
+ * NOTE(review): the update is not atomic; this assumes the caller holds
+ * dqm->lock - confirm against the call sites.
+ */
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+                               unsigned int sdma_queue_id)
+{
+       if (sdma_queue_id >= CIK_SDMA_QUEUES)
+               return;
+       dqm->sdma_bitmap |= 1U << sdma_queue_id;
+}
+
+/*
+ * Compute the SDMA VM address word for a queue and store it in
+ * q->properties.sdma_vm_addr.
+ *
+ * SDMA_ATC is always set. For a 32-bit user-mode process SDMA_VA_PTR32 and
+ * the 32-bit SH_MEM bases of the process device are OR-ed in; otherwise the
+ * 64-bit SH_MEM bases nybble is placed through SDMA_VA_SHARED_BASE().
+ * The dqm argument is not used here.
+ */
+static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
+                               struct qcm_process_device *qpd)
+{
+       uint32_t vm_addr;
+
+       if (q->process->is_32bit_user_mode)
+               vm_addr = SDMA_ATC | SDMA_VA_PTR32 |
+                               get_sh_mem_bases_32(qpd_to_pdd(qpd));
+       else
+               vm_addr = SDMA_ATC | SDMA_VA_SHARED_BASE(
+                               get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd)));
+
+       q->properties.sdma_vm_addr = vm_addr;
+}
+
+/*
+ * Create an SDMA queue on the no-CP-scheduler path.
+ *
+ * Reserves an SDMA queue slot, derives the per-engine queue index and the
+ * engine index from it, initializes the MQD through the SDMA MQD manager
+ * and then fills in the queue's SDMA VM address. The slot is released again
+ * if MQD initialization fails.
+ *
+ * Returns 0 on success, -ENOMEM when no SDMA MQD manager or no free slot is
+ * available, or the error code returned by init_mqd().
+ */
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+                                       struct queue *q,
+                                       struct qcm_process_device *qpd)
+{
+       struct mqd_manager *mqd;
+       int retval;
+
+       mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA);
+       if (!mqd)
+               return -ENOMEM;
+
+       retval = allocate_sdma_queue(dqm, &q->sdma_id);
+       if (retval != 0)
+               return retval;
+
+       /*
+        * Split the flat slot number into (engine, queue-within-engine).
+        * Both halves must use the same radix, CIK_SDMA_QUEUES_PER_ENGINE,
+        * so that slot == engine * CIK_SDMA_QUEUES_PER_ENGINE + queue.
+        */
+       q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
+       q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
+
+       pr_debug("kfd: sdma id is:    %d\n", q->sdma_id);
+       pr_debug("     sdma queue id: %d\n", q->properties.sdma_queue_id);
+       pr_debug("     sdma engine id: %d\n", q->properties.sdma_engine_id);
+
+       retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
+                               &q->gart_mqd_addr, &q->properties);
+       if (retval != 0) {
+               /* Give the slot back so it can be reused. */
+               deallocate_sdma_queue(dqm, q->sdma_id);
+               return retval;
+       }
+
+       /*
+        * NOTE(review): sdma_vm_addr is written only after init_mqd() has
+        * been given q->properties - confirm init_mqd() does not need it.
+        */
+       init_sdma_vm(dqm, q, qpd);
+       return 0;
+}
+
 /*
  * Device Queue Manager implementation for cp scheduler
  */
@@ -639,6 +740,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
        mutex_init(&dqm->lock);
        INIT_LIST_HEAD(&dqm->queues);
        dqm->queue_count = dqm->processes_count = 0;
+       dqm->sdma_queue_count = 0;
        dqm->active_runlist = false;
        retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
        if (retval != 0)
@@ -682,7 +784,6 @@ static int start_cpsch(struct device_queue_manager *dqm)
 
        dqm->fence_addr = dqm->fence_mem->cpu_ptr;
        dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
-
        list_for_each_entry(node, &dqm->queues, list)
                if (node->qpd->pqm->process && dqm->dev)
                        kfd_bind_process_to_device(dqm->dev,
@@ -753,6 +854,14 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
        mutex_unlock(&dqm->lock);
 }
 
+/*
+ * Assign q->sdma_id by alternating between 0 and 1 on successive calls,
+ * starting with 0. The alternation state is a function-local static, so it
+ * is shared by every caller.
+ */
+static void select_sdma_engine_id(struct queue *q)
+{
+       static int next_id;
+
+       q->sdma_id = next_id;
+       /* Toggle 0 <-> 1 for the next caller. */
+       next_id = (next_id == 0) ? 1 : 0;
+}
+
 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                        struct qcm_process_device *qpd, int *allocate_vmid)
 {
@@ -768,7 +877,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
        mutex_lock(&dqm->lock);
 
-       mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+       if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+               select_sdma_engine_id(q);
+
+       mqd = dqm->get_mqd_manager(dqm,
+                       get_mqd_type_from_queue_type(q->properties.type));
+
        if (mqd == NULL) {
                mutex_unlock(&dqm->lock);
                return -ENOMEM;
@@ -785,6 +899,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                retval = execute_queues_cpsch(dqm, false);
        }
 
+       if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+                       dqm->sdma_queue_count++;
+
 out:
        mutex_unlock(&dqm->lock);
        return retval;
@@ -808,6 +925,14 @@ static int fence_wait_timeout(unsigned int *fence_addr,
        return 0;
 }
 
+/*
+ * Send an unmap-queue request for SDMA queues on the given engine through
+ * the packet manager, using the "all queues" preemption filter.
+ *
+ * Returns whatever pm_send_unmap_queue() returns.
+ */
+static int destroy_sdma_queues(struct device_queue_manager *dqm,
+                               unsigned int sdma_engine)
+{
+       int status;
+
+       status = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
+                       KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+                       sdma_engine);
+
+       return status;
+}
+
 static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 {
        int retval;
@@ -820,6 +945,15 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
                mutex_lock(&dqm->lock);
        if (dqm->active_runlist == false)
                goto out;
+
+       pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
+               dqm->sdma_queue_count);
+
+       if (dqm->sdma_queue_count > 0) {
+               destroy_sdma_queues(dqm, 0);
+               destroy_sdma_queues(dqm, 1);
+       }
+
        retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
                        KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
        if (retval != 0)
@@ -891,13 +1025,16 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
        /* remove queue from list to prevent rescheduling after preemption */
        mutex_lock(&dqm->lock);
-
-       mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+       mqd = dqm->get_mqd_manager(dqm,
+                       get_mqd_type_from_queue_type(q->properties.type));
        if (!mqd) {
                retval = -ENOMEM;
                goto failed;
        }
 
+       if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+               dqm->sdma_queue_count--;
+
        list_del(&q->list);
        dqm->queue_count--;
 
index c3f189e..554c06e 100644 (file)
@@ -36,6 +36,9 @@
 #define KFD_VMID_START_OFFSET                  (8)
 #define VMID_PER_DEVICE                                CIK_VMID_NUM
 #define KFD_DQM_FIRST_PIPE                     (0)
+/* Number of SDMA queue slots tracked in dqm->sdma_bitmap. */
+#define CIK_SDMA_QUEUES                                (4)
+/* Radix used to derive a queue's index within its engine from the slot id. */
+#define CIK_SDMA_QUEUES_PER_ENGINE             (2)
+/* Presumably the number of SDMA engines on CIK parts - TODO confirm. */
+#define CIK_SDMA_ENGINE_NUM                    (2)
 
 struct device_process_node {
        struct qcm_process_device *qpd;
@@ -130,8 +133,10 @@ struct device_queue_manager {
        struct list_head        queues;
        unsigned int            processes_count;
        unsigned int            queue_count;
+       unsigned int            sdma_queue_count;
        unsigned int            next_pipe_to_allocate;
        unsigned int            *allocated_queues;
+       unsigned int            sdma_bitmap;
        unsigned int            vmid_bitmap;
        uint64_t                pipelines_addr;
        struct kfd_mem_obj      *pipeline_mem;
index d12f9d3..948b1ca 100644 (file)
@@ -128,7 +128,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
        /* let DQM handle it*/
        q_properties->vmid = 0;
        q_properties->queue_id = qid;
-       q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
 
        retval = init_queue(q, *q_properties);
        if (retval != 0)
@@ -189,6 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
        }
 
        switch (type) {
+       case KFD_QUEUE_TYPE_SDMA:
        case KFD_QUEUE_TYPE_COMPUTE:
                /* check if there is over subscription */
                if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&