drm/amdkfd: avoid circular lock caused by HMM change
author Philip Yang <Philip.Yang@amd.com>
Wed, 5 Dec 2018 19:03:43 +0000 (14:03 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 19 Mar 2019 20:03:37 +0000 (15:03 -0500)
There is a circular lock dependency between the gfx and kfd paths with the HMM change:
lock(dqm) -> bo::reserve -> amdgpu_mn_lock

To avoid this, move init/uninit_mqd() out of lock(dqm), to remove nested
locking between mmap_sem and bo::reserve. The locking order
is: bo::reserve -> amdgpu_mn_lock(p->mn)

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index c6c9530..1d6b157 100644 (file)
@@ -1162,21 +1162,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
        int retval;
        struct mqd_manager *mqd_mgr;
 
-       retval = 0;
-
-       dqm_lock(dqm);
-
        if (dqm->total_queue_count >= max_num_of_queues_per_device) {
                pr_warn("Can't create new usermode queue because %d queues were already created\n",
                                dqm->total_queue_count);
                retval = -EPERM;
-               goto out_unlock;
+               goto out;
        }
 
        if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
                retval = allocate_sdma_queue(dqm, &q->sdma_id);
                if (retval)
-                       goto out_unlock;
+                       goto out;
                q->properties.sdma_queue_id =
                        q->sdma_id / get_num_sdma_engines(dqm);
                q->properties.sdma_engine_id =
@@ -1187,6 +1183,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
        if (retval)
                goto out_deallocate_sdma_queue;
 
+       /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
+        * lock(dqm) -> bo::reserve
+        */
        mqd_mgr = dqm->ops.get_mqd_manager(dqm,
                        get_mqd_type_from_queue_type(q->properties.type));
 
@@ -1194,6 +1193,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                retval = -ENOMEM;
                goto out_deallocate_doorbell;
        }
+
        /*
         * Eviction state logic: we only mark active queues as evicted
         * to avoid the overhead of restoring inactive queues later
@@ -1202,9 +1202,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                q->properties.is_evicted = (q->properties.queue_size > 0 &&
                                            q->properties.queue_percent > 0 &&
                                            q->properties.queue_address != 0);
-
        dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
        q->properties.tba_addr = qpd->tba_addr;
        q->properties.tma_addr = qpd->tma_addr;
        retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1212,6 +1210,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
        if (retval)
                goto out_deallocate_doorbell;
 
+       dqm_lock(dqm);
+
        list_add(&q->list, &qpd->queues_list);
        qpd->queue_count++;
        if (q->properties.is_active) {
@@ -1239,9 +1239,7 @@ out_deallocate_doorbell:
 out_deallocate_sdma_queue:
        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
                deallocate_sdma_queue(dqm, q->sdma_id);
-out_unlock:
-       dqm_unlock(dqm);
-
+out:
        return retval;
 }
 
@@ -1404,8 +1402,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
                        qpd->reset_wavefronts = true;
        }
 
-       mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
        /*
         * Unconditionally decrement this counter, regardless of the queue's
         * type
@@ -1416,6 +1412,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
        dqm_unlock(dqm);
 
+       /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+       mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
        return retval;
 
 failed:
@@ -1637,7 +1636,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
                qpd->reset_wavefronts = false;
        }
 
-       /* lastly, free mqd resources */
+       dqm_unlock(dqm);
+
+       /* Lastly, free mqd resources.
+        * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+        */
        list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
                mqd_mgr = dqm->ops.get_mqd_manager(dqm,
                        get_mqd_type_from_queue_type(q->properties.type));
@@ -1651,7 +1654,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
        }
 
 out:
-       dqm_unlock(dqm);
        return retval;
 }