From 9fd3f1bfae6c6c75f0c8aedb5d499d74cdb52eb9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 27 Sep 2017 00:09:52 -0400 Subject: [PATCH] drm/amdkfd: Improve process termination handling Separate device queue termination from process queue manager termination. Unmap all queues at once instead of one at a time. Unmap device queues before the PASID is unbound, in the kfd_process_iommu_unbind_callback. When resetting wavefronts in non-HWS mode, do it before the VMID is released. Signed-off-by: Ben Goz Signed-off-by: shaoyun liu Signed-off-by: Amber Lin Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 185 ++++++++++++++++----- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 5 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 18 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 36 ++-- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 37 +++-- 5 files changed, 200 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f5a5988..dd7e445 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -296,65 +296,73 @@ out_deallocate_hqd: return retval; } -static int destroy_queue_nocpsch(struct device_queue_manager *dqm, +/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked + * to avoid asynchronized access + */ +static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { int retval; struct mqd_manager *mqd; - retval = 0; - - mutex_lock(&dqm->lock); + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) + return -ENOMEM; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (mqd == NULL) { - retval = -ENOMEM; - goto out; - } deallocate_hqd(dqm, q); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); - if (mqd == NULL) { - retval = -ENOMEM; - goto out; - } dqm->sdma_queue_count--; deallocate_sdma_queue(dqm, q->sdma_id); } else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); - retval = -EINVAL; - goto out; + return -EINVAL; } + dqm->total_queue_count--; retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); - - if (retval) - goto out; + if (retval == -ETIME) + qpd->reset_wavefronts = true; mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); list_del(&q->list); - if (list_empty(&qpd->queues_list)) + if (list_empty(&qpd->queues_list)) { + if (qpd->reset_wavefronts) { + pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", + dqm->dev); + /* dbgdev_wave_reset_wavefronts has to be called before + * deallocate_vmid(), i.e. when vmid is still in use. + */ + dbgdev_wave_reset_wavefronts(dqm->dev, + qpd->pqm->process); + qpd->reset_wavefronts = false; + } + deallocate_vmid(dqm, qpd, q); + } if (q->properties.is_active) dqm->queue_count--; - /* - * Unconditionally decrement this counter, regardless of the queue's - * type - */ - dqm->total_queue_count--; - pr_debug("Total of %d queues are accountable so far\n", - dqm->total_queue_count); + return retval; +} -out: +static int destroy_queue_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + int retval; + + mutex_lock(&dqm->lock); + retval = destroy_queue_nocpsch_locked(dqm, qpd, q); mutex_unlock(&dqm->lock); + return retval; } @@ -921,10 +929,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, uint32_t filter_param) { - int retval; - struct kfd_process_device *pdd; - - retval = 0; + int retval = 0; if (!dqm->active_runlist) return retval; @@ -948,12 +953,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); - if (retval) { - pdd = kfd_get_process_device_data(dqm->dev, - kfd_get_process(current)); - pdd->reset_wavefronts = true; + if (retval) return retval; - } + pm_release_ib(&dqm->packets); dqm->active_runlist = false; @@ -1015,7 +1017,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, if (q->properties.is_active) dqm->queue_count--; - execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (retval == -ETIME) + qpd->reset_wavefronts = true; mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); @@ -1105,6 +1110,108 @@ out: return retval; } +static int process_termination_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q, *next; + struct device_process_node *cur, *next_dpn; + int retval = 0; + + mutex_lock(&dqm->lock); + + /* Clear all user mode queues */ + list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + int ret; + + ret = destroy_queue_nocpsch_locked(dqm, qpd, q); + if (ret) + retval = ret; + } + + /* Unregister process */ + list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { + if (qpd == cur->qpd) { + list_del(&cur->list); + kfree(cur); + dqm->processes_count--; + break; + } + } + + mutex_unlock(&dqm->lock); + return retval; +} + + +static int process_termination_cpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + int retval; + struct queue *q, *next; + struct kernel_queue *kq, *kq_next; + struct mqd_manager *mqd; + struct device_process_node *cur, *next_dpn; + enum kfd_unmap_queues_filter filter = + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; + + retval = 0; + + mutex_lock(&dqm->lock); + + /* Clean all kernel queues */ + list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { + list_del(&kq->list); + dqm->queue_count--; + qpd->is_debug = false; + dqm->total_queue_count--; + filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; + } + + /* Clear all user mode queues */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count--; + + if (q->properties.is_active) + dqm->queue_count--; + + dqm->total_queue_count--; + } + + /* Unregister process */ + list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { + if (qpd == cur->qpd) { + list_del(&cur->list); + kfree(cur); + dqm->processes_count--; + break; + } + } + + retval = execute_queues_cpsch(dqm, filter, 0); + if (retval || qpd->reset_wavefronts) { + pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); + dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); + qpd->reset_wavefronts = false; + } + + /* lastly, free mqd resources */ + list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) { + retval = -ENOMEM; + goto out; + } + list_del(&q->list); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + } + +out: + mutex_unlock(&dqm->lock); + return retval; +} + struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) { struct device_queue_manager *dqm; @@ -1133,6 +1240,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.process_termination = process_termination_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1147,6 +1255,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.initialize = initialize_nocpsch; dqm->ops.uninitialize = uninitialize; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.process_termination = process_termination_nocpsch; break; default: pr_err("Invalid scheduling policy %d\n", sched_policy); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 60d46ce0..31c2b1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -77,6 +77,8 @@ struct device_process_node { * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the * memory apertures. * + * @process_termination: Clears all process queues belongs to that device. + * */ struct device_queue_manager_ops { @@ -120,6 +122,9 @@ struct device_queue_manager_ops { enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); + + int (*process_termination)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); }; struct device_queue_manager_asic_ops { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 8af0d6f..2e13f5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -421,6 +421,12 @@ struct qcm_process_device { unsigned int queue_count; unsigned int vmid; bool is_debug; + + /* This flag tells if we should reset all wavefronts on + * process termination + */ + bool reset_wavefronts; + /* * All the memory management data should be here too */ @@ -454,6 +460,8 @@ struct kfd_process_device { /* The device that owns this data. */ struct kfd_dev *dev; + /* The process that owns this kfd_process_device. */ + struct kfd_process *process; /* per-process-per device QCM data structure */ struct qcm_process_device qpd; @@ -469,10 +477,12 @@ struct kfd_process_device { /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ enum kfd_pdd_bound bound; - /* This flag tells if we should reset all - * wavefronts on process termination + /* Flag used to tell the pdd has dequeued from the dqm. + * This is used to prevent dev->dqm->ops.process_termination() from + * being called twice when it is already called in IOMMU callback + * function. */ - bool reset_wavefronts; + bool already_dequeued; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -659,6 +669,8 @@ struct process_queue_node { struct list_head process_queue_list; }; +void kfd_process_dequeue_from_device(struct kfd_process_device *pdd); +void kfd_process_dequeue_from_all_devices(struct kfd_process *p); int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p); void pqm_uninit(struct process_queue_manager *pqm); int pqm_create_queue(struct process_queue_manager *pqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1325f88..3ccb3b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -171,9 +171,6 @@ static void kfd_process_wq_release(struct work_struct *work) pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", pdd->dev->id, p->pasid); - if (pdd->reset_wavefronts) - dbgdev_wave_reset_wavefronts(pdd->dev, p); - if (pdd->bound == PDD_BOUND) amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); @@ -237,24 +234,17 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_lock(&p->mutex); - /* In case our notifier is called before IOMMU notifier */ + kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); /* Iterate over all process device data structure and check - * if we should delete debug managers and reset all wavefronts + * if we should delete debug managers */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + list_for_each_entry(pdd, &p->per_device_data, per_device_list) if ((pdd->dev->dbgmgr) && (pdd->dev->dbgmgr->pasid == p->pasid)) kfd_dbgmgr_destroy(pdd->dev->dbgmgr); - if (pdd->reset_wavefronts) { - pr_warn("Resetting all wave fronts\n"); - dbgdev_wave_reset_wavefronts(pdd->dev, p); - pdd->reset_wavefronts = false; - } - } - mutex_unlock(&p->mutex); /* @@ -368,8 +358,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, INIT_LIST_HEAD(&pdd->qpd.queues_list); INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); pdd->qpd.dqm = dev->dqm; - pdd->reset_wavefronts = false; + pdd->process = p; pdd->bound = PDD_UNBOUND; + pdd->already_dequeued = false; list_add(&pdd->per_device_list, &p->per_device_data); } @@ -498,19 +489,12 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) kfd_dbgmgr_destroy(dev->dbgmgr); - pqm_uninit(&p->pqm); - pdd = kfd_get_process_device_data(dev, p); - - if (!pdd) { - mutex_unlock(&p->mutex); - return; - } - - if (pdd->reset_wavefronts) { - dbgdev_wave_reset_wavefronts(pdd->dev, p); - pdd->reset_wavefronts = false; - } + if (pdd) + /* For GPU relying on IOMMU, we need to dequeue here + * when PASID is still bound. + */ + kfd_process_dequeue_from_device(pdd); mutex_unlock(&p->mutex); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 68fe0d9..31ec3ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -63,6 +63,25 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, return 0; } +void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) +{ + struct kfd_dev *dev = pdd->dev; + + if (pdd->already_dequeued) + return; + + dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); + pdd->already_dequeued = true; +} + +void kfd_process_dequeue_from_all_devices(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + kfd_process_dequeue_from_device(pdd); +} + int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { INIT_LIST_HEAD(&pqm->queues); @@ -78,21 +97,14 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) void pqm_uninit(struct process_queue_manager *pqm) { - int retval; struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - retval = pqm_destroy_queue( - pqm, - (pqn->q != NULL) ? - pqn->q->properties.queue_id : - pqn->kq->queue->properties.queue_id); - - if (retval != 0) { - pr_err("failed to destroy queue\n"); - return; - } + uninit_queue(pqn->q); + list_del(&pqn->process_queue_list); + kfree(pqn); } + kfree(pqm->queue_slot_bitmap); pqm->queue_slot_bitmap = NULL; } @@ -290,9 +302,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (pqn->q) { dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); - if (retval != 0) - return retval; - uninit_queue(pqn->q); } -- 2.7.4