drm/amdgpu: add configurable grace period for unmap queues
authorJonathan Kim <jonathan.kim@amd.com>
Thu, 23 Mar 2023 21:17:20 +0000 (17:17 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 16:35:31 +0000 (12:35 -0400)
The HWS schedule allows a grace period for wave completion prior to
preemption for better performance by avoiding CWSR on waves that can
potentially complete quickly. The debugger, on the other hand, will
want to inspect wave status immediately after it actively triggers
preemption (a suspend function to be provided).

To minimize latency between preemption and debugger wave inspection, allow
immediate preemption by setting the grace period to 0.

Note that setting the preemption grace period to 0 will result in an
infinite grace period being set due to a CP FW bug so set it to 1 for now.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
14 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index a6f98141c29c77ba09949a0a5fd10f3e44e943d3..b811a098505037c0e26c15e32414a797f070312e 100644 (file)
@@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
        .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
        .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
        .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+       .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+       .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
        .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 };
index d2918e5c0dea32ee729ebd82a4a61194fbd921dc..a62bd0068515c6bccc97f78644e49378bb76edc9 100644 (file)
@@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
                                kgd_gfx_v9_set_vm_context_page_table_base,
        .enable_debug_trap = kgd_arcturus_enable_debug_trap,
        .disable_debug_trap = kgd_arcturus_disable_debug_trap,
+       .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+       .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
        .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
        .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
 };
index 240f5006e278dcb13972a17cfd0c75b114ae72e7..98006c7021ddfb27b6b63e68e1a4c72213a6eee8 100644 (file)
@@ -803,6 +803,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
        return 0;
 }
 
+/* kgd_gfx_v10_get_iq_wait_times: Stores the raw mmCP_IQ_WAIT_TIME2 value in
+ * *wait_times (TIME1 is not read here). Wait counts named for these registers:
+ *     ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
+ *     atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ *     wrm_offload_wait_time    -- Wait Count for WAIT_REG_MEM Offloads.
+ *     gws_wait_time            -- Wait Count for Global Wave Syncs.
+ *     que_sleep_wait_time      -- Wait Count for Dequeue Retry.
+ *     sch_wave_wait_time       -- Wait Count for Scheduling Wave Message.
+ *     sem_rearm_wait_time      -- Wait Count for Semaphore re-arm.
+ *     deq_retry_wait_time      -- Wait Count for Global Wave Syncs. NOTE(review): same text as gws; confirm.
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+                                       uint32_t *wait_times)
+
+{
+       *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+                                               uint32_t wait_times,
+                                               uint32_t grace_period,
+                                               uint32_t *reg_offset,
+                                               uint32_t *reg_data)
+{
+       *reg_data = wait_times; /* start from the caller-supplied register value */
+
+       /*
+        * The CP cannot handle a 0 grace period input; it would result in
+        * an infinite grace period being set, so use 1 instead to prevent this.
+        */
+       if (grace_period == 0)
+               grace_period = 1;
+
+       *reg_data = REG_SET_FIELD(*reg_data, /* SCH_WAVE field <- grace_period */
+                       CP_IQ_WAIT_TIME2,
+                       SCH_WAVE,
+                       grace_period);
+
+       *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); /* offset of the register *reg_data is meant for */
+}
+
 static void program_trap_handler_settings(struct amdgpu_device *adev,
                uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
                uint32_t inst)
@@ -848,5 +889,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
        .set_vm_context_page_table_base = set_vm_context_page_table_base,
        .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
        .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+       .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+       .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
        .program_trap_handler_settings = program_trap_handler_settings,
 };
index 251d61fbde079848c3bd423ecf5c742396c875df..1e993a21364643919c5100fc92ffd5fdf47d027e 100644 (file)
@@ -26,3 +26,9 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
 uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
                                        bool keep_trap_enabled,
                                        uint32_t vmid);
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+                                              uint32_t wait_times,
+                                              uint32_t grace_period,
+                                              uint32_t *reg_offset,
+                                              uint32_t *reg_data);
index 8b293f3dcbd2bc2ef2d111bc7e4fbd953fc78560..387bdf4823c90ea7646a4f81966a8e26931b8077 100644 (file)
@@ -672,6 +672,8 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
        .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
        .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
        .program_trap_handler_settings = program_trap_handler_settings_v10_3,
+       .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+       .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
        .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
        .disable_debug_trap = kgd_gfx_v10_disable_debug_trap
 };
index 8d7d04704b00f99de2b48ace2f05f86a9386f158..829ee720cc44897eaa6bf8847a5f053f0fd43068 100644 (file)
@@ -739,6 +739,24 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
        return 0;
 }
 
+/* kgd_gfx_v9_get_iq_wait_times: Stores the raw mmCP_IQ_WAIT_TIME2 value in
+ * *wait_times (TIME1 is not read here). Wait counts named for these registers:
+ *     ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
+ *     atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ *     wrm_offload_wait_time    -- Wait Count for WAIT_REG_MEM Offloads.
+ *     gws_wait_time            -- Wait Count for Global Wave Syncs.
+ *     que_sleep_wait_time      -- Wait Count for Dequeue Retry.
+ *     sch_wave_wait_time       -- Wait Count for Scheduling Wave Message.
+ *     sem_rearm_wait_time      -- Wait Count for Semaphore re-arm.
+ *     deq_retry_wait_time      -- Wait Count for Global Wave Syncs. NOTE(review): same text as gws; confirm.
+ */
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+                                       uint32_t *wait_times)
+
+{
+       *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
 void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
                        uint32_t vmid, uint64_t page_table_base)
 {
@@ -926,6 +944,29 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
                                adev->gfx.cu_info.max_waves_per_simd;
 }
 
+void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+               uint32_t wait_times,
+               uint32_t grace_period,
+               uint32_t *reg_offset,
+               uint32_t *reg_data)
+{
+       *reg_data = wait_times; /* start from the caller-supplied register value */
+
+       /*
+        * The CP cannot handle a 0 grace period input; it would result in
+        * an infinite grace period being set, so use 1 instead to prevent this.
+        */
+       if (grace_period == 0)
+               grace_period = 1;
+
+       *reg_data = REG_SET_FIELD(*reg_data, /* SCH_WAVE field <- grace_period */
+                       CP_IQ_WAIT_TIME2,
+                       SCH_WAVE,
+                       grace_period);
+
+       *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); /* offset of the register *reg_data is meant for */
+}
+
 void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
                uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
 {
@@ -969,6 +1010,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
        .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
        .enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
        .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
+       .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+       .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
        .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
        .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 };
index 9588ff055393d58000c1e2562262577eaaafee31..fed5b7f18b1adccbc7b047d05955ee61650e7d2a 100644 (file)
@@ -20,8 +20,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-
-
 void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
                uint32_t sh_mem_config,
                uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
@@ -73,3 +71,9 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
 uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
                                        bool keep_trap_enabled,
                                        uint32_t vmid);
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
+void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+                                              uint32_t wait_times,
+                                              uint32_t grace_period,
+                                              uint32_t *reg_offset,
+                                              uint32_t *reg_data);
index 2baa0781eafc2ffc16bfb97ff3628452641d5f52..0b88a64e61fe2f2d37f7279af99a00a608583472 100644 (file)
@@ -46,10 +46,13 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
                                enum kfd_unmap_queues_filter filter,
-                               uint32_t filter_param);
+                               uint32_t filter_param,
+                               uint32_t grace_period);
 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
                                enum kfd_unmap_queues_filter filter,
-                               uint32_t filter_param, bool reset);
+                               uint32_t filter_param,
+                               uint32_t grace_period,
+                               bool reset);
 
 static int map_queues_cpsch(struct device_queue_manager *dqm);
 
@@ -866,7 +869,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
        if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
                if (!dqm->dev->kfd->shared_resources.enable_mes)
                        retval = unmap_queues_cpsch(dqm,
-                                                   KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+                                                   KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
                else if (prev_active)
                        retval = remove_queue_mes(dqm, q, &pdd->qpd);
 
@@ -1042,7 +1045,8 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
                retval = execute_queues_cpsch(dqm,
                                              qpd->is_debug ?
                                              KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
-                                             KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+                                             KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+                                             USE_DEFAULT_GRACE_PERIOD);
 
 out:
        dqm_unlock(dqm);
@@ -1182,8 +1186,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
        }
        if (!dqm->dev->kfd->shared_resources.enable_mes)
                retval = execute_queues_cpsch(dqm,
-                                             KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-
+                                             KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
        eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
        atomic64_add(eviction_duration, &pdd->evict_duration_counter);
 vm_not_acquired:
@@ -1525,6 +1528,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 
        init_sdma_bitmaps(dqm);
 
+       if (dqm->dev->kfd2kgd->get_iq_wait_times)
+               dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
+                                       &dqm->wait_times);
        return 0;
 }
 
@@ -1563,8 +1569,9 @@ static int start_cpsch(struct device_queue_manager *dqm)
        dqm->is_hws_hang = false;
        dqm->is_resetting = false;
        dqm->sched_running = true;
+
        if (!dqm->dev->kfd->shared_resources.enable_mes)
-               execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+               execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
        dqm_unlock(dqm);
 
        return 0;
@@ -1589,7 +1596,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
 
        if (!dqm->is_hws_hang) {
                if (!dqm->dev->kfd->shared_resources.enable_mes)
-                       unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+                       unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
                else
                        remove_all_queues_mes(dqm);
        }
@@ -1631,7 +1638,8 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
        list_add(&kq->list, &qpd->priv_queue_list);
        increment_queue_count(dqm, qpd, kq->queue);
        qpd->is_debug = true;
-       execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+       execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+                       USE_DEFAULT_GRACE_PERIOD);
        dqm_unlock(dqm);
 
        return 0;
@@ -1645,7 +1653,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
        list_del(&kq->list);
        decrement_queue_count(dqm, qpd, kq->queue);
        qpd->is_debug = false;
-       execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+       execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+                       USE_DEFAULT_GRACE_PERIOD);
        /*
         * Unconditionally decrement this counter, regardless of the queue's
         * type.
@@ -1722,7 +1731,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
                if (!dqm->dev->kfd->shared_resources.enable_mes)
                        retval = execute_queues_cpsch(dqm,
-                                       KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+                                       KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
                else
                        retval = add_queue_mes(dqm, q, qpd);
                if (retval)
@@ -1811,7 +1820,9 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
 /* dqm->lock mutex has to be locked before calling this function */
 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
                                enum kfd_unmap_queues_filter filter,
-                               uint32_t filter_param, bool reset)
+                               uint32_t filter_param,
+                               uint32_t grace_period,
+                               bool reset)
 {
        int retval = 0;
        struct mqd_manager *mqd_mgr;
@@ -1823,6 +1834,12 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
        if (!dqm->active_runlist)
                return retval;
 
+       if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+               retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
+               if (retval)
+                       return retval;
+       }
+
        retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
        if (retval)
                return retval;
@@ -1855,6 +1872,13 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
                return -ETIME;
        }
 
+       /* We need to reset the grace period value for this device */
+       if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+               if (pm_update_grace_period(&dqm->packet_mgr,
+                                       USE_DEFAULT_GRACE_PERIOD))
+                       pr_err("Failed to reset grace period\n");
+       }
+
        pm_release_ib(&dqm->packet_mgr);
        dqm->active_runlist = false;
 
@@ -1870,7 +1894,7 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm,
        dqm_lock(dqm);
 
        retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
-                       pasid, true);
+                       pasid, USE_DEFAULT_GRACE_PERIOD, true);
 
        dqm_unlock(dqm);
        return retval;
@@ -1879,13 +1903,14 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm,
 /* dqm->lock mutex has to be locked before calling this function */
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
                                enum kfd_unmap_queues_filter filter,
-                               uint32_t filter_param)
+                               uint32_t filter_param,
+                               uint32_t grace_period)
 {
        int retval;
 
        if (dqm->is_hws_hang)
                return -EIO;
-       retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
+       retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
        if (retval)
                return retval;
 
@@ -1943,7 +1968,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
                if (!dqm->dev->kfd->shared_resources.enable_mes) {
                        decrement_queue_count(dqm, qpd, q);
                        retval = execute_queues_cpsch(dqm,
-                                                     KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+                                                     KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+                                                     USE_DEFAULT_GRACE_PERIOD);
                        if (retval == -ETIME)
                                qpd->reset_wavefronts = true;
                } else {
@@ -2228,7 +2254,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
        }
 
        if (!dqm->dev->kfd->shared_resources.enable_mes)
-               retval = execute_queues_cpsch(dqm, filter, 0);
+               retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
 
        if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
                pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
@@ -2589,7 +2615,8 @@ int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
                return r;
        }
        dqm->active_runlist = true;
-       r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+       r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
+                               0, USE_DEFAULT_GRACE_PERIOD);
        dqm_unlock(dqm);
 
        return r;
index cd4383bb207f4cec6bd055d10a79e3c6dd90b46a..d4dd3b4acbf0a8af2414de79ade63e73ee1348a4 100644 (file)
@@ -37,6 +37,7 @@
 
 #define KFD_MES_PROCESS_QUANTUM                100000
 #define KFD_MES_GANG_QUANTUM           10000
+#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
 
 struct device_process_node {
        struct qcm_process_device *qpd;
@@ -259,6 +260,8 @@ struct device_queue_manager {
 
        /* used for GFX 9.4.3 only */
        uint32_t                current_logical_xcc_start;
+
+       uint32_t                wait_times;
 };
 
 void device_queue_manager_init_cik(
index 2f54172e9175c12dc2bcb1bf67cf178eccaf477c..401096c103b2f1e9d51d0cea56089eb94195eb5c 100644 (file)
@@ -370,6 +370,38 @@ out:
        return retval;
 }
 
+int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+{
+       int retval = 0; /* stays 0 when no packet is needed (size == 0) */
+       uint32_t *buffer, size; /* NOTE(review): buffer is only set by kq_acquire_packet_buffer(); confirm it always writes it */
+
+       size = pm->pmf->set_grace_period_size; /* 0 for packet managers without a set_grace_period hook (e.g. kfd_vi_pm_funcs) */
+
+       mutex_lock(&pm->lock);
+
+       if (size) {
+               kq_acquire_packet_buffer(pm->priv_queue, /* NOTE(review): return value ignored; failure is detected via !buffer below */
+                       size / sizeof(uint32_t), /* size is in bytes; the queue is asked for 32-bit words */
+                       (unsigned int **)&buffer);
+
+               if (!buffer) {
+                       pr_err("Failed to allocate buffer on kernel queue\n");
+                       retval = -ENOMEM;
+                       goto out;
+               }
+
+               retval = pm->pmf->set_grace_period(pm, buffer, grace_period); /* fill the packet in place */
+               if (!retval)
+                       kq_submit_packet(pm->priv_queue);
+               else
+                       kq_rollback_packet(pm->priv_queue); /* drop the acquired space if the packet could not be built */
+       }
+
+out:
+       mutex_unlock(&pm->lock);
+       return retval;
+}
+
 int pm_send_unmap_queue(struct packet_manager *pm,
                        enum kfd_unmap_queues_filter filter,
                        uint32_t filter_param, bool reset)
index 44cf3a5f6fdbae760fa2ce47cba2a3907deeead3..1fda6dcf84b16fc95afff52762c60c24378183f8 100644 (file)
@@ -262,6 +262,41 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
        return 0;
 }
 
+static int pm_set_grace_period_v9(struct packet_manager *pm, /* fills buffer with a WRITE_DATA packet; always returns 0 */
+               uint32_t *buffer,
+               uint32_t grace_period)
+{
+       struct pm4_mec_write_data_mmio *packet;
+       uint32_t reg_offset = 0;
+       uint32_t reg_data = 0;
+
+       pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( /* called without a NULL check on the hook */
+                       pm->dqm->dev->adev,
+                       pm->dqm->wait_times, /* value saved by get_iq_wait_times() in initialize_cpsch() */
+                       grace_period,
+                       &reg_offset,
+                       &reg_data);
+
+       if (grace_period == USE_DEFAULT_GRACE_PERIOD)
+               reg_data = pm->dqm->wait_times; /* restore: write back the saved register value unmodified */
+
+       packet = (struct pm4_mec_write_data_mmio *)buffer;
+       memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); /* zero all fields that are not set below */
+
+       packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA,
+                                       sizeof(struct pm4_mec_write_data_mmio));
+
+       packet->bitfields2.dst_sel  = dst_sel___write_data__mem_mapped_register;
+       packet->bitfields2.addr_incr =
+                       addr_incr___write_data__do_not_increment_address;
+
+       packet->bitfields3.dst_mmreg_addr = reg_offset; /* offset reported by build_grace_period_packet_info() */
+
+       packet->data = reg_data; /* value to be written to that register */
+
+       return 0;
+}
+
 static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
                        enum kfd_unmap_queues_filter filter,
                        uint32_t filter_param, bool reset)
@@ -345,6 +380,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
        .set_resources          = pm_set_resources_v9,
        .map_queues             = pm_map_queues_v9,
        .unmap_queues           = pm_unmap_queues_v9,
+       .set_grace_period       = pm_set_grace_period_v9,
        .query_status           = pm_query_status_v9,
        .release_mem            = NULL,
        .map_process_size       = sizeof(struct pm4_mes_map_process),
@@ -352,6 +388,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
+       .set_grace_period_size  = sizeof(struct pm4_mec_write_data_mmio),
        .query_status_size      = sizeof(struct pm4_mes_query_status),
        .release_mem_size       = 0,
 };
@@ -362,6 +399,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
        .set_resources          = pm_set_resources_v9,
        .map_queues             = pm_map_queues_v9,
        .unmap_queues           = pm_unmap_queues_v9,
+       .set_grace_period       = pm_set_grace_period_v9,
        .query_status           = pm_query_status_v9,
        .release_mem            = NULL,
        .map_process_size       = sizeof(struct pm4_mes_map_process_aldebaran),
@@ -369,6 +407,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
+       .set_grace_period_size  = sizeof(struct pm4_mec_write_data_mmio),
        .query_status_size      = sizeof(struct pm4_mes_query_status),
        .release_mem_size       = 0,
 };
index faf4772ed317c6dab8e6a025c1d2c29192a84e97..c1199d06d131b6ef8db8e8034f433dd38454cc4f 100644 (file)
@@ -303,6 +303,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
        .set_resources          = pm_set_resources_vi,
        .map_queues             = pm_map_queues_vi,
        .unmap_queues           = pm_unmap_queues_vi,
+       .set_grace_period       = NULL,
        .query_status           = pm_query_status_vi,
        .release_mem            = pm_release_mem_vi,
        .map_process_size       = sizeof(struct pm4_mes_map_process),
@@ -310,6 +311,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
+       .set_grace_period_size  = 0,
        .query_status_size      = sizeof(struct pm4_mes_query_status),
        .release_mem_size       = sizeof(struct pm4_mec_release_mem)
 };
index 2ad708c640125b3149db75217db2ac572c8f23e7..206f1960857fbf55cf39718383cf7ca2a9773c71 100644 (file)
@@ -584,6 +584,71 @@ struct pm4_mec_release_mem {
 
 #endif
 
+#ifndef PM4_MEC_WRITE_DATA_DEFINED
+#define PM4_MEC_WRITE_DATA_DEFINED
+
+enum WRITE_DATA_dst_sel_enum { /* values assigned to pm4_mec_write_data_mmio.bitfields2.dst_sel */
+       dst_sel___write_data__mem_mapped_register = 0, /* the value pm_set_grace_period_v9() uses */
+       dst_sel___write_data__tc_l2 = 2,
+       dst_sel___write_data__gds = 3,
+       dst_sel___write_data__memory = 5,
+       dst_sel___write_data__memory_mapped_adc_persistent_state = 6,
+};
+
+enum WRITE_DATA_addr_incr_enum { /* values assigned to pm4_mec_write_data_mmio.bitfields2.addr_incr */
+       addr_incr___write_data__increment_address = 0,
+       addr_incr___write_data__do_not_increment_address = 1 /* the value pm_set_grace_period_v9() uses */
+};
+
+enum WRITE_DATA_wr_confirm_enum { /* NOTE(review): presumably for bitfields2.wr_confirm; not referenced in the visible hunks */
+       wr_confirm___write_data__do_not_wait_for_write_confirmation = 0,
+       wr_confirm___write_data__wait_for_write_confirmation = 1
+};
+
+enum WRITE_DATA_cache_policy_enum { /* NOTE(review): presumably for bitfields2.cache_policy; not referenced in the visible hunks */
+       cache_policy___write_data__lru = 0,
+       cache_policy___write_data__stream = 1
+};
+
+
+struct pm4_mec_write_data_mmio { /* packet layout filled in by pm_set_grace_period_v9(): five 32-bit words */
+       union {
+               union PM4_MES_TYPE_3_HEADER header;     /* built with pm_build_pm4_header(IT_WRITE_DATA, ...) */
+               unsigned int ordinal1;
+       };
+
+       union {
+               struct {
+                       unsigned int reserved1:8;
+                       unsigned int dst_sel:4; /* takes a WRITE_DATA_dst_sel_enum value */
+                       unsigned int reserved2:4;
+                       unsigned int addr_incr:1; /* takes a WRITE_DATA_addr_incr_enum value */
+                       unsigned int reserved3:2;
+                       unsigned int resume_vf:1;
+                       unsigned int wr_confirm:1; /* presumably WRITE_DATA_wr_confirm_enum -- TODO confirm */
+                       unsigned int reserved4:4;
+                       unsigned int cache_policy:2; /* presumably WRITE_DATA_cache_policy_enum -- TODO confirm */
+                       unsigned int reserved5:5;
+               } bitfields2;
+               unsigned int ordinal2; /* whole-word view of bitfields2 */
+       };
+
+       union {
+               struct {
+                       unsigned int dst_mmreg_addr:18; /* destination register offset */
+                       unsigned int reserved6:14;
+               } bitfields3;
+               unsigned int ordinal3; /* whole-word view of bitfields3 */
+       };
+
+       uint32_t reserved7; /* left zero by the memset() in pm_set_grace_period_v9() */
+
+       uint32_t data; /* value to write to the destination register */
+
+};
+
+#endif
+
 enum {
        CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
 };
index 1b272f879b4c4ed6e3592b7fd5ab24f001c0bc97..4c912b7735b5b1a174226dd98a419123eaaafe42 100644 (file)
@@ -1350,6 +1350,8 @@ struct packet_manager_funcs {
        int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
                        enum kfd_unmap_queues_filter mode,
                        uint32_t filter_param, bool reset);
+       int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
+                       uint32_t grace_period);
        int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
                        uint64_t fence_address, uint64_t fence_value);
        int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
@@ -1360,6 +1362,7 @@ struct packet_manager_funcs {
        int set_resources_size;
        int map_queues_size;
        int unmap_queues_size;
+       int set_grace_period_size;
        int query_status_size;
        int release_mem_size;
 };
@@ -1382,6 +1385,8 @@ int pm_send_unmap_queue(struct packet_manager *pm,
 
 void pm_release_ib(struct packet_manager *pm);
 
+int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
+
 /* Following PM funcs can be shared among VI and AI */
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);