From a300de40f66b87fa90703c94ffb22917f98eb902 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 27 Jul 2020 15:20:12 +0800 Subject: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v5) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit what: the MQD's save and restore of KCQ (kernel compute queue) cost lots of clocks during world switch which impacts a lot to multi-VF performance how: introduce a paramter to control the number of KCQ to avoid performance drop if there is no kernel compute queue needed notes: this paramter only affects gfx 8/9/10 v2: refine namings v3: choose queues for each ring to that try best to cross pipes evenly. v4: fix indentation some cleanupsin the gfx_compute_queue_acquire() v5: further fix on indentations more cleanupsin gfx_compute_queue_acquire() TODO: in the future we will let hypervisor driver to set this paramter automatically thus no need for user to configure it through modprobe in virtual machine Signed-off-by: Monk Liu Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 49 ++++++++++++------------------ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 30 +++++++++--------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 +++++++++--------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 31 ++++++++++--------- 7 files changed, 76 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e2eed5c..75631b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -202,6 +202,7 @@ extern int amdgpu_si_support; #ifdef CONFIG_DRM_AMDGPU_CIK extern int amdgpu_cik_support; #endif +extern int amdgpu_num_kcq; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d01d796..79b0da3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_gmc_tmz_set(adev); + if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { + amdgpu_num_kcq = 8; + dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid paramter provided by user\n"); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index dc47549..6d66705 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -150,6 +150,7 @@ int amdgpu_noretry; int amdgpu_force_asic_type = -1; int amdgpu_tmz = 0; int amdgpu_reset_method = -1; /* auto */ +int amdgpu_num_kcq = -1; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -776,6 +777,9 @@ module_param_named(reset_method, amdgpu_reset_method, int, 0444); MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)"); module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)"); +module_param_named(num_kcq, amdgpu_num_kcq, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8eff017..0cd9de6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -202,40 +202,29 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) { - int i, queue, pipe, mec; + int i, queue, pipe; bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); - - /* policy for amdgpu compute queue ownership */ - for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - queue = i % adev->gfx.mec.num_queue_per_pipe; - pipe = (i / adev->gfx.mec.num_queue_per_pipe) - % adev->gfx.mec.num_pipe_per_mec; - mec = (i / adev->gfx.mec.num_queue_per_pipe) - / adev->gfx.mec.num_pipe_per_mec; - - /* we've run out of HW */ - if (mec >= adev->gfx.mec.num_mec) - break; - - if (multipipe_policy) { - /* policy: amdgpu owns the first two queues of the first MEC */ - if (mec == 0 && queue < 2) - set_bit(i, adev->gfx.mec.queue_bitmap); - } else { - /* policy: amdgpu owns all queues in the first pipe */ - if (mec == 0 && pipe == 0) - set_bit(i, adev->gfx.mec.queue_bitmap); + int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe, + adev->gfx.num_compute_rings); + + if (multipipe_policy) { + /* policy: make queues evenly cross all pipes on MEC1 only */ + for (i = 0; i < max_queues_per_mec; i++) { + pipe = i % adev->gfx.mec.num_pipe_per_mec; + queue = (i / adev->gfx.mec.num_pipe_per_mec) % + adev->gfx.mec.num_queue_per_pipe; + + set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue, + adev->gfx.mec.queue_bitmap); } + } else { + /* policy: amdgpu owns all queues in the given pipe */ + for (i = 0; i < max_queues_per_mec; ++i) + set_bit(i, adev->gfx.mec.queue_bitmap); } - /* update the number of active compute rings */ - adev->gfx.num_compute_rings = - bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); - - /* If you hit this case and edited the policy, you probably just - * need to increase AMDGPU_MAX_COMPUTE_RINGS */ - if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)); } void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index fe8ccc9..622f442 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev) amdgpu_gfx_compute_queue_acquire(adev); mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE; - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &adev->gfx.mec.hpd_eop_obj, - &adev->gfx.mec.hpd_eop_gpu_addr, - (void **)&hpd); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - gfx_v10_0_mec_fini(adev); - return r; - } + if (mec_hpd_size) { + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + gfx_v10_0_mec_fini(adev); + return r; + } - memset(hpd, 0, mec_hpd_size); + memset(hpd, 0, mec_hpd_size); - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle) break; } - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + adev->gfx.num_compute_rings = amdgpu_num_kcq; gfx_v10_0_set_kiq_pm4_funcs(adev); gfx_v10_0_set_ring_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 8d72089..7df567a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) amdgpu_gfx_compute_queue_acquire(adev); mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; + if (mec_hpd_size) { + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + return r; + } - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.mec.hpd_eop_obj, - &adev->gfx.mec.hpd_eop_gpu_addr, - (void **)&hpd); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - return r; - } - - memset(hpd, 0, mec_hpd_size); + memset(hpd, 0, mec_hpd_size); - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + } return 0; } @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + adev->gfx.num_compute_rings = amdgpu_num_kcq; adev->gfx.funcs = &gfx_v8_0_gfx_funcs; gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e4e751f..ef07e59 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; + if (mec_hpd_size) { + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; + } - r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.mec.hpd_eop_obj, - &adev->gfx.mec.hpd_eop_gpu_addr, - (void **)&hpd); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - gfx_v9_0_mec_fini(adev); - return r; - } - - memset(hpd, 0, mec_hpd_size); + memset(hpd, 0, mec_hpd_size); - amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + } mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + adev->gfx.num_compute_rings = amdgpu_num_kcq; gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); -- 2.7.4