From 233bb3733bd43966696f4a5e95129476e86bf4e3 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 19 Jan 2023 14:47:22 +0530 Subject: [PATCH] drm/amdgpu: Use unique doorbell range per xcc Program different ranges in each XCC with MEC_DOORBELL_RANGE_LOWER/HIGHER. Keeping the same range causes CPF in other XCCs also to be busy when an IB packet is submitted to KCQ. Only the XCC which processes the packet comes back to idle afterwards and this causes other CPs not be idle. This in turn affects clockgating behavior as RLC doesn't get idle interrupt. LOWER/HIGHER covers only KIQ/KCQs which are per XCC queues. Assigning different ranges doesn't seem to have any side effect as user queue ranges are outside of this range. User queue tests - PM4 through KFD and AQL through rocr - have the same results after this change. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 35 +++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 5 ++- .../drm/amd/amdgpu/aqua_vanjaram_reg_init.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 31 +++++++++++----- 4 files changed, 47 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 3c3ae2b4dbc8..f637574644c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -86,6 +86,8 @@ struct amdgpu_doorbell_index { uint32_t max_assignment; /* Per engine SDMA doorbell size in dword */ uint32_t sdma_doorbell_range; + /* Per xcc doorbell size for KIQ/KCQ */ + uint32_t xcc_doorbell_range; }; typedef enum _AMDGPU_DOORBELL_ASSIGNMENT @@ -309,28 +311,31 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_INVALID = 0xFFFF } AMDGPU_DOORBELL64_ASSIGNMENT; -typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 -{ - /* KIQ: 0~7 for maximum 8 XCD */ - AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000, - AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x008, - AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x009, - /* Compute: 0x0A ~ 0x49 */ - AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x00A, - AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x049, - AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x04A, - AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x0C9, +typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { + /* XCC0: 0x00 ~20, XCC1: 20 ~ 2F ... */ + + /* KIQ/HIQ/DIQ */ + AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000, + AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001, + AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002, + /* Compute: 0x08 ~ 0x20 */ + AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008, + AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F, + AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010, + AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F, + AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020, + /* SDMA: 0x100 ~ 0x19F */ - AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100, - AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, + AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100, + AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, /* IH: 0x1A0 ~ 0x1AF */ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0, /* VCN: 0x1B0 ~ 0x1D4 */ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0, AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4, - AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, - AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, + AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, + AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4, AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index f895a4b8ca0d..70c6099353b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -316,7 +316,10 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->use_doorbell = true; ring->xcc_id = xcc_id; ring->vm_hub = AMDGPU_GFXHUB(xcc_id); - ring->doorbell_index = (adev->doorbell_index.kiq + xcc_id) << 1; + ring->doorbell_index = + (adev->doorbell_index.kiq + + xcc_id * adev->doorbell_index.xcc_doorbell_range) + << 1; r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c index 6591d39c6518..55a6ebb940ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c @@ -44,6 +44,7 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START; adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END; + adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE; adev->doorbell_index.sdma_doorbell_range = 20; for (i = 0; i < adev->sdma.num_instances; i++) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 55d99c4ea48c..557a2458ef5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -729,8 +729,10 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, unsigned irq_type; struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; unsigned int hw_prio; + uint32_t xcc_doorbell_start; - ring = &adev->gfx.compute_ring[ring_id]; + ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings + + ring_id]; /* mec0 is me1 */ ring->xcc_id = xcc_id; @@ -740,9 +742,12 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr - + (ring_id * GFX9_MEC_HPD_SIZE); + xcc_doorbell_start = adev->doorbell_index.mec_ring0 + + xcc_id * adev->doorbell_index.xcc_doorbell_range; + ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id + xcc_id * adev->gfx.num_compute_rings) * + GFX9_MEC_HPD_SIZE; ring->vm_hub = AMDGPU_GFXHUB(xcc_id); sprintf(ring->name, "comp_%d.%d.%d.%d", ring->xcc_id, ring->me, ring->pipe, ring->queue); @@ -801,8 +806,8 @@ static int gfx_v9_4_3_sw_init(void *handle) } /* set up the compute queues - allocate horizontally across pipes */ - ring_id = 0; for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + ring_id = 0; for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; @@ -1654,10 +1659,18 @@ static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring, /* enable the doorbell if requested */ if (ring->use_doorbell) { - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_LOWER, - (adev->doorbell_index.kiq * 2) << 2); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell_index.userqueue_end * 2) << 2); + WREG32_SOC15( + GC, GET_INST(GC, xcc_id), + regCP_MEC_DOORBELL_RANGE_LOWER, + ((adev->doorbell_index.kiq + + xcc_id * adev->doorbell_index.xcc_doorbell_range) * + 2) << 2); + WREG32_SOC15( + GC, GET_INST(GC, xcc_id), + regCP_MEC_DOORBELL_RANGE_UPPER, + ((adev->doorbell_index.userqueue_end + + xcc_id * adev->doorbell_index.xcc_doorbell_range) * + 2) << 2); } WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, -- 2.34.1