From 527c670e5323414dbef8f4719dc9b348a50ac1c8 Mon Sep 17 00:00:00 2001
From: Lijo Lazar
Date: Wed, 14 Sep 2022 12:48:08 +0530
Subject: [PATCH] drm/amdgpu: Add sdma instance specific functions

SDMA 4.4.2 supports multiple instances. Add functions to support
handling of each SDMA instance separately.

Signed-off-by: Lijo Lazar
Reviewed-by: Hawking Zhang
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 162 +++++++++++++++++--------------
 1 file changed, 91 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 016813b..6a971e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -94,7 +94,8 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
 	}
 }
 
-static void sdma_v4_4_2_init_golden_registers(struct amdgpu_device *adev)
+static void sdma_v4_4_2_inst_init_golden_registers(struct amdgpu_device *adev,
+						   uint32_t inst_mask)
 {
 	u32 val;
 	int i;
@@ -418,13 +419,14 @@ static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
  *
  * Stop the gfx async dma ring buffers.
  */
-static void sdma_v4_4_2_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
+				      uint32_t inst_mask)
 {
 	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
 	u32 rb_cntl, ib_cntl;
 	int i, unset = 0;
 
-	for (i = 0; i < adev->sdma.num_instances; i++) {
+	for_each_inst(i, inst_mask) {
 		sdma[i] = &adev->sdma.instance[i].ring;
 
 		if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
@@ -448,7 +450,8 @@ static void sdma_v4_4_2_gfx_stop(struct amdgpu_device *adev)
  *
  * Stop the compute async dma queues.
  */
-static void sdma_v4_4_2_rlc_stop(struct amdgpu_device *adev)
+static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev,
+				      uint32_t inst_mask)
 {
 	/* XXX todo */
 }
@@ -460,14 +463,15 @@ static void sdma_v4_4_2_rlc_stop(struct amdgpu_device *adev)
  *
  * Stop the page async dma ring buffers.
  */
-static void sdma_v4_4_2_page_stop(struct amdgpu_device *adev)
+static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
+				       uint32_t inst_mask)
 {
 	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
 	u32 rb_cntl, ib_cntl;
 	int i;
 	bool unset = false;
 
-	for (i = 0; i < adev->sdma.num_instances; i++) {
+	for_each_inst(i, inst_mask) {
 		sdma[i] = &adev->sdma.instance[i].page;
 
 		if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
@@ -495,7 +499,8 @@ static void sdma_v4_4_2_page_stop(struct amdgpu_device *adev)
  *
  * Halt or unhalt the async dma engines context switch.
  */
-static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+static void sdma_v4_4_2_inst_ctx_switch_enable(struct amdgpu_device *adev,
+					       bool enable, uint32_t inst_mask)
 {
 	u32 f32_cntl, phase_quantum = 0;
 	int i;
@@ -524,7 +529,7 @@ static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enabl
 			unit << SDMA_PHASE0_QUANTUM__UNIT__SHIFT;
 	}
 
-	for (i = 0; i < adev->sdma.num_instances; i++) {
+	for_each_inst(i, inst_mask) {
 		f32_cntl = RREG32_SDMA(i, regSDMA_CNTL);
 		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_CNTL,
 				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
@@ -538,7 +543,6 @@ static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enabl
 		/* Extend page fault timeout to avoid interrupt storm */
 		WREG32_SDMA(i, regSDMA_UTCL1_TIMEOUT, 0x00800080);
 	}
-
 }
 
 /**
@@ -546,22 +550,24 @@ static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enabl
  *
  * @adev: amdgpu_device pointer
  * @enable: enable/disable the DMA MEs.
+ * @inst_mask: mask of dma engine instances to be enabled
  *
  * Halt or unhalt the async dma engines.
  */
-static void sdma_v4_4_2_enable(struct amdgpu_device *adev, bool enable)
+static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable,
+				    uint32_t inst_mask)
 {
 	u32 f32_cntl;
 	int i;
 
 	if (!enable) {
-		sdma_v4_4_2_gfx_stop(adev);
-		sdma_v4_4_2_rlc_stop(adev);
+		sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+		sdma_v4_4_2_inst_rlc_stop(adev, inst_mask);
 		if (adev->sdma.has_page_queue)
-			sdma_v4_4_2_page_stop(adev);
+			sdma_v4_4_2_inst_page_stop(adev, inst_mask);
 	}
 
-	for (i = 0; i < adev->sdma.num_instances; i++) {
+	for_each_inst(i, inst_mask) {
 		f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL);
 		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 0 : 1);
 		WREG32_SDMA(i, regSDMA_F32_CNTL, f32_cntl);
@@ -780,7 +786,8 @@ static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev)
  * Set up the compute DMA queues and enable them.
  * Returns 0 for success, error for failure.
  */
-static int sdma_v4_4_2_rlc_resume(struct amdgpu_device *adev)
+static int sdma_v4_4_2_inst_rlc_resume(struct amdgpu_device *adev,
+				       uint32_t inst_mask)
 {
 	sdma_v4_4_2_init_pg(adev);
 
@@ -795,7 +802,8 @@ static int sdma_v4_4_2_rlc_resume(struct amdgpu_device *adev)
  * Loads the sDMA0/1 ucode.
  * Returns 0 for success, -EINVAL if the ucode is not available.
  */
-static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev)
+static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
+					   uint32_t inst_mask)
 {
 	const struct sdma_firmware_header_v1_0 *hdr;
 	const __le32 *fw_data;
@@ -803,9 +811,9 @@ static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev)
 	int i, j;
 
 	/* halt the MEs */
-	sdma_v4_4_2_enable(adev, false);
+	sdma_v4_4_2_inst_enable(adev, false, inst_mask);
 
-	for (i = 0; i < adev->sdma.num_instances; i++) {
+	for_each_inst(i, inst_mask) {
 		if (!adev->sdma.instance[i].fw)
 			return -EINVAL;
 
@@ -831,38 +839,41 @@ static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev)
 }
 
 /**
- * sdma_v4_4_2_start - setup and start the async dma engines
+ * sdma_v4_4_2_inst_start - setup and start the async dma engines
  *
  * @adev: amdgpu_device pointer
  *
 * Set up the DMA engines and enable them.
 * Returns 0 for success, error for failure.
 */
-static int sdma_v4_4_2_start(struct amdgpu_device *adev)
+static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
+				  uint32_t inst_mask)
 {
 	struct amdgpu_ring *ring;
+	uint32_t tmp_mask;
 	int i, r = 0;
 
 	if (amdgpu_sriov_vf(adev)) {
-		sdma_v4_4_2_ctx_switch_enable(adev, false);
-		sdma_v4_4_2_enable(adev, false);
+		sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+		sdma_v4_4_2_inst_enable(adev, false, inst_mask);
 	} else {
 		/* bypass sdma microcode loading on Gopher */
 		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
-		    !(adev->pdev->device == 0x49) && !(adev->pdev->device == 0x50)) {
-			r = sdma_v4_4_2_load_microcode(adev);
+		    adev->sdma.instance[0].fw) {
+			r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
 			if (r)
 				return r;
 		}
 
 		/* unhalt the MEs */
-		sdma_v4_4_2_enable(adev, true);
+		sdma_v4_4_2_inst_enable(adev, true, inst_mask);
 		/* enable sdma ring preemption */
-		sdma_v4_4_2_ctx_switch_enable(adev, true);
+		sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
 	}
 
 	/* start the gfx rings and rlc compute queues */
-	for (i = 0; i < adev->sdma.num_instances; i++) {
+	tmp_mask = inst_mask;
+	for_each_inst(i, tmp_mask) {
 		uint32_t temp;
 
 		WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
@@ -889,15 +900,16 @@ static int sdma_v4_4_2_start(struct amdgpu_device *adev)
 	}
 
 	if (amdgpu_sriov_vf(adev)) {
-		sdma_v4_4_2_ctx_switch_enable(adev, true);
-		sdma_v4_4_2_enable(adev, true);
+		sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
+		sdma_v4_4_2_inst_enable(adev, true, inst_mask);
 	} else {
-		r = sdma_v4_4_2_rlc_resume(adev);
+		r = sdma_v4_4_2_inst_rlc_resume(adev, inst_mask);
 		if (r)
 			return r;
 	}
 
-	for (i = 0; i < adev->sdma.num_instances; i++) {
+	tmp_mask = inst_mask;
+	for_each_inst(i, tmp_mask) {
 		ring = &adev->sdma.instance[i].ring;
 
 		r = amdgpu_ring_test_helper(ring);
@@ -1383,14 +1395,17 @@ static int sdma_v4_4_2_hw_init(void *handle)
 {
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	uint32_t inst_mask;
 
+	/* TODO: Check if this is needed */
 	if (adev->flags & AMD_IS_APU)
 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
 
+	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
 	if (!amdgpu_sriov_vf(adev))
-		sdma_v4_4_2_init_golden_registers(adev);
+		sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
 
-	r = sdma_v4_4_2_start(adev);
+	r = sdma_v4_4_2_inst_start(adev, inst_mask);
 
 	return r;
 }
@@ -1398,22 +1413,27 @@ static int sdma_v4_4_2_hw_init(void *handle)
 static int sdma_v4_4_2_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	uint32_t inst_mask;
 	int i;
 
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
+	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
 			       AMDGPU_SDMA_IRQ_INSTANCE0 + i);
 	}
 
-	sdma_v4_4_2_ctx_switch_enable(adev, false);
-	sdma_v4_4_2_enable(adev, false);
+	sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
+	sdma_v4_4_2_inst_enable(adev, false, inst_mask);
 
 	return 0;
 }
 
+static int sdma_v4_4_2_set_clockgating_state(void *handle,
+					     enum amd_clockgating_state state);
+
 static int sdma_v4_4_2_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1650,15 +1670,39 @@ static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
-static void sdma_v4_4_2_update_medium_grain_clock_gating(
-		struct amdgpu_device *adev,
-		bool enable)
+static void sdma_v4_4_2_inst_update_medium_grain_light_sleep(
+	struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
+{
+	uint32_t data, def;
+	int i;
+
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
+		for_each_inst(i, inst_mask) {
+			/* 1-not override: enable sdma mem light sleep */
+			def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
+			data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+			if (def != data)
+				WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+		}
+	} else {
+		for_each_inst(i, inst_mask) {
+			/* 0-override:disable sdma mem light sleep */
+			def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
+			data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+			if (def != data)
+				WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
+		}
+	}
+}
+
+static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
+	struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
 {
 	uint32_t data, def;
 	int i;
 
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
-		for (i = 0; i < adev->sdma.num_instances; i++) {
+		for_each_inst(i, inst_mask) {
 			def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
 			data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK |
 				  SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK |
@@ -1672,7 +1716,7 @@ static void sdma_v4_4_2_update_medium_grain_clock_gating(
 			WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
 		}
 	} else {
-		for (i = 0; i < adev->sdma.num_instances; i++) {
+		for_each_inst(i, inst_mask) {
 			def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
 			data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK |
 				 SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK |
@@ -1688,45 +1732,21 @@ static void sdma_v4_4_2_update_medium_grain_clock_gating(
 	}
 }
 
-
-static void sdma_v4_4_2_update_medium_grain_light_sleep(
-		struct amdgpu_device *adev,
-		bool enable)
-{
-	uint32_t data, def;
-	int i;
-
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
-		for (i = 0; i < adev->sdma.num_instances; i++) {
-			/* 1-not override: enable sdma mem light sleep */
-			def = data = RREG32_SDMA(0, regSDMA_POWER_CNTL);
-			data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
-			if (def != data)
-				WREG32_SDMA(0, regSDMA_POWER_CNTL, data);
-		}
-	} else {
-		for (i = 0; i < adev->sdma.num_instances; i++) {
-			/* 0-override:disable sdma mem light sleep */
-			def = data = RREG32_SDMA(0, regSDMA_POWER_CNTL);
-			data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
-			if (def != data)
-				WREG32_SDMA(0, regSDMA_POWER_CNTL, data);
-		}
-	}
-}
-
 static int sdma_v4_4_2_set_clockgating_state(void *handle,
 					     enum amd_clockgating_state state)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	uint32_t inst_mask;
 
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
-	sdma_v4_4_2_update_medium_grain_clock_gating(adev,
-			state == AMD_CG_STATE_GATE);
-	sdma_v4_4_2_update_medium_grain_light_sleep(adev,
-			state == AMD_CG_STATE_GATE);
+	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+
+	sdma_v4_4_2_inst_update_medium_grain_clock_gating(
+		adev, state == AMD_CG_STATE_GATE, inst_mask);
+	sdma_v4_4_2_inst_update_medium_grain_light_sleep(
+		adev, state == AMD_CG_STATE_GATE, inst_mask);
 
 	return 0;
 }
-- 
2.7.4
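
The conversion above hinges on two pieces of mask handling: GENMASK(adev->sdma.num_instances - 1, 0)
builds a bitmask covering every present SDMA instance, and for_each_inst() walks the set bits of such
a mask so each helper can operate on an arbitrary subset of instances. As a rough sketch of that
iteration pattern only (the helper below is hypothetical; the in-tree for_each_inst() macro in the
amdgpu headers serves this role and may be defined differently):

	#include <strings.h>	/* ffs() */

	/* Hypothetical stand-in for for_each_inst(): call fn() once for every
	 * instance whose bit is set in inst_mask, lowest index first.
	 * E.g. inst_mask = 0x5 visits instances 0 and 2.
	 */
	static void visit_each_inst(unsigned int inst_mask, void (*fn)(int inst))
	{
		while (inst_mask) {
			int i = ffs(inst_mask) - 1;	/* index of lowest set bit */

			fn(i);
			inst_mask &= ~(1u << i);	/* clear it and continue */
		}
	}

Since the masks built here cover all instances, behaviour matches the old
"for (i = 0; i < adev->sdma.num_instances; i++)" loops, while later callers can pass a partial mask
to touch only a subset. Note that sdma_v4_4_2_inst_start() copies inst_mask into tmp_mask before each
loop, which suggests the real iterator consumes the mask it is handed.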