From: Ruijing Dong Date: Tue, 31 May 2022 18:20:42 +0000 (-0400) Subject: drm/amdgpu/vcn: support unified queue only in vcn4 X-Git-Tag: v6.6.17~6802^2~17^2~109 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bb4f196b47b6554ba89f02ec60246f0c643a4bf8;p=platform%2Fkernel%2Flinux-rpi.git drm/amdgpu/vcn: support unified queue only in vcn4 - remove multiple queue support - add unified queue related functions Acked-by: Leo Liu Signed-off-by: Ruijing Dong Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 9119e96..d6f134e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -29,7 +29,6 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" -#include "vcn_sw_ring.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" @@ -45,15 +44,12 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 -bool unifiedQ_enabled = false; - static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 }; -static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); -static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); @@ -71,36 +67,15 @@ static int vcn_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (unifiedQ_enabled) { - adev->vcn.num_vcn_inst = 1; - adev->vcn.num_enc_rings = 1; - } else { - adev->vcn.num_enc_rings = 2; - } - - if (!unifiedQ_enabled) - vcn_v4_0_set_dec_ring_funcs(adev); + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; - vcn_v4_0_set_enc_ring_funcs(adev); + vcn_v4_0_set_unified_ring_funcs(adev); vcn_v4_0_set_irq_funcs(adev); return 0; } -static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) -{ - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - const struct common_firmware_header *hdr; - - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCN firmware\n"); - } -} - /** * vcn_v4_0_sw_init - sw init for VCN block * @@ -111,17 +86,14 @@ static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) static int vcn_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; - int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r; r = amdgpu_vcn_sw_init(adev); if (r) return r; - if (unifiedQ_enabled) - amdgpu_vcn_setup_unified_queue_ucode(adev); - else - amdgpu_vcn_setup_ucode(adev); + amdgpu_vcn_setup_ucode(adev); r = amdgpu_vcn_resume(adev); if (r) @@ -129,81 +101,40 @@ static int vcn_v4_0_sw_init(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; + if (adev->vcn.harvest_config & (1 << i)) continue; - /* VCN DEC TRAP */ + + atomic_set(&adev->vcn.inst[i].sched_score, 0); + + /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst[i].irq); + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); if (r) return r; - atomic_set(&adev->vcn.inst[i].sched_score, 0); - if (!unifiedQ_enabled) { - ring = &adev->vcn.inst[i].ring_dec; - ring->use_doorbell = true; - - /* VCN4 doorbell layout - * 1: VCN_JPEG_DB_CTRL UVD_JRBC_RB_WPTR; (jpeg) - * 2: VCN_RB1_DB_CTRL UVD_RB_WPTR; (decode/encode for unified queue) - * 3: VCN_RB2_DB_CTRL UVD_RB_WPTR2; (encode only for swqueue) - * 4: VCN_RB3_DB_CTRL UVD_RB_WPTR3; (Reserved) - * 5: VCN_RB4_DB_CTRL UVD_RB_WPTR4; (decode only for swqueue) - */ - - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) - + 5 + 8 * i; - - sprintf(ring->name, "vcn_dec_%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, - AMDGPU_RING_PRIO_DEFAULT, - &adev->vcn.inst[i].sched_score); - if (r) - return r; - } - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - /* VCN ENC TRAP */ - r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - j + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); - if (r) - return r; - - ring = &adev->vcn.inst[i].ring_enc[j]; - ring->use_doorbell = true; - - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; - - if (unifiedQ_enabled) { - sprintf(ring->name, "vcn_unified%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - } else { - enum amdgpu_ring_priority_level hw_prio; + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; - hw_prio = amdgpu_vcn_get_enc_ring_prio(j); - sprintf(ring->name, "vcn_enc_%d.%d", i, j); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, - hw_prio, &adev->vcn.inst[i].sched_score); - } - if (r) - return r; - } + sprintf(ring->name, "vcn_unified_%d", i); - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = 0; + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score); + if (r) + return r; - if (unifiedQ_enabled) { - fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); - fw_shared->sq.is_enabled = 1; - } + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } - if (!unifiedQ_enabled) { - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; - } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; + return 0; } @@ -220,19 +151,19 @@ static int vcn_v4_0_sw_fini(void *handle) int i, r, idx; if (drm_dev_enter(&adev->ddev, &idx)) { - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->vcn.harvest_config & (1 << i)) + continue; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = 0; - fw_shared->sq.is_enabled = 0; - } + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } - drm_dev_exit(idx); - } + drm_dev_exit(idx); + } r = amdgpu_vcn_suspend(adev); if (r) @@ -254,15 +185,13 @@ static int vcn_v4_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, j, r; + int i, r; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - if (unifiedQ_enabled) - ring = &adev->vcn.inst[i].ring_enc[0]; - else - ring = &adev->vcn.inst[i].ring_dec; + + ring = &adev->vcn.inst[i].ring_enc[0]; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); @@ -270,13 +199,6 @@ static int vcn_v4_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) goto done; - - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - ring = &adev->vcn.inst[i].ring_enc[j]; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; - } } done: @@ -464,7 +386,6 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); - } if (!indirect) @@ -888,7 +809,6 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; - int i; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, @@ -974,74 +894,32 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); - if (unifiedQ_enabled) { - ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; - } else - ring = &adev->vcn.inst[inst_idx].ring_dec; - - WREG32_SOC15(VCN, inst_idx, regVCN_RB4_DB_CTRL, - ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | - VCN_RB4_DB_CTRL__EN_MASK); - - /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO4, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI4, - upper_32_bits(ring->gpu_addr)); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); - /* reseting ring, fw should not check RB ring */ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); - /* Initialize the ring buffer's read and write pointers */ - tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR4); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4, tmp); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4); + tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB4_EN_MASK; + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); - WREG32_SOC15(VCN, inst_idx, regUVD_SCRATCH2, 0); - - if (unifiedQ_enabled) - fw_shared->sq.queue_mode &= ~FW_QUEUE_RING_RESET; - - for (i = 0; i < adev->vcn.num_enc_rings; i++) { - ring = &adev->vcn.inst[inst_idx].ring_enc[i]; - - if (i) { - ring = &adev->vcn.inst[inst_idx].ring_enc[1]; - - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE2, ring->ring_size / 4); - tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR2); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2, tmp); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2); - - WREG32_SOC15(VCN, inst_idx, regVCN_RB2_DB_CTRL, - ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | - VCN_RB2_DB_CTRL__EN_MASK); - } else { - ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); - tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); - WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - } - } return 0; } @@ -1064,6 +942,8 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); continue; @@ -1081,15 +961,15 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) /* enable VCPU clock */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); /* disable master interrupt */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + ~UVD_MASTINT_EN__VCPU_EN_MASK); /* enable LMI MC and UMC channels */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; @@ -1099,10 +979,10 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) /* setup regUVD_LMI_CTRL */ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); /* setup regUVD_MPC_CNTL */ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL); @@ -1112,37 +992,37 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) /* setup UVD_MPC_SET_MUXA0 */ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0, - ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); /* setup UVD_MPC_SET_MUXB0 */ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); /* setup UVD_MPC_SET_MUX */ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX, - ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); vcn_v4_0_mc_resume(adev, i); /* VCN global tiling registers */ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); + adev->gfx.config.gb_addr_config); /* unblock VCPU register access */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); /* release VCPU reset to boot */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + ~UVD_VCPU_CNTL__BLK_RST_MASK); for (j = 0; j < 10; ++j) { uint32_t status; @@ -1166,13 +1046,13 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) if (status & 2) break; - dev_err(adev->dev, "VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); r = -1; @@ -1180,78 +1060,43 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) } if (r) { - dev_err(adev->dev, "VCN[%d] decode not responding, giving up!!!\n", i); + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); return r; } /* enable master interrupt */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); /* clear the busy bit of VCN_STATUS */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - if (unifiedQ_enabled) { - ring = &adev->vcn.inst[i].ring_enc[0]; - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; - } else { - ring = &adev->vcn.inst[i].ring_dec; + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - WREG32_SOC15(VCN, i, regVCN_RB4_DB_CTRL, - ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | - VCN_RB4_DB_CTRL__EN_MASK); - - /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO4, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI4, - upper_32_bits(ring->gpu_addr)); - - WREG32_SOC15(VCN, i, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); - - /* resetting ring, fw should not check RB ring */ - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); - /* Initialize the ring buffer's read and write pointers */ - tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR4, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR4); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB4_EN_MASK; - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); - } - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); - if (unifiedQ_enabled) - fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); - else { - ring = &adev->vcn.inst[i].ring_enc[1]; - WREG32_SOC15(VCN, i, regVCN_RB2_DB_CTRL, - ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | - VCN_RB2_DB_CTRL__EN_MASK); - tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR2); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR2, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR2); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_SIZE2, ring->ring_size / 4); - } + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); } return 0; @@ -1277,12 +1122,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); - tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR2, tmp, 0xFFFFFFFF); - - tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR4, tmp, 0xFFFFFFFF); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -1301,10 +1140,14 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) */ static int vcn_v4_0_stop(struct amdgpu_device *adev) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v4_0_stop_dpg_mode(adev, i); continue; @@ -1414,8 +1257,6 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, /* unpause dpg, no need to wait */ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } @@ -1424,165 +1265,72 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, } /** - * vcn_v4_0_dec_ring_get_rptr - get read pointer + * vcn_v4_0_unified_ring_get_rptr - get unified read pointer * * @ring: amdgpu_ring pointer * - * Returns the current hardware read pointer + * Returns the current hardware unified read pointer */ -static uint64_t vcn_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR4); + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); } /** - * vcn_v4_0_dec_ring_get_wptr - get write pointer + * vcn_v4_0_unified_ring_get_wptr - get unified write pointer * * @ring: amdgpu_ring pointer * - * Returns the current hardware write pointer + * Returns the current hardware unified write pointer */ -static uint64_t vcn_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + if (ring->use_doorbell) return *ring->wptr_cpu_addr; else - return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4); + return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); } /** - * vcn_v4_0_dec_ring_set_wptr - set write pointer + * vcn_v4_0_unified_ring_set_wptr - set enc write pointer * * @ring: amdgpu_ring pointer * - * Commits the write pointer to the hardware + * Commits the enc write pointer to the hardware */ -static void vcn_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - WREG32_SOC15(VCN, ring->me, regUVD_SCRATCH2, - lower_32_bits(ring->wptr)); - } + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); if (ring->use_doorbell) { *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4, lower_32_bits(ring->wptr)); - } -} - -static const struct amdgpu_ring_funcs vcn_v4_0_dec_sw_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_DEC, - .align_mask = 0x3f, - .nop = VCN_DEC_SW_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = vcn_v4_0_dec_ring_get_rptr, - .get_wptr = vcn_v4_0_dec_ring_get_wptr, - .set_wptr = vcn_v4_0_dec_ring_set_wptr, - .emit_frame_size = - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + - VCN_SW_RING_EMIT_FRAME_SIZE, - .emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */ - .emit_ib = vcn_dec_sw_ring_emit_ib, - .emit_fence = vcn_dec_sw_ring_emit_fence, - .emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_dec_sw_ring_test_ring, - .test_ib = amdgpu_vcn_dec_sw_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, - .insert_end = vcn_dec_sw_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_dec_sw_ring_emit_wreg, - .emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait, - .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -/** - * vcn_v4_0_enc_ring_get_rptr - get enc read pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware enc read pointer - */ -static uint64_t vcn_v4_0_enc_ring_get_rptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) - return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); - else - return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR2); -} - -/** - * vcn_v4_0_enc_ring_get_wptr - get enc write pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware enc write pointer - */ -static uint64_t vcn_v4_0_enc_ring_get_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { - if (ring->use_doorbell) - return *ring->wptr_cpu_addr; - else - return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); - } else { - if (ring->use_doorbell) - return *ring->wptr_cpu_addr; - else - return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2); - } -} - -/** - * vcn_v4_0_enc_ring_set_wptr - set enc write pointer - * - * @ring: amdgpu_ring pointer - * - * Commits the enc write pointer to the hardware - */ -static void vcn_v4_0_enc_ring_set_wptr(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { - if (ring->use_doorbell) { - *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); - } else { - WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); - } - } else { - if (ring->use_doorbell) { - *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); - } else { - WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - } + WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); } } -static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { +static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, .vmhub = AMDGPU_MMHUB_0, - .get_rptr = vcn_v4_0_enc_ring_get_rptr, - .get_wptr = vcn_v4_0_enc_ring_get_wptr, - .set_wptr = vcn_v4_0_enc_ring_set_wptr, + .get_rptr = vcn_v4_0_unified_ring_get_rptr, + .get_wptr = vcn_v4_0_unified_ring_get_wptr, + .set_wptr = vcn_v4_0_unified_ring_set_wptr, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + @@ -1594,7 +1342,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { .emit_fence = vcn_v2_0_enc_ring_emit_fence, .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, - .test_ib = amdgpu_vcn_enc_ring_test_ib, + .test_ib = amdgpu_vcn_unified_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_v2_0_enc_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, @@ -1606,13 +1354,13 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { }; /** - * vcn_v4_0_set_dec_ring_funcs - set dec ring functions + * vcn_v4_0_set_unified_ring_funcs - set unified ring functions * * @adev: amdgpu_device pointer * - * Set decode ring functions + * Set unified ring functions */ -static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) +static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev) { int i; @@ -1620,32 +1368,10 @@ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].ring_dec.funcs = &vcn_v4_0_dec_sw_ring_vm_funcs; - adev->vcn.inst[i].ring_dec.me = i; - DRM_INFO("VCN(%d) decode software ring is enabled in VM mode\n", i); - } -} - -/** - * vcn_v4_0_set_enc_ring_funcs - set enc ring functions - * - * @adev: amdgpu_device pointer - * - * Set encode ring functions - */ -static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev) -{ - int i, j; - - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v4_0_enc_ring_vm_funcs; - adev->vcn.inst[i].ring_enc[j].me = i; - } - DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); + DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); } } @@ -1798,18 +1524,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ DRM_DEBUG("IH: VCN TRAP\n"); switch (entry->src_id) { - case VCN_4_0__SRCID__UVD_TRAP: - if (!unifiedQ_enabled) { - amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); - break; - } - break; case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; - case VCN_4_0__SRCID__UVD_ENC_LOW_LATENCY: - amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); - break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);