From: Jane Jian Date: Fri, 8 Jul 2022 10:07:38 +0000 (+0800) Subject: drm/amdgpu/jpeg: enable jpeg v4_0 for sriov X-Git-Tag: v6.6.7~2898^2~17^2~112 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bf35dbc135854c809b5cb6bcce320838c6d1370e;p=platform%2Fkernel%2Flinux-starfive.git drm/amdgpu/jpeg: enable jpeg v4_0 for sriov - skip direct jpeg registers read&write since it is not allowed - reset Doorbell range layout for sriov Signed-off-by: Jane Jian Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 77a8b05..6edaa43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1948,9 +1948,8 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 0, 2): case IP_VERSION(4, 0, 4): amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block); - break; + amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block); + return 0; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 479d9bc..b07c000f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -118,6 +118,10 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; + /* JPEG in SRIOV does not support direct register read/write */ + if (amdgpu_sriov_vf(adev)) + return 0; + WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) @@ -202,17 +206,18 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else { r = 0; } - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); - if (tmp == 0xDEADBEEF) - break; - udelay(1); + if (!amdgpu_sriov_vf(adev)) { + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; } - if (i >= adev->usec_timeout) - r = -ETIMEDOUT; - dma_fence_put(fence); error: return r; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 3129094..5f2a034 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -28,6 +28,7 @@ #include "soc15d.h" #include "jpeg_v2_0.h" #include "jpeg_v4_0.h" +#include "mmsch_v4_0.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" @@ -35,12 +36,15 @@ #define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev); static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev); +static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring); + /** * jpeg_v4_0_early_init - set function pointers * @@ -103,7 +107,8 @@ static int jpeg_v4_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + ring->doorbell_index = amdgpu_sriov_vf(adev) ? (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); + sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -153,16 +158,26 @@ static int jpeg_v4_0_hw_init(void *handle) struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; int r; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + if (amdgpu_sriov_vf(adev)) { + r = jpeg_v4_0_start_sriov(adev); + if (r) + return r; + ring->wptr = 0; + ring->wptr_old = 0; + jpeg_v4_0_dec_ring_set_wptr(ring); + ring->sched.ready = true; + } else { + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); - WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); @@ -181,11 +196,11 @@ static int jpeg_v4_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; cancel_delayed_work_sync(&adev->vcn.idle_work); - - if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); - + if (!amdgpu_sriov_vf(adev)) { + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) + jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + } amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0); return 0; @@ -390,6 +405,120 @@ static int jpeg_v4_0_start(struct amdgpu_device *adev) return 0; } +static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + uint32_t init_status; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2; + + header.jpegdec.init_status = 0; + header.jpegdec.table_offset = 0; + header.jpegdec.table_size = 0; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + table_size = 0; + + ring = &adev->jpeg.inst->ring_dec; + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0, + regUVD_LMI_JRBC_RB_64BIT_BAR_LOW), + lower_32_bits(ring->gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0, + regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH), + upper_32_bits(ring->gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0, + regUVD_JRBC_RB_SIZE), ring->ring_size / 4); + + /* add end packet */ + MMSCH_V4_0_INSERT_END(); + + /* refine header */ + header.jpegdec.init_status = 0; + header.jpegdec.table_offset = header.total_size; + header.jpegdec.table_size = table_size; + header.total_size += table_size; + + /* Update init table header in memory */ + size = sizeof(struct mmsch_v4_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + /* message MMSCH (in VCN[0]) to initialize this client + * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr + * of memory descriptor location + */ + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + /* 2, update vmid of descriptor */ + tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + /* use domain0 for MM scheduler */ + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp); + + /* 3, notify mmsch about the size of this descriptor */ + size = header.total_size; + WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size); + + /* 4, set resp to zero */ + WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0); + + /* 5, kick off the initialization and wait until + * MMSCH_VF_MAILBOX_RESP becomes non-zero + */ + param = 0x00000001; + WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->jpegdec.init_status; + while (resp != expected) { + resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP); + + if (resp != 0) + break; + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && init_status != MMSCH_VF_ENGINE_STATUS__PASS) + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", resp, init_status); + + return 0; + +} + /** * jpeg_v4_0_stop - stop JPEG block * @@ -513,6 +642,11 @@ static int jpeg_v4_0_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + if (amdgpu_sriov_vf(adev)) { + adev->jpeg.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == adev->jpeg.cur_state) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h index 0312c71..83653a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h @@ -38,6 +38,11 @@ #define MMSCH_VF_MAILBOX_RESP__OK 0x1 #define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2 +#define MMSCH_VF_ENGINE_STATUS__PASS 0x1 + +#define MMSCH_VF_MAILBOX_RESP__OK 0x1 +#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2 + enum mmsch_v4_0_command_type { MMSCH_COMMAND__DIRECT_REG_WRITE = 0, MMSCH_COMMAND__DIRECT_REG_POLLING = 2,