From d8f3060bd915e6ba6cc01086978d126e70bfea92 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 16 Mar 2023 13:24:45 +1000 Subject: [PATCH] radv/video: start adding gfx11 vcn decoder On gfx11 the vcn decoder moved into the vcn encoder ring, now known as the unified vcn ring. Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_private.h | 4 ++ src/amd/vulkan/radv_video.c | 150 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 147 insertions(+), 7 deletions(-) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index dd0d0f3..e8a0d29 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -79,6 +79,7 @@ #include "ac_spm.h" #include "ac_sqtt.h" #include "ac_surface.h" +#include "ac_vcn.h" #include "radv_constants.h" #include "radv_descriptor_set.h" #include "radv_radeon_winsys.h" @@ -248,6 +249,7 @@ radv_float_to_ufixed(float value, unsigned frac_bits) struct radv_image_view; struct radv_instance; +struct rvcn_decode_buffer_s; /* A non-fatal assert. Useful for debugging. */ #ifdef NDEBUG @@ -1808,6 +1810,8 @@ struct radv_cmd_buffer { struct { struct radv_video_session *vid; struct radv_video_session_params *params; + struct rvcn_sq_var sq; + struct rvcn_decode_buffer_s *decode_buffer; } video; uint64_t shader_upload_seq; diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c index fbed8bf..e99d79b 100644 --- a/src/amd/vulkan/radv_video.c +++ b/src/amd/vulkan/radv_video.c @@ -50,6 +50,50 @@ radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, out_offset, ptr); } +/* vcn unified queue (sq) ib header */ +static void +radv_vcn_sq_header(struct radeon_cmdbuf *cs, + struct rvcn_sq_var *sq, + bool enc) +{ + /* vcn ib signature */ + radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE); + radeon_emit(cs, RADEON_VCN_SIGNATURE); + sq->ib_checksum = &cs->buf[cs->cdw]; + radeon_emit(cs, 0); + sq->ib_total_size_in_dw = &cs->buf[cs->cdw]; + radeon_emit(cs, 0); + + /* vcn ib engine info */ + radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE); + radeon_emit(cs, RADEON_VCN_ENGINE_INFO); + radeon_emit(cs, enc ? RADEON_VCN_ENGINE_TYPE_ENCODE + : RADEON_VCN_ENGINE_TYPE_DECODE); + radeon_emit(cs, 0); +} + +static void +radv_vcn_sq_tail(struct radeon_cmdbuf *cs, + struct rvcn_sq_var *sq) +{ + uint32_t *end; + uint32_t size_in_dw; + uint32_t checksum = 0; + + if (sq->ib_checksum == NULL || sq->ib_total_size_in_dw == NULL) + return; + + end = &cs->buf[cs->cdw]; + size_in_dw = end - sq->ib_total_size_in_dw - 1; + *sq->ib_total_size_in_dw = size_in_dw; + *(sq->ib_total_size_in_dw + 4) = size_in_dw * sizeof(uint32_t); + + for (int i = 0; i < size_in_dw; i++) + checksum += *(sq->ib_checksum + 2 + i); + + *sq->ib_checksum = checksum; +} + /* generate an stream handle */ static unsigned si_vid_alloc_stream_handle() { @@ -68,7 +112,10 @@ static unsigned si_vid_alloc_stream_handle() void radv_init_physical_device_decoder(struct radv_physical_device *pdevice) { - if (radv_has_uvd(pdevice)) + if (pdevice->rad_info.family >= CHIP_GFX1100 || + pdevice->rad_info.family == CHIP_GFX940) + pdevice->vid_decode_ip = AMD_IP_VCN_UNIFIED; + else if (radv_has_uvd(pdevice)) pdevice->vid_decode_ip = AMD_IP_UVD; else pdevice->vid_decode_ip = AMD_IP_VCN_DEC; @@ -114,6 +161,16 @@ radv_init_physical_device_decoder(struct radv_physical_device *pdevice) pdevice->vid_dec_reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD; pdevice->vid_dec_reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL; break; + case CHIP_GFX940: + pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9; + break; + case CHIP_GFX1100: + case CHIP_GFX1101: + case CHIP_GFX1102: + case CHIP_GFX1103_R1: + case CHIP_GFX1103_R2: + pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11; + break; default: if (radv_has_uvd(pdevice)) { pdevice->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0; @@ -560,9 +617,62 @@ static void send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo); addr = radv_buffer_get_va(bo); addr += offset; - set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr); - set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32); - set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1); + + if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) { + set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr); + set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32); + set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1); + return; + } + switch(cmd) { + case RDECODE_CMD_MSG_BUFFER: + cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER; + cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32); + cmd_buffer->video.decode_buffer->msg_buffer_address_lo = (addr); + break; + case RDECODE_CMD_DPB_BUFFER: + cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER); + cmd_buffer->video.decode_buffer->dpb_buffer_address_hi = (addr >> 32); + cmd_buffer->video.decode_buffer->dpb_buffer_address_lo = (addr); + break; + case RDECODE_CMD_DECODING_TARGET_BUFFER: + cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER); + cmd_buffer->video.decode_buffer->target_buffer_address_hi = (addr >> 32); + cmd_buffer->video.decode_buffer->target_buffer_address_lo = (addr); + break; + case RDECODE_CMD_FEEDBACK_BUFFER: + cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER); + cmd_buffer->video.decode_buffer->feedback_buffer_address_hi = (addr >> 32); + cmd_buffer->video.decode_buffer->feedback_buffer_address_lo = (addr); + break; + case RDECODE_CMD_PROB_TBL_BUFFER: + cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER); + cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32); + cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_lo = (addr); + break; + case RDECODE_CMD_SESSION_CONTEXT_BUFFER: + cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER); + cmd_buffer->video.decode_buffer->session_contex_buffer_address_hi = (addr >> 32); + cmd_buffer->video.decode_buffer->session_contex_buffer_address_lo = (addr); + break; + case RDECODE_CMD_BITSTREAM_BUFFER: + cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER); + cmd_buffer->video.decode_buffer->bitstream_buffer_address_hi = (addr >> 32); + cmd_buffer->video.decode_buffer->bitstream_buffer_address_lo = (addr); + break; + case RDECODE_CMD_IT_SCALING_TABLE_BUFFER: + cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER); + cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32); + cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_lo = (addr); + break; + case RDECODE_CMD_CONTEXT_BUFFER: + cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER); + cmd_buffer->video.decode_buffer->context_buffer_address_hi = (addr >> 32); + cmd_buffer->video.decode_buffer->context_buffer_address_lo = (addr); + break; + default: + assert(0); + } } static void rvcn_dec_message_create(struct radv_video_session *vid, @@ -1482,6 +1592,22 @@ radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, cmd_buffer->video.vid = vid; cmd_buffer->video.params = params; + + if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) { + radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false); + rvcn_decode_ib_package_t *ib_header = + (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); + ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + + sizeof(struct rvcn_decode_ib_package_s); + cmd_buffer->cs->cdw++; + ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER); + cmd_buffer->cs->cdw++; + cmd_buffer->video.decode_buffer = + (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]); + cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4; + memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s)); + } + } static void @@ -1499,8 +1625,11 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer) send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset); send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset); /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */ - for (unsigned i = 0; i < 8; i++) - radeon_emit(cmd_buffer->cs, 0x81ff); + + if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) { + for (unsigned i = 0; i < 8; i++) + radeon_emit(cmd_buffer->cs, 0x81ff); + } } static void @@ -1539,6 +1668,12 @@ void radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo) { + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + + if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) + return; + + radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq); } static void @@ -1662,7 +1797,8 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, if (have_it(vid)) send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_bo, it_offset); - set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1); + if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) + set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1); } void -- 2.7.4