radv/video: start adding gfx11 vcn decoder
authorDave Airlie <airlied@redhat.com>
Thu, 16 Mar 2023 03:24:45 +0000 (13:24 +1000)
committerMarge Bot <emma+marge@anholt.net>
Thu, 27 Apr 2023 02:00:03 +0000 (02:00 +0000)
On gfx11 the vcn decoder moved into the vcn encoder ring,
now known as the unified vcn ring.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21980>

src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_video.c

index dd0d0f3..e8a0d29 100644 (file)
@@ -79,6 +79,7 @@
 #include "ac_spm.h"
 #include "ac_sqtt.h"
 #include "ac_surface.h"
+#include "ac_vcn.h"
 #include "radv_constants.h"
 #include "radv_descriptor_set.h"
 #include "radv_radeon_winsys.h"
@@ -248,6 +249,7 @@ radv_float_to_ufixed(float value, unsigned frac_bits)
 
 struct radv_image_view;
 struct radv_instance;
+struct rvcn_decode_buffer_s;
 
 /* A non-fatal assert.  Useful for debugging. */
 #ifdef NDEBUG
@@ -1808,6 +1810,8 @@ struct radv_cmd_buffer {
    struct {
       struct radv_video_session *vid;
       struct radv_video_session_params *params;
+      struct rvcn_sq_var sq;
+      struct rvcn_decode_buffer_s *decode_buffer;
    } video;
 
    uint64_t shader_upload_seq;
index fbed8bf..e99d79b 100644 (file)
@@ -50,6 +50,50 @@ radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
                                                out_offset, ptr);
 }
 
+/* vcn unified queue (sq) ib header; the slots saved in sq are filled in later by radv_vcn_sq_tail() */
+static void
+radv_vcn_sq_header(struct radeon_cmdbuf *cs,
+                   struct rvcn_sq_var *sq,
+                   bool enc)
+{
+   /* vcn ib signature: size, id, then 0 placeholders; sq records the write position before each one */
+   radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE);
+   radeon_emit(cs, RADEON_VCN_SIGNATURE);
+   sq->ib_checksum = &cs->buf[cs->cdw];
+   radeon_emit(cs, 0);
+   sq->ib_total_size_in_dw = &cs->buf[cs->cdw];
+   radeon_emit(cs, 0);
+
+   /* vcn ib engine info: size, id, engine type (encode when enc is true, else decode), then a 0 dword */
+   radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE);
+   radeon_emit(cs, RADEON_VCN_ENGINE_INFO);
+   radeon_emit(cs, enc ? RADEON_VCN_ENGINE_TYPE_ENCODE
+                       : RADEON_VCN_ENGINE_TYPE_DECODE);
+   radeon_emit(cs, 0); /* presumably the dword radv_vcn_sq_tail() patches via ib_total_size_in_dw + 4 */
+}
+/* Close the sq ib opened by radv_vcn_sq_header(): patch its size fields and checksum in place. */
+static void
+radv_vcn_sq_tail(struct radeon_cmdbuf *cs,
+                 struct rvcn_sq_var *sq)
+{
+   uint32_t *end;
+   uint32_t size_in_dw;
+   uint32_t checksum = 0;
+
+   if (sq->ib_checksum == NULL || sq->ib_total_size_in_dw == NULL) /* no header slots recorded */
+      return;
+
+   end = &cs->buf[cs->cdw];
+   size_in_dw = end - sq->ib_total_size_in_dw - 1; /* dwords between the total-size slot and end */
+   *sq->ib_total_size_in_dw = size_in_dw;
+   *(sq->ib_total_size_in_dw + 4) = size_in_dw * sizeof(uint32_t); /* same size in bytes, 4 dwords on */
+
+   for (int i = 0; i < size_in_dw; i++) /* sum size_in_dw dwords starting 2 past the checksum slot */
+      checksum += *(sq->ib_checksum + 2 + i);
+
+   *sq->ib_checksum = checksum;
+}
+
 /* generate an stream handle */
 static unsigned si_vid_alloc_stream_handle()
 {
@@ -68,7 +112,10 @@ static unsigned si_vid_alloc_stream_handle()
 void
 radv_init_physical_device_decoder(struct radv_physical_device *pdevice)
 {
-   if (radv_has_uvd(pdevice))
+   if (pdevice->rad_info.family >= CHIP_GFX1100 ||
+       pdevice->rad_info.family == CHIP_GFX940)
+      pdevice->vid_decode_ip = AMD_IP_VCN_UNIFIED;
+   else if (radv_has_uvd(pdevice))
       pdevice->vid_decode_ip = AMD_IP_UVD;
    else
       pdevice->vid_decode_ip = AMD_IP_VCN_DEC;
@@ -114,6 +161,16 @@ radv_init_physical_device_decoder(struct radv_physical_device *pdevice)
       pdevice->vid_dec_reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
       pdevice->vid_dec_reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
       break;
+   case CHIP_GFX940:
+      pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
+      break;
+   case CHIP_GFX1100:
+   case CHIP_GFX1101:
+   case CHIP_GFX1102:
+   case CHIP_GFX1103_R1:
+   case CHIP_GFX1103_R2:
+      pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
+      break;
    default:
       if (radv_has_uvd(pdevice)) {
          pdevice->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0;
@@ -560,9 +617,62 @@ static void send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd,
    radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
    addr = radv_buffer_get_va(bo);
    addr += offset;
-   set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
-   set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
-   set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
+
+   if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
+      set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
+      set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
+      set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
+      return;
+   }
+   switch(cmd) {
+   case RDECODE_CMD_MSG_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
+      cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->msg_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_DPB_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
+      cmd_buffer->video.decode_buffer->dpb_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->dpb_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_DECODING_TARGET_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
+      cmd_buffer->video.decode_buffer->target_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->target_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_FEEDBACK_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
+      cmd_buffer->video.decode_buffer->feedback_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->feedback_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_PROB_TBL_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
+      cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
+      cmd_buffer->video.decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->session_contex_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_BITSTREAM_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
+      cmd_buffer->video.decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->bitstream_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
+      cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_CONTEXT_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
+      cmd_buffer->video.decode_buffer->context_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->context_buffer_address_lo = (addr);
+      break;
+   default:
+      assert(0);
+   }
 }
 
 static void rvcn_dec_message_create(struct radv_video_session *vid,
@@ -1482,6 +1592,22 @@ radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer,
 
    cmd_buffer->video.vid = vid;
    cmd_buffer->video.params = params;
+
+   if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) {
+      radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false);
+      rvcn_decode_ib_package_t *ib_header =
+         (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
+      ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
+         sizeof(struct rvcn_decode_ib_package_s);
+      cmd_buffer->cs->cdw++;
+      ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
+      cmd_buffer->cs->cdw++;
+      cmd_buffer->video.decode_buffer =
+         (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
+      cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
+      memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
+   }
+
 }
 
 static void
@@ -1499,8 +1625,11 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
    send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
    /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
-   for (unsigned i = 0; i < 8; i++)
-      radeon_emit(cmd_buffer->cs, 0x81ff);
+
+   if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
+      for (unsigned i = 0; i < 8; i++)
+         radeon_emit(cmd_buffer->cs, 0x81ff);
+   }
 }
 
 static void
@@ -1539,6 +1668,12 @@ void
 radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer,
                           const VkVideoEndCodingInfoKHR *pEndCodingInfo)
 {
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED)
+      return;
+
+   radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
 }
 
 static void
@@ -1662,7 +1797,8 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer,
    if (have_it(vid))
       send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_bo, it_offset);
 
-   set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
+   if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED)
+      set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
 }
 
 void