From dbd75e0d4818d40a6472e811262e8cf0d2bca352 Mon Sep 17 00:00:00 2001
From: Boyuan Zhang
Date: Sat, 26 Feb 2022 20:14:30 -0500
Subject: [PATCH] radeonsi/vcn: add encode context ib for vcn 4.0

Implement encode context ib based on new reconstructed picture and
interface for VCN 4.0, and modify dpb setup accordingly.

Signed-off-by: Boyuan Zhang
Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
Note (review): the line structure of this patch was restored from a copy
whose newlines had been collapsed. Blank context lines, indentation and
the wrapping of long context lines were inferred from the hunk line
counts and the diffstat; verify against the target tree before applying.
Author e-mail addresses and the Part-of URL were already missing from
the copy and are left as found.

 src/gallium/drivers/radeonsi/radeon_vcn_enc.c     | 20 ++++++---
 src/gallium/drivers/radeonsi/radeon_vcn_enc.h     | 14 ++++++
 src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c | 55 +++++++++++++++++++++++
 3 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c
index 9a6a608..22e8d4b 100644
--- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c
@@ -476,7 +476,8 @@ static void radeon_enc_get_feedback(struct pipe_video_codec *encoder, void *feed
    FREE(fb);
 }
 
-static int setup_dpb(struct radeon_encoder *enc, enum pipe_format buffer_format)
+static int setup_dpb(struct radeon_encoder *enc, enum pipe_format buffer_format,
+                     enum chip_class chip_class)
 {
    uint32_t aligned_width = align(enc->base.width, 16);
    uint32_t aligned_height = align(enc->base.height, 16);
@@ -493,10 +494,17 @@ static int setup_dpb(struct radeon_encoder *enc, enum pipe_format buffer_format)
 
    int i;
    for (i = 0; i < num_reconstructed_pictures; i++) {
-      enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset = offset;
-      offset += luma_size;
-      enc->enc_pic.ctx_buf.reconstructed_pictures[i].chroma_offset = offset;
-      offset += chroma_size;
+      if (chip_class >= GFX11) {
+         enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].luma_offset = offset;
+         offset += luma_size;
+         enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].chroma_offset = offset;
+         offset += chroma_size;
+      } else {
+         enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset = offset;
+         offset += luma_size;
+         enc->enc_pic.ctx_buf.reconstructed_pictures[i].chroma_offset = offset;
+         offset += chroma_size;
+      }
    }
    for (; i < RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES; i++) {
       enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset = 0;
@@ -574,7 +582,7 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context,
    cpb_size = cpb_size * enc->cpb_num;
    tmp_buf->destroy(tmp_buf);
 
-   cpb_size += setup_dpb(enc, templat.buffer_format);
+   cpb_size += setup_dpb(enc, templat.buffer_format, sscreen->info.chip_class);
 
    if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
       RVID_ERR("Can't create CPB buffer.\n");
diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h
index 25f0d29..70d5edb 100644
--- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h
+++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h
@@ -353,6 +353,18 @@ typedef struct rvcn_enc_reconstructed_picture_s {
    uint32_t chroma_offset;
 } rvcn_enc_reconstructed_picture_t;
 
+typedef struct rvcn_enc_reconstructed_picture_v4_0_s {
+   uint32_t luma_offset;
+   uint32_t chroma_offset;
+   union {
+      struct
+      {
+         uint32_t unused_offset1;
+         uint32_t unused_offset2;
+      } unused;
+   };
+} rvcn_enc_reconstructed_picture_v4_0_t;
+
 typedef struct rvcn_enc_picture_info_s
 {
    bool in_use;
@@ -381,12 +393,14 @@ typedef struct rvcn_enc_encode_context_buffer_s {
    uint32_t rec_chroma_pitch;
    uint32_t num_reconstructed_pictures;
    rvcn_enc_reconstructed_picture_t reconstructed_pictures[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES];
+   rvcn_enc_reconstructed_picture_v4_0_t reconstructed_pictures_v4_0[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES];
    uint32_t pre_encode_picture_luma_pitch;
    uint32_t pre_encode_picture_chroma_pitch;
    rvcn_enc_reconstructed_picture_t
       pre_encode_reconstructed_pictures[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES];
    rvcn_enc_pre_encode_input_picture_t pre_encode_input_picture;
    uint32_t two_pass_search_center_map_offset;
+   uint32_t colloc_buffer_offset;
 } rvcn_enc_encode_context_buffer_t;
 
 typedef struct rvcn_enc_video_bitstream_buffer_s {
diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c
index 2a423c0..b45c2c2 100644
--- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c
+++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c
@@ -38,10 +38,65 @@
 #define RENCODE_FW_INTERFACE_MAJOR_VERSION   1
 #define RENCODE_FW_INTERFACE_MINOR_VERSION   0
 
+static void radeon_enc_ctx(struct radeon_encoder *enc)
+{
+   enc->enc_pic.ctx_buf.swizzle_mode = 0;
+   enc->enc_pic.ctx_buf.two_pass_search_center_map_offset = 0;
+   enc->enc_pic.ctx_buf.colloc_buffer_offset = enc->dpb_size;
+
+   uint32_t aligned_width = enc->enc_pic.session_init.aligned_picture_width;
+   uint32_t aligned_height = enc->enc_pic.session_init.aligned_picture_height;
+
+   enc->enc_pic.ctx_buf.rec_luma_pitch = align(aligned_width, enc->alignment);
+   enc->enc_pic.ctx_buf.rec_chroma_pitch = align(aligned_width, enc->alignment);
+
+   int luma_size = enc->enc_pic.ctx_buf.rec_luma_pitch * align(aligned_height, enc->alignment);
+   if (enc->enc_pic.bit_depth_luma_minus8 == 2)
+      luma_size *= 2;
+   int chroma_size = align(luma_size / 2, enc->alignment);
+   int offset = 0;
+
+   for (int i = 0; i < enc->enc_pic.ctx_buf.num_reconstructed_pictures; i++) {
+      offset += luma_size;
+      offset += chroma_size;
+   }
+
+   assert(offset == enc->dpb_size);
+
+   RADEON_ENC_BEGIN(enc->cmd.ctx);
+   RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures);
+
+   for (int i = 0; i < RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES; i++) {
+      RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].luma_offset);
+      RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].chroma_offset);
+      RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].unused.unused_offset1);
+      RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].unused.unused_offset2);
+   }
+
+   // 2: 1 pre encode pitch * 2 (luma + chroma)
+   // 136: 34 pre encode reconstructed pics * 4 (luma + chroma offsets + unused union)
+   // 3: 1 pre encode input pic * 3 (r,g,b offset union)
+   //----
+   // 141
+
+   for (int i = 0; i < 141; i++)
+      RADEON_ENC_CS(0x00000000);
+
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.two_pass_search_center_map_offset);
+   RADEON_ENC_CS(enc->enc_pic.ctx_buf.colloc_buffer_offset);
+   RADEON_ENC_END();
+}
+
 void radeon_enc_4_0_init(struct radeon_encoder *enc)
 {
    radeon_enc_3_0_init(enc);
 
+   enc->ctx = radeon_enc_ctx;
+
    enc->enc_pic.session_info.interface_version =
       ((RENCODE_FW_INTERFACE_MAJOR_VERSION << RENCODE_IF_MAJOR_VERSION_SHIFT) |
        (RENCODE_FW_INTERFACE_MINOR_VERSION << RENCODE_IF_MINOR_VERSION_SHIFT));
-- 
2.7.4