tests/amdgpu/vcn: add unified queue support in vcn4
author Ruijing Dong <ruijing.dong@amd.com>
Mon, 18 Jul 2022 15:41:23 +0000 (11:41 -0400)
committer Leo Liu <leo.liu@amd.com>
Wed, 20 Jul 2022 22:23:59 +0000 (18:23 -0400)
Add unified queue headers to the existing tests.

Reviewed-by: Boyuan Zhang <Boyuan.Zhang@amd.com>
Signed-off-by: Ruijing Dong <ruijing.dong@amd.com>
tests/amdgpu/vcn_tests.c

index e094b45..5e20fb6 100644 (file)
@@ -63,6 +63,7 @@
 #define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER         (0x00100000)
 
 static bool vcn_dec_sw_ring = false;
+static bool vcn_unified_ring = false;
 
 #define H264_NAL_TYPE_NON_IDR_SLICE 1
 #define H264_NAL_TYPE_DP_A_SLICE 2
@@ -172,6 +173,9 @@ static amdgpu_bo_handle ib_handle;
 static amdgpu_va_handle ib_va_handle;
 static uint64_t ib_mc_address;
 static uint32_t *ib_cpu;
+static uint32_t *ib_checksum;
+static uint32_t *ib_size_in_dw;
+
 static rvcn_decode_buffer_t *decode_buffer;
 
 static amdgpu_bo_handle resources[MAX_RESOURCES];
@@ -185,8 +189,8 @@ static struct amdgpu_vcn_reg reg[] = {
 };
 
 uint32_t gWidth, gHeight, gSliceType;
-struct drm_amdgpu_info_hw_ip einfo;
-
+static uint32_t vcn_ip_version_major;
+static uint32_t vcn_ip_version_minor;
 static void amdgpu_cs_vcn_dec_create(void);
 static void amdgpu_cs_vcn_dec_decode(void);
 static void amdgpu_cs_vcn_dec_destroy(void);
@@ -195,6 +199,8 @@ static void amdgpu_cs_vcn_enc_create(void);
 static void amdgpu_cs_vcn_enc_encode(void);
 static void amdgpu_cs_vcn_enc_destroy(void);
 
+static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc);
+static void amdgpu_cs_sq_ib_tail(uint32_t *end);
 static void h264_check_0s (bufferInfo * bufInfo, int count);
 static int32_t h264_se (bufferInfo * bufInfo);
 static inline uint32_t bs_read_u1(bufferInfo *bufinfo);
@@ -224,7 +230,8 @@ CU_TestInfo vcn_tests[] = {
 CU_BOOL suite_vcn_tests_enable(void)
 {
        struct drm_amdgpu_info_hw_ip info;
-       int r, ret;
+       bool enc_ring, dec_ring;
+       int r;
 
        if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
                                   &minor_version, &device_handle))
@@ -235,13 +242,31 @@ CU_BOOL suite_vcn_tests_enable(void)
        chip_rev = device_handle->info.chip_rev;
        chip_id = device_handle->info.chip_external_rev;
 
-       r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info);
-       ret = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &einfo);
+       r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info);
+       if (!r) {
+               vcn_ip_version_major = info.hw_ip_version_major;
+               vcn_ip_version_minor = info.hw_ip_version_minor;
+               enc_ring = !!info.available_rings;
+               /* in vcn 4.0 it re-uses encoding queue as unified queue */
+               if (vcn_ip_version_major >= 4) {
+                       vcn_unified_ring = true;
+                       vcn_dec_sw_ring = true;
+                       dec_ring = enc_ring;
+               } else {
+                       r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info);
+                       dec_ring = !!info.available_rings;
+               }
+       }
 
        if (amdgpu_device_deinitialize(device_handle))
                return CU_FALSE;
 
-       if (r != 0 || !info.available_rings ||
+       if (r) {
+               printf("\n\nASIC query hw info failed\n");
+               return CU_FALSE;
+       }
+
+       if (!(dec_ring || enc_ring) ||
            (family_id < AMDGPU_FAMILY_RV &&
             (family_id == AMDGPU_FAMILY_AI &&
              (chip_id - chip_rev) < 0x32))) {  /* Arcturus */
@@ -249,22 +274,25 @@ CU_BOOL suite_vcn_tests_enable(void)
                return CU_FALSE;
        }
 
-       if (family_id == AMDGPU_FAMILY_AI || (ret != 0) ||
-           (!einfo.available_rings)) {
+       if (!dec_ring) {
+               amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE);
+               amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE);
+               amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE);
+       }
+
+       if (family_id == AMDGPU_FAMILY_AI || !enc_ring) {
                amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE);
                amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE);
                amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE);
        }
 
-       if (info.hw_ip_version_major == 1)
+       if (vcn_ip_version_major == 1)
                vcn_reg_index = 0;
-       else if (info.hw_ip_version_major == 2 && info.hw_ip_version_minor == 0)
+       else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0)
                vcn_reg_index = 1;
-       else if ((info.hw_ip_version_major == 2 && info.hw_ip_version_minor >= 5) ||
-                 info.hw_ip_version_major == 3)
+       else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) ||
+                               vcn_ip_version_major == 3)
                vcn_reg_index = 2;
-       else
-               vcn_dec_sw_ring = true;
 
        return CU_TRUE;
 }
@@ -314,6 +342,43 @@ int suite_vcn_tests_clean(void)
        return CUE_SUCCESS;
 }
 
+/*
+ * Emit the unified-queue header (eight dwords) at base[*offset].
+ *
+ * @base:   start of the IB being built.
+ * @offset: in/out dword index into @base; advanced by 8 on return.
+ * @enc:    selects the engine value written to the header (2 when true,
+ *          3 when false).
+ *
+ * Side effect: remembers the addresses of the third and fourth dwords in
+ * ib_checksum and ib_size_in_dw so that amdgpu_cs_sq_ib_tail() can fill
+ * them in once the IB is complete.
+ */
+static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc)
+{
+       uint32_t *cur = base + *offset;
+       const uint32_t engine = enc ? 2 : 3;
+
+       /* signature */
+       cur[0] = 0x00000010;
+       cur[1] = 0x30000002;
+       ib_checksum = &cur[2];          /* patched by the tail */
+       ib_size_in_dw = &cur[3];        /* patched by the tail */
+
+       /* engine info */
+       cur[4] = 0x00000010;
+       cur[5] = 0x30000001;
+       cur[6] = engine;
+       cur[7] = 0x00000000;            /* overwritten by the tail (size slot + 4) */
+
+       *offset += 8;
+}
+
+/*
+ * Finalize the unified-queue header written by amdgpu_cs_sq_head().
+ *
+ * @end: one past the last dword written to the IB.
+ *
+ * Stores the dword count and the checksum into the slots recorded in
+ * ib_size_in_dw and ib_checksum, stores the same count in bytes four dwords
+ * after the dword-count slot, and then clears both saved pointers. Calling
+ * it again without a new header is therefore a no-op.
+ */
+static void amdgpu_cs_sq_ib_tail(uint32_t *end)
+{
+       uint32_t size_in_dw;
+       uint32_t checksum = 0;
+
+       /* if the pointers are invalid, no need to process */
+       if (ib_checksum == NULL || ib_size_in_dw == NULL)
+               return;
+
+       /* dwords from the one after the size slot up to, not including, end */
+       size_in_dw = end - ib_size_in_dw - 1;
+       *ib_size_in_dw = size_in_dw;
+       /*
+        * This slot lies inside the range summed below, so it has to be
+        * written before the checksum is computed.
+        */
+       *(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);
+
+       /* index type matches size_in_dw: no signed/unsigned comparison */
+       for (uint32_t i = 0; i < size_in_dw; i++)
+               checksum += *(ib_checksum + 2 + i);
+
+       *ib_checksum = checksum;
+
+       ib_checksum = NULL;
+       ib_size_in_dw = NULL;
+}
+
 static int submit(unsigned ndw, unsigned ip)
 {
        struct amdgpu_cs_request ibs_request = {0};
@@ -419,10 +484,15 @@ static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx)
 
        /* Support decode software ring message */
        if (!(*idx)) {
-               rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)ib_cpu;
+               rvcn_decode_ib_package_t *ib_header;
 
+               if (vcn_unified_ring)
+                       amdgpu_cs_sq_head(ib_cpu, idx, false);
+
+               ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx];
                ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
                        sizeof(struct rvcn_decode_ib_package_s);
+
                (*idx)++;
                ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER);
                (*idx)++;
@@ -486,6 +556,7 @@ static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx)
 static void amdgpu_cs_vcn_dec_create(void)
 {
        struct amdgpu_vcn_bo msg_buf;
+       unsigned ip;
        int len, r;
 
        num_resources  = 0;
@@ -500,9 +571,9 @@ static void amdgpu_cs_vcn_dec_create(void)
        memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg));
 
        len = 0;
-       if (vcn_dec_sw_ring == true) {
+       if (vcn_dec_sw_ring == true)
                vcn_dec_cmd(msg_buf.addr, 0, &len);
-       else {
+       else {
                ib_cpu[len++] = reg[vcn_reg_index].data0;
                ib_cpu[len++] = msg_buf.addr;
                ib_cpu[len++] = reg[vcn_reg_index].data1;
@@ -515,7 +586,14 @@ static void amdgpu_cs_vcn_dec_create(void)
                }
        }
 
-       r = submit(len, AMDGPU_HW_IP_VCN_DEC);
+       if (vcn_unified_ring) {
+               amdgpu_cs_sq_ib_tail(ib_cpu + len);
+               ip = AMDGPU_HW_IP_VCN_ENC;
+       } else
+               ip = AMDGPU_HW_IP_VCN_DEC;
+
+       r = submit(len, ip);
+
        CU_ASSERT_EQUAL(r, 0);
 
        free_resource(&msg_buf);
@@ -527,6 +605,7 @@ static void amdgpu_cs_vcn_dec_decode(void)
        uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum;
        struct amdgpu_vcn_bo dec_buf;
        int size, len, i, r;
+       unsigned ip;
        uint8_t *dec;
 
        size = 4*1024; /* msg */
@@ -588,7 +667,13 @@ static void amdgpu_cs_vcn_dec_decode(void)
                }
        }
 
-       r = submit(len, AMDGPU_HW_IP_VCN_DEC);
+       if (vcn_unified_ring) {
+               amdgpu_cs_sq_ib_tail(ib_cpu + len);
+               ip = AMDGPU_HW_IP_VCN_ENC;
+       } else
+               ip = AMDGPU_HW_IP_VCN_DEC;
+
+       r = submit(len, ip);
        CU_ASSERT_EQUAL(r, 0);
 
        for (i = 0, sum = 0; i < dt_size; ++i)
@@ -602,6 +687,7 @@ static void amdgpu_cs_vcn_dec_decode(void)
 static void amdgpu_cs_vcn_dec_destroy(void)
 {
        struct amdgpu_vcn_bo msg_buf;
+       unsigned ip;
        int len, r;
 
        num_resources = 0;
@@ -616,9 +702,9 @@ static void amdgpu_cs_vcn_dec_destroy(void)
        memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg));
 
        len = 0;
-       if (vcn_dec_sw_ring == true) {
+       if (vcn_dec_sw_ring == true)
                vcn_dec_cmd(msg_buf.addr, 0, &len);
-       else {
+       else {
                ib_cpu[len++] = reg[vcn_reg_index].data0;
                ib_cpu[len++] = msg_buf.addr;
                ib_cpu[len++] = reg[vcn_reg_index].data1;
@@ -631,7 +717,13 @@ static void amdgpu_cs_vcn_dec_destroy(void)
                }
        }
 
-       r = submit(len, AMDGPU_HW_IP_VCN_DEC);
+       if (vcn_unified_ring) {
+               amdgpu_cs_sq_ib_tail(ib_cpu + len);
+               ip = AMDGPU_HW_IP_VCN_ENC;
+       } else
+               ip = AMDGPU_HW_IP_VCN_DEC;
+
+       r = submit(len, ip);
        CU_ASSERT_EQUAL(r, 0);
 
        free_resource(&msg_buf);
@@ -646,10 +738,10 @@ static void amdgpu_cs_vcn_enc_create(void)
        unsigned width = 160, height = 128, buf_size;
        uint32_t fw_maj = 1, fw_min = 9;
 
-       if (einfo.hw_ip_version_major == 2) {
+       if (vcn_ip_version_major == 2) {
                fw_maj = 1;
                fw_min = 1;
-       } else if (einfo.hw_ip_version_major == 3) {
+       } else if (vcn_ip_version_major == 3) {
                fw_maj = 1;
                fw_min = 0;
        }
@@ -675,6 +767,10 @@ static void amdgpu_cs_vcn_enc_create(void)
        r = amdgpu_bo_cpu_unmap(cpb_buf.handle);
 
        len = 0;
+
+       if (vcn_unified_ring)
+               amdgpu_cs_sq_head(ib_cpu, &len, true);
+
        /* session info */
        st_offset = len;
        st_size = &ib_cpu[len++];       /* size */
@@ -733,7 +829,7 @@ static void amdgpu_cs_vcn_enc_create(void)
        ib_cpu[len++] = 1;      /* quarter pel enabled */
        ib_cpu[len++] = 100;    /* BASELINE profile */
        ib_cpu[len++] = 11;     /* level */
-       if (einfo.hw_ip_version_major == 3) {
+       if (vcn_ip_version_major == 3) {
                ib_cpu[len++] = 0;      /* b_picture_enabled */
                ib_cpu[len++] = 0;      /* weighted_bipred_idc */
        }
@@ -774,7 +870,7 @@ static void amdgpu_cs_vcn_enc_create(void)
        ib_cpu[len++] = 0;      /* scene change sensitivity */
        ib_cpu[len++] = 0;      /* scene change min idr interval */
        ib_cpu[len++] = 0;
-       if (einfo.hw_ip_version_major == 3)
+       if (vcn_ip_version_major == 3)
                ib_cpu[len++] = 0;
        *st_size = (len - st_offset) * 4;
 
@@ -833,6 +929,9 @@ static void amdgpu_cs_vcn_enc_create(void)
 
        *p_task_size = (len - task_offset) * 4;
 
+       if (vcn_unified_ring)
+               amdgpu_cs_sq_ib_tail(ib_cpu + len);
+
        r = submit(len, AMDGPU_HW_IP_VCN_ENC);
        CU_ASSERT_EQUAL(r, 0);
 }
@@ -1176,10 +1275,10 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        uint32_t *st_size = NULL;
        uint32_t fw_maj = 1, fw_min = 9;
 
-       if (einfo.hw_ip_version_major == 2) {
+       if (vcn_ip_version_major == 2) {
                fw_maj = 1;
                fw_min = 1;
-       } else if (einfo.hw_ip_version_major == 3) {
+       } else if (vcn_ip_version_major == 3) {
                fw_maj = 1;
                fw_min = 0;
        }
@@ -1216,6 +1315,10 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        CU_ASSERT_EQUAL(r, 0);
 
        len = 0;
+
+       if (vcn_unified_ring)
+               amdgpu_cs_sq_head(ib_cpu, &len, true);
+
        /* session info */
        st_offset = len;
        st_size = &ib_cpu[len++];       /* size */
@@ -1240,7 +1343,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
                /* sps */
                st_offset = len;
                st_size = &ib_cpu[len++];       /* size */
-               if(einfo.hw_ip_version_major == 1)
+               if(vcn_ip_version_major == 1)
                        ib_cpu[len++] = 0x00000020;     /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */
                else
                        ib_cpu[len++] = 0x0000000a;     /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3 */
@@ -1256,7 +1359,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
                /* pps */
                st_offset = len;
                st_size = &ib_cpu[len++];       /* size */
-               if(einfo.hw_ip_version_major == 1)
+               if(vcn_ip_version_major == 1)
                        ib_cpu[len++] = 0x00000020;     /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/
                else
                        ib_cpu[len++] = 0x0000000a;     /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3*/
@@ -1270,7 +1373,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        /* slice header */
        st_offset = len;
        st_size = &ib_cpu[len++];       /* size */
-       if(einfo.hw_ip_version_major == 1)
+       if(vcn_ip_version_major == 1)
                ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */
        else
                ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 2,3 */
@@ -1303,7 +1406,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        /* encode params */
        st_offset = len;
        st_size = &ib_cpu[len++];       /* size */
-       if(einfo.hw_ip_version_major == 1)
+       if(vcn_ip_version_major == 1)
                ib_cpu[len++] = 0x0000000b;     /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1*/
        else
                ib_cpu[len++] = 0x0000000f;     /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 2,3*/
@@ -1324,7 +1427,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        st_offset = len;
        st_size = &ib_cpu[len++];       /* size */
        ib_cpu[len++] = 0x00200003;     /* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */
-       if (einfo.hw_ip_version_major != 3) {
+       if (vcn_ip_version_major != 3) {
                ib_cpu[len++] = 0x00000000;
                ib_cpu[len++] = 0x00000000;
                ib_cpu[len++] = 0x00000000;
@@ -1353,7 +1456,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        /* encode context */
        st_offset = len;
        st_size = &ib_cpu[len++];       /* size */
-       if(einfo.hw_ip_version_major == 1)
+       if(vcn_ip_version_major == 1)
                ib_cpu[len++] = 0x0000000d;     /* ENCODE_CONTEXT_BUFFER  vcn 1 */
        else
                ib_cpu[len++] = 0x00000011;     /* ENCODE_CONTEXT_BUFFER  vcn 2,3 */
@@ -1375,7 +1478,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        /* bitstream buffer */
        st_offset = len;
        st_size = &ib_cpu[len++];       /* size */
-       if(einfo.hw_ip_version_major == 1)
+       if(vcn_ip_version_major == 1)
                ib_cpu[len++] = 0x0000000e;     /* VIDEO_BITSTREAM_BUFFER vcn 1 */
        else
                ib_cpu[len++] = 0x00000012;     /* VIDEO_BITSTREAM_BUFFER vcn 2,3 */
@@ -1389,7 +1492,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        /* feedback */
        st_offset = len;
        st_size = &ib_cpu[len++];       /* size */
-       if(einfo.hw_ip_version_major == 1)
+       if(vcn_ip_version_major == 1)
                ib_cpu[len++] = 0x00000010;     /* FEEDBACK_BUFFER vcn 1 */
        else
                ib_cpu[len++] = 0x00000015;     /* FEEDBACK_BUFFER vcn 2,3 */
@@ -1403,7 +1506,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        /* intra refresh */
        st_offset = len;
        st_size = &ib_cpu[len++];
-       if(einfo.hw_ip_version_major == 1)
+       if(vcn_ip_version_major == 1)
                ib_cpu[len++] = 0x0000000c;     /* INTRA_REFRESH vcn 1 */
        else
                ib_cpu[len++] = 0x00000010;     /* INTRA_REFRESH vcn 2,3 */
@@ -1412,7 +1515,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        ib_cpu[len++] = 0x00000000;
        *st_size = (len - st_offset) * 4;
 
-       if(einfo.hw_ip_version_major != 1) {
+       if(vcn_ip_version_major != 1) {
                /* Input Format */
                st_offset = len;
                st_size = &ib_cpu[len++];
@@ -1449,6 +1552,10 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
        *st_size = (len - st_offset) * 4;
 
        *p_task_size = (len - task_offset) * 4;
+
+       if (vcn_unified_ring)
+               amdgpu_cs_sq_ib_tail(ib_cpu + len);
+
        r = submit(len, AMDGPU_HW_IP_VCN_ENC);
        CU_ASSERT_EQUAL(r, 0);
 
@@ -1473,10 +1580,10 @@ static void amdgpu_cs_vcn_enc_destroy(void)
        uint32_t *st_size = NULL;
        uint32_t fw_maj = 1, fw_min = 9;
 
-       if (einfo.hw_ip_version_major == 2) {
+       if (vcn_ip_version_major == 2) {
                fw_maj = 1;
                fw_min = 1;
-       } else if (einfo.hw_ip_version_major == 3) {
+       } else if (vcn_ip_version_major == 3) {
                fw_maj = 1;
                fw_min = 0;
        }
@@ -1486,6 +1593,9 @@ static void amdgpu_cs_vcn_enc_destroy(void)
        resources[num_resources++] = enc_buf.handle;
        resources[num_resources++] = ib_handle;
 
+       if (vcn_unified_ring)
+               amdgpu_cs_sq_head(ib_cpu, &len, true);
+
        /* session info */
        st_offset = len;
        st_size = &ib_cpu[len++];       /* size */
@@ -1514,6 +1624,9 @@ static void amdgpu_cs_vcn_enc_destroy(void)
 
        *p_task_size = (len - task_offset) * 4;
 
+       if (vcn_unified_ring)
+               amdgpu_cs_sq_ib_tail(ib_cpu + len);
+
        r = submit(len, AMDGPU_HW_IP_VCN_ENC);
        CU_ASSERT_EQUAL(r, 0);