2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
29 #include "CUnit/Basic.h"
32 #include "util_math.h"
34 #include "amdgpu_test.h"
35 #include "amdgpu_drm.h"
36 #include "amdgpu_internal.h"
37 #include "decode_messages.h"
/* Capacity of the resources[] array of buffer-object handles. */
41 #define MAX_RESOURCES 16
/* Command identifiers used as case labels in vcn_dec_cmd(). */
43 #define DECODE_CMD_MSG_BUFFER 0x00000000
44 #define DECODE_CMD_DPB_BUFFER 0x00000001
45 #define DECODE_CMD_DECODING_TARGET_BUFFER 0x00000002
46 #define DECODE_CMD_FEEDBACK_BUFFER 0x00000003
47 #define DECODE_CMD_PROB_TBL_BUFFER 0x00000004
48 #define DECODE_CMD_SESSION_CONTEXT_BUFFER 0x00000005
49 #define DECODE_CMD_BITSTREAM_BUFFER 0x00000100
50 #define DECODE_CMD_IT_SCALING_TABLE_BUFFER 0x00000204
51 #define DECODE_CMD_CONTEXT_BUFFER 0x00000206
/* Package type that vcn_dec_cmd() stores in the IB package header. */
53 #define DECODE_IB_PARAM_DECODE_BUFFER (0x00000001)
/* Bits that vcn_dec_cmd() ORs into rvcn_decode_buffer_t.valid_buf_flag. */
55 #define DECODE_CMDBUF_FLAGS_MSG_BUFFER (0x00000001)
56 #define DECODE_CMDBUF_FLAGS_DPB_BUFFER (0x00000002)
57 #define DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER (0x00000004)
58 #define DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER (0x00000008)
59 #define DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER (0x00000010)
60 #define DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER (0x00000200)
61 #define DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER (0x00000800)
62 #define DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER (0x00001000)
63 #define DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER (0x00100000)
/* Ring-mode switches; suite_vcn_tests_enable() sets both to true when the
 * VCN IP major version is >= 4. */
65 static bool vcn_dec_sw_ring = false;
66 static bool vcn_unified_ring = false;
/* H.264 NAL unit type codes; several are case labels in h264_parse_nal(). */
68 #define H264_NAL_TYPE_NON_IDR_SLICE 1
69 #define H264_NAL_TYPE_DP_A_SLICE 2
70 #define H264_NAL_TYPE_DP_B_SLICE 3
71 #define H264_NAL_TYPE_DP_C_SLICE 0x4
72 #define H264_NAL_TYPE_IDR_SLICE 0x5
73 #define H264_NAL_TYPE_SEI 0x6
74 #define H264_NAL_TYPE_SEQ_PARAM 0x7
75 #define H264_NAL_TYPE_PIC_PARAM 0x8
76 #define H264_NAL_TYPE_ACCESS_UNIT 0x9
77 #define H264_NAL_TYPE_END_OF_SEQ 0xa
78 #define H264_NAL_TYPE_END_OF_STREAM 0xb
79 #define H264_NAL_TYPE_FILLER_DATA 0xc
80 #define H264_NAL_TYPE_SEQ_EXTENSION 0xd
/* Value h264_find_next_start_code() compares its accumulated bytes against. */
82 #define H264_START_CODE 0x000001
/*
 * Bookkeeping for one buffer object managed by alloc_resource() and
 * free_resource().
 *
 * NOTE(review): only two members are visible in this view and the closing
 * brace is missing; other code in this file also uses ->addr, ->size and
 * ->ptr, so the declaration is presumably longer than shown here.
 */
84 struct amdgpu_vcn_bo {
85 amdgpu_bo_handle handle;
86 amdgpu_va_handle va_handle;
/*
 * Body of the decode-buffer package that vcn_dec_cmd() fills in.
 *
 * valid_buf_flag collects DECODE_CMDBUF_FLAGS_* bits; every buffer address
 * is split into a _hi member (vcn_dec_cmd() stores addr >> 32) and a _lo
 * member (vcn_dec_cmd() stores addr, truncated to unsigned int).
 * The structure size is used to compute the package size and to advance
 * the IB index in dwords (sizeof / 4).
 */
92 typedef struct rvcn_decode_buffer_s {
93 unsigned int valid_buf_flag;
94 unsigned int msg_buffer_address_hi;
95 unsigned int msg_buffer_address_lo;
96 unsigned int dpb_buffer_address_hi;
97 unsigned int dpb_buffer_address_lo;
98 unsigned int target_buffer_address_hi;
99 unsigned int target_buffer_address_lo;
100 unsigned int session_contex_buffer_address_hi;
101 unsigned int session_contex_buffer_address_lo;
102 unsigned int bitstream_buffer_address_hi;
103 unsigned int bitstream_buffer_address_lo;
104 unsigned int context_buffer_address_hi;
105 unsigned int context_buffer_address_lo;
106 unsigned int feedback_buffer_address_hi;
107 unsigned int feedback_buffer_address_lo;
108 unsigned int luma_hist_buffer_address_hi;
109 unsigned int luma_hist_buffer_address_lo;
110 unsigned int prob_tbl_buffer_address_hi;
111 unsigned int prob_tbl_buffer_address_lo;
112 unsigned int sclr_coeff_buffer_address_hi;
113 unsigned int sclr_coeff_buffer_address_lo;
114 unsigned int it_sclr_table_buffer_address_hi;
115 unsigned int it_sclr_table_buffer_address_lo;
116 unsigned int sclr_target_buffer_address_hi;
117 unsigned int sclr_target_buffer_address_lo;
118 unsigned int cenc_size_info_buffer_address_hi;
119 unsigned int cenc_size_info_buffer_address_lo;
120 unsigned int mpeg2_pic_param_buffer_address_hi;
121 unsigned int mpeg2_pic_param_buffer_address_lo;
122 unsigned int mpeg2_mb_control_buffer_address_hi;
123 unsigned int mpeg2_mb_control_buffer_address_lo;
124 unsigned int mpeg2_idct_coeff_buffer_address_hi;
125 unsigned int mpeg2_idct_coeff_buffer_address_lo;
126 } rvcn_decode_buffer_t;
/*
 * Header placed in front of a decode package in the IB.
 * vcn_dec_cmd() sets package_size to the combined size of this header and
 * rvcn_decode_buffer_s, and package_type to DECODE_IB_PARAM_DECODE_BUFFER.
 */
128 typedef struct rvcn_decode_ib_package_s {
129 unsigned int package_size;
130 unsigned int package_type;
131 } rvcn_decode_ib_package_t;
/*
 * One row of register values used when building decode IBs.
 *
 * NOTE(review): the member list is not visible in this view. Code in this
 * file reads .data0, .data1, .cmd, .nop and .cntl, and reg[] initializes
 * five values per row; the member order should be confirmed against the
 * full declaration.
 */
134 struct amdgpu_vcn_reg {
/*
 * Bit-reader state used by the bs_read_*() helpers.
 *
 * numOfBitsInBuffer: bits still unread in the current byte (set to 8 when
 *                    a new byte is loaded by bs_read_u1()).
 * decBuffer:         read cursor into the bitstream.
 * decBufferSize:     set by verify_checksum() to the payload length
 *                    multiplied by 8.
 *
 * NOTE(review): the declaration is truncated in this view; the helpers
 * also use ->decData and ->end, and the typedef name used elsewhere is
 * bufferInfo.
 */
142 typedef struct BufferInfo_t {
143 uint32_t numOfBitsInBuffer;
144 const uint8_t *decBuffer;
146 uint32_t decBufferSize;
/*
 * Values extracted while parsing an H.264 stream.
 *
 * nal_unit_type is stored by h264_parse_nal(); pic_width and pic_height
 * are stored by h264_parse_sequence_parameter_set().
 *
 * NOTE(review): the declaration is truncated in this view; the parsers
 * also write ->profile, ->level_idc, ->nal_ref_idc and ->slice_type, and
 * the typedef name used elsewhere is h264_decode.
 */
150 typedef struct h264_decode_t {
154 uint8_t nal_unit_type;
155 uint32_t pic_width, pic_height;
159 static amdgpu_device_handle device_handle;
160 static uint32_t major_version;
161 static uint32_t minor_version;
162 static uint32_t family_id;
163 static uint32_t chip_rev;
164 static uint32_t chip_id;
165 static uint32_t asic_id;
166 static uint32_t chip_rev;
167 static struct amdgpu_vcn_bo enc_buf;
168 static struct amdgpu_vcn_bo cpb_buf;
169 static uint32_t enc_task_id;
171 static amdgpu_context_handle context_handle;
172 static amdgpu_bo_handle ib_handle;
173 static amdgpu_va_handle ib_va_handle;
174 static uint64_t ib_mc_address;
175 static uint32_t *ib_cpu;
176 static uint32_t *ib_checksum;
177 static uint32_t *ib_size_in_dw;
179 static rvcn_decode_buffer_t *decode_buffer;
181 static amdgpu_bo_handle resources[MAX_RESOURCES];
182 static unsigned num_resources;
184 static uint8_t vcn_reg_index;
/*
 * Register value table, indexed by vcn_reg_index.
 * Each row supplies five values for struct amdgpu_vcn_reg.
 * NOTE(review): which row belongs to which VCN IP version is decided in
 * suite_vcn_tests_enable(), but the assignments are not visible in this
 * view; the closing of this initializer is not visible either.
 */
185 static struct amdgpu_vcn_reg reg[] = {
186 {0x81c4, 0x81c5, 0x81c3, 0x81ff, 0x81c6},
187 {0x504, 0x505, 0x503, 0x53f, 0x506},
188 {0x10, 0x11, 0xf, 0x29, 0x26d},
/* Expected picture width, height and slice type; verify_checksum()
 * compares the parsed stream against these. */
191 uint32_t gWidth, gHeight, gSliceType;
/* VCN IP version as reported by amdgpu_query_hw_ip_info() in
 * suite_vcn_tests_enable(). */
192 static uint32_t vcn_ip_version_major;
193 static uint32_t vcn_ip_version_minor;
/* Decode test cases registered in vcn_tests[]. */
194 static void amdgpu_cs_vcn_dec_create(void);
195 static void amdgpu_cs_vcn_dec_decode(void);
196 static void amdgpu_cs_vcn_dec_destroy(void);
/* Encode test cases registered in vcn_tests[]. */
198 static void amdgpu_cs_vcn_enc_create(void);
199 static void amdgpu_cs_vcn_enc_encode(void);
200 static void amdgpu_cs_vcn_enc_destroy(void);
/* Unified-ring IB framing helpers. */
202 static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc);
203 static void amdgpu_cs_sq_ib_tail(uint32_t *end);
/* H.264 bitstream parsing helpers used by verify_checksum(). */
204 static void h264_check_0s (bufferInfo * bufInfo, int count);
205 static int32_t h264_se (bufferInfo * bufInfo);
206 static inline uint32_t bs_read_u1(bufferInfo *bufinfo);
207 static inline int bs_eof(bufferInfo *bufinfo);
208 static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n);
209 static inline uint32_t bs_read_ue(bufferInfo* bufinfo);
210 static uint32_t remove_03 (uint8_t *bptr, uint32_t len);
211 static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo *bufInfo);
212 static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo *bufInfo);
213 static void h264_slice_header (h264_decode *dec, bufferInfo *bufInfo);
214 static uint8_t h264_parse_nal (h264_decode *dec, bufferInfo *bufInfo);
215 static uint32_t h264_find_next_start_code (uint8_t *pBuf, uint32_t bufLen);
216 static int verify_checksum(uint8_t *buffer, uint32_t buffer_size);
/*
 * Test table for the VCN suite: three decode cases followed by three
 * encode cases. The names here are the same strings passed to
 * amdgpu_set_test_active() in suite_vcn_tests_enable().
 * NOTE(review): the end of the initializer is not visible in this view.
 */
218 CU_TestInfo vcn_tests[] = {
220 { "VCN DEC create", amdgpu_cs_vcn_dec_create },
221 { "VCN DEC decode", amdgpu_cs_vcn_dec_decode },
222 { "VCN DEC destroy", amdgpu_cs_vcn_dec_destroy },
224 { "VCN ENC create", amdgpu_cs_vcn_enc_create },
225 { "VCN ENC encode", amdgpu_cs_vcn_enc_encode },
226 { "VCN ENC destroy", amdgpu_cs_vcn_enc_destroy },
/*
 * Decide whether the VCN suite, and which of its tests, should run.
 *
 * Visible steps: open the device, record family/ASIC/chip identifiers,
 * query the VCN ENC and VCN DEC hardware IP info (IP version and ring
 * availability), close the device, print a message when VCN is not
 * supported, deactivate the decode and/or encode tests, and branch on the
 * VCN IP version.
 *
 * NOTE(review): the return statements, the error checks on r, the
 * condition guarding the decode-test deactivation and the bodies of the
 * version branches are not visible in this view.
 */
230 CU_BOOL suite_vcn_tests_enable(void)
232 struct drm_amdgpu_info_hw_ip info;
233 bool enc_ring, dec_ring;
236 if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
237 &minor_version, &device_handle))
240 family_id = device_handle->info.family_id;
241 asic_id = device_handle->info.asic_id;
242 chip_rev = device_handle->info.chip_rev;
243 chip_id = device_handle->info.chip_external_rev;
/* The encode query also provides the IP version used by the whole file. */
245 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &info);
247 vcn_ip_version_major = info.hw_ip_version_major;
248 vcn_ip_version_minor = info.hw_ip_version_minor;
249 enc_ring = !!info.available_rings;
250 /* in vcn 4.0 it re-uses encoding queue as unified queue */
251 if (vcn_ip_version_major >= 4) {
252 vcn_unified_ring = true;
253 vcn_dec_sw_ring = true;
/* NOTE(review): whether this decode query sits in an else branch cannot
 * be told from the visible lines. */
256 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info);
257 dec_ring = !!info.available_rings;
261 if (amdgpu_device_deinitialize(device_handle))
265 printf("\n\nASIC query hw info failed\n");
/* Unsupported when no ring of either kind is available, or when the
 * family is below AMDGPU_FAMILY_RV, equals AMDGPU_FAMILY_AI and
 * (chip_id - chip_rev) is below 0x32. */
269 if (!(dec_ring || enc_ring) ||
270 (family_id < AMDGPU_FAMILY_RV &&
271 (family_id == AMDGPU_FAMILY_AI &&
272 (chip_id - chip_rev) < 0x32))) { /* Arcturus */
273 printf("\n\nThe ASIC NOT support VCN, suite disabled\n");
278 amdgpu_set_test_active("VCN Tests", "VCN DEC create", CU_FALSE);
279 amdgpu_set_test_active("VCN Tests", "VCN DEC decode", CU_FALSE);
280 amdgpu_set_test_active("VCN Tests", "VCN DEC destroy", CU_FALSE);
/* Encode tests are switched off for AMDGPU_FAMILY_AI or without an
 * encode ring. */
283 if (family_id == AMDGPU_FAMILY_AI || !enc_ring) {
284 amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE);
285 amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE);
286 amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE);
/* Version dispatch; presumably selects vcn_reg_index - TODO confirm. */
289 if (vcn_ip_version_major == 1)
291 else if (vcn_ip_version_major == 2 && vcn_ip_version_minor == 0)
293 else if ((vcn_ip_version_major == 2 && vcn_ip_version_minor >= 5) ||
294 vcn_ip_version_major == 3)
/*
 * Suite setup: open the device, create a command-submission context and
 * allocate and map the shared IB (IB_SIZE bytes, 4096 alignment, GTT).
 *
 * Returns CUE_SINIT_FAILED on the failure paths visible here.
 * NOTE(review): the conditions guarding those returns and the success
 * return are not visible in this view.
 */
300 int suite_vcn_tests_init(void)
304 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
305 &minor_version, &device_handle);
307 return CUE_SINIT_FAILED;
309 family_id = device_handle->info.family_id;
311 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
313 return CUE_SINIT_FAILED;
/* ib_cpu receives the CPU mapping, ib_mc_address the GPU address. */
315 r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096,
316 AMDGPU_GEM_DOMAIN_GTT, 0,
317 &ib_handle, (void**)&ib_cpu,
318 &ib_mc_address, &ib_va_handle);
320 return CUE_SINIT_FAILED;
/*
 * Suite teardown, in reverse order of suite_vcn_tests_init(): unmap and
 * free the shared IB, free the submission context, close the device.
 *
 * Returns CUE_SCLEAN_FAILED on the failure paths visible here.
 * NOTE(review): the conditions guarding those returns and the success
 * return are not visible in this view.
 */
325 int suite_vcn_tests_clean(void)
329 r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
330 ib_mc_address, IB_SIZE);
332 return CUE_SCLEAN_FAILED;
334 r = amdgpu_cs_ctx_free(context_handle);
336 return CUE_SCLEAN_FAILED;
338 r = amdgpu_device_deinitialize(device_handle);
340 return CUE_SCLEAN_FAILED;
/*
 * Write the leading dwords of a unified-ring IB at base[*offset] and
 * advance *offset past them.
 *
 * @base:   start of the IB (dword pointer)
 * @offset: in/out dword index into @base
 * @enc:    selects the value 2 (true) or 3 (false) in the second group
 *
 * Two dwords inside the first group are not written here; their addresses
 * are remembered in ib_checksum and ib_size_in_dw so that
 * amdgpu_cs_sq_ib_tail() can fill them in later.
 */
345 static void amdgpu_cs_sq_head(uint32_t *base, int *offset, bool enc)
348 *(base + (*offset)++) = 0x00000010;
349 *(base + (*offset)++) = 0x30000002;
/* Reserve the checksum and size slots. */
350 ib_checksum = base + (*offset)++;
351 ib_size_in_dw = base + (*offset)++;
354 *(base + (*offset)++) = 0x00000010;
355 *(base + (*offset)++) = 0x30000001;
356 *(base + (*offset)++) = enc ? 2 : 3;
/* This dword lands at ib_size_in_dw + 4, the slot amdgpu_cs_sq_ib_tail()
 * later overwrites with the size in bytes. */
357 *(base + (*offset)++) = 0x00000000;
/*
 * Finish a unified-ring IB whose head was written by amdgpu_cs_sq_head().
 *
 * @end: pointer one past the last dword written to the IB
 *
 * Fills the reserved size slot with the number of dwords between that
 * slot and @end, stores the same size in bytes at ib_size_in_dw + 4, and
 * stores in the checksum slot the sum of size_in_dw dwords starting at
 * ib_checksum + 2.
 *
 * NOTE(review): the declaration of size_in_dw, the statement executed by
 * the NULL guard and one statement before the final reset are not visible
 * in this view.
 */
360 static void amdgpu_cs_sq_ib_tail(uint32_t *end)
363 uint32_t checksum = 0;
365 /* if the pointers are invalid, no need to process */
366 if (ib_checksum == NULL || ib_size_in_dw == NULL)
369 size_in_dw = end - ib_size_in_dw - 1;
370 *ib_size_in_dw = size_in_dw;
371 *(ib_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);
373 for (int i = 0; i < size_in_dw; i++)
374 checksum += *(ib_checksum + 2 + i);
376 *ib_checksum = checksum;
/* Invalidate the slot pointer so a second call becomes a no-op. */
379 ib_size_in_dw = NULL;
/*
 * Submit the shared IB to the given hardware IP and query its fence.
 *
 * @ndw: IB length; NOTE(review): its use is not visible in this view
 * @ip:  hardware IP type stored in the request and in the fence query
 *
 * Visible steps: build a BO list from resources[0..num_resources), submit
 * one IB located at ib_mc_address, destroy the BO list, then query the
 * fence for the returned sequence number with AMDGPU_TIMEOUT_INFINITE.
 *
 * NOTE(review): the error checks after each call, the remaining arguments
 * of the fence query and the return statement are not visible here.
 */
382 static int submit(unsigned ndw, unsigned ip)
384 struct amdgpu_cs_request ibs_request = {0};
385 struct amdgpu_cs_ib_info ib_info = {0};
386 struct amdgpu_cs_fence fence_status = {0};
390 ib_info.ib_mc_address = ib_mc_address;
393 ibs_request.ip_type = ip;
395 r = amdgpu_bo_list_create(device_handle, num_resources, resources,
396 NULL, &ibs_request.resources);
400 ibs_request.number_of_ibs = 1;
401 ibs_request.ibs = &ib_info;
402 ibs_request.fence_info.handle = NULL;
404 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
408 r = amdgpu_bo_list_destroy(ibs_request.resources);
/* The fence is identified by context, IP type and the sequence number
 * that the submit call stored in the request. */
412 fence_status.context = context_handle;
413 fence_status.ip_type = ip;
414 fence_status.fence = ibs_request.seq_no;
416 r = amdgpu_cs_query_fence_status(&fence_status,
417 AMDGPU_TIMEOUT_INFINITE,
/*
 * Allocate a buffer object, give it a GPU virtual address and zero it.
 *
 * @vcn_bo: receives the handle, size and VA handle of the new buffer
 * @size:   requested size in bytes; the allocation is rounded up to a
 *          multiple of 4096
 * @domain: preferred heap for the allocation
 *
 * Each step is checked with CU_ASSERT_EQUAL(r, 0). The buffer is mapped
 * only long enough to memset the first @size bytes and is unmapped again
 * before returning, so callers map it themselves when they need CPU
 * access.
 *
 * NOTE(review): the trailing arguments of the VA calls and the store of
 * the GPU address into vcn_bo are not visible in this view.
 */
425 static void alloc_resource(struct amdgpu_vcn_bo *vcn_bo,
426 unsigned size, unsigned domain)
428 struct amdgpu_bo_alloc_request req = {0};
429 amdgpu_bo_handle buf_handle;
430 amdgpu_va_handle va_handle;
434 req.alloc_size = ALIGN(size, 4096);
435 req.preferred_heap = domain;
436 r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
437 CU_ASSERT_EQUAL(r, 0);
438 r = amdgpu_va_range_alloc(device_handle,
439 amdgpu_gpu_va_range_general,
440 req.alloc_size, 1, 0, &va,
442 CU_ASSERT_EQUAL(r, 0);
443 r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0,
445 CU_ASSERT_EQUAL(r, 0);
447 vcn_bo->handle = buf_handle;
448 vcn_bo->size = req.alloc_size;
449 vcn_bo->va_handle = va_handle;
450 r = amdgpu_bo_cpu_map(vcn_bo->handle, (void **)&vcn_bo->ptr);
451 CU_ASSERT_EQUAL(r, 0);
/* Only the requested size is cleared, not the rounded-up allocation. */
452 memset(vcn_bo->ptr, 0, size);
453 r = amdgpu_bo_cpu_unmap(vcn_bo->handle);
454 CU_ASSERT_EQUAL(r, 0);
/*
 * Release a buffer created by alloc_resource().
 *
 * @vcn_bo: buffer to release
 *
 * Unmaps the GPU virtual address, frees the VA range, frees the buffer
 * object (each checked with CU_ASSERT_EQUAL(r, 0)) and finally clears
 * *vcn_bo so stale handles are not reused.
 */
457 static void free_resource(struct amdgpu_vcn_bo *vcn_bo)
461 r = amdgpu_bo_va_op(vcn_bo->handle, 0, vcn_bo->size,
462 vcn_bo->addr, 0, AMDGPU_VA_OP_UNMAP);
463 CU_ASSERT_EQUAL(r, 0);
465 r = amdgpu_va_range_free(vcn_bo->va_handle);
466 CU_ASSERT_EQUAL(r, 0);
468 r = amdgpu_bo_free(vcn_bo->handle);
469 CU_ASSERT_EQUAL(r, 0);
470 memset(vcn_bo, 0, sizeof(*vcn_bo));
/*
 * Add one decode buffer command for @addr to the shared IB.
 *
 * @addr: GPU address of the buffer
 * @cmd:  one of the DECODE_CMD_* identifiers
 * @idx:  in/out dword index into ib_cpu
 *
 * When vcn_dec_sw_ring is false, the command is written as three
 * register/value pairs: data0 with addr truncated to 32 bits, data1 with
 * addr >> 32, and cmd with cmd << 1.
 *
 * The remaining code handles the software-ring format: it can emit the
 * unified-ring head, a package header and a zeroed rvcn_decode_buffer_s,
 * and then records @addr in the hi/lo pair matching @cmd while setting
 * the corresponding bit in valid_buf_flag.
 *
 * NOTE(review): the end of the first branch, the condition guarding the
 * package setup, the index increments after the header fields and the
 * switch/break/default lines are not visible in this view.
 */
473 static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx)
475 if (vcn_dec_sw_ring == false) {
476 ib_cpu[(*idx)++] = reg[vcn_reg_index].data0;
477 ib_cpu[(*idx)++] = addr;
478 ib_cpu[(*idx)++] = reg[vcn_reg_index].data1;
479 ib_cpu[(*idx)++] = addr >> 32;
480 ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd;
481 ib_cpu[(*idx)++] = cmd << 1;
485 /* Support decode software ring message */
487 rvcn_decode_ib_package_t *ib_header;
489 if (vcn_unified_ring)
490 amdgpu_cs_sq_head(ib_cpu, idx, false);
/* Package header: total size of header plus decode buffer, then type. */
492 ib_header = (rvcn_decode_ib_package_t *)&ib_cpu[*idx];
493 ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
494 sizeof(struct rvcn_decode_ib_package_s);
497 ib_header->package_type = (DECODE_IB_PARAM_DECODE_BUFFER);
/* Reserve the decode buffer inside the IB and start from all zeroes. */
500 decode_buffer = (rvcn_decode_buffer_t *)&(ib_cpu[*idx]);
501 *idx += sizeof(struct rvcn_decode_buffer_s) / 4;
502 memset(decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
506 case DECODE_CMD_MSG_BUFFER:
507 decode_buffer->valid_buf_flag |= DECODE_CMDBUF_FLAGS_MSG_BUFFER;
508 decode_buffer->msg_buffer_address_hi = (addr >> 32);
509 decode_buffer->msg_buffer_address_lo = (addr);
511 case DECODE_CMD_DPB_BUFFER:
512 decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DPB_BUFFER);
513 decode_buffer->dpb_buffer_address_hi = (addr >> 32);
514 decode_buffer->dpb_buffer_address_lo = (addr);
516 case DECODE_CMD_DECODING_TARGET_BUFFER:
517 decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
518 decode_buffer->target_buffer_address_hi = (addr >> 32);
519 decode_buffer->target_buffer_address_lo = (addr);
521 case DECODE_CMD_FEEDBACK_BUFFER:
522 decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
523 decode_buffer->feedback_buffer_address_hi = (addr >> 32);
524 decode_buffer->feedback_buffer_address_lo = (addr);
526 case DECODE_CMD_PROB_TBL_BUFFER:
527 decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
528 decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
529 decode_buffer->prob_tbl_buffer_address_lo = (addr);
531 case DECODE_CMD_SESSION_CONTEXT_BUFFER:
532 decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
533 decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
534 decode_buffer->session_contex_buffer_address_lo = (addr);
536 case DECODE_CMD_BITSTREAM_BUFFER:
537 decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
538 decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
539 decode_buffer->bitstream_buffer_address_lo = (addr);
541 case DECODE_CMD_IT_SCALING_TABLE_BUFFER:
542 decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
543 decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
544 decode_buffer->it_sclr_table_buffer_address_lo = (addr);
546 case DECODE_CMD_CONTEXT_BUFFER:
547 decode_buffer->valid_buf_flag |= (DECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
548 decode_buffer->context_buffer_address_hi = (addr >> 32);
549 decode_buffer->context_buffer_address_lo = (addr);
/* Reached for a command with no matching case - presumably the default
 * label; it is not visible in this view. */
552 printf("Not Support!\n");
/*
 * Test case: send the decoder "create" message.
 *
 * Allocates a 4096-byte message buffer in GTT, copies vcn_dec_create_msg
 * into it and builds an IB that points the decoder at the message, either
 * through vcn_dec_cmd() (software ring) or through direct register/value
 * pairs. On the unified ring the IB is finalized with
 * amdgpu_cs_sq_ib_tail() and the VCN ENC IP type is selected; otherwise
 * VCN DEC is selected. The message buffer is freed afterwards.
 *
 * NOTE(review): local declarations, the else lines, the padding loop
 * around the nop write and the submit call are not visible in this view.
 */
556 static void amdgpu_cs_vcn_dec_create(void)
558 struct amdgpu_vcn_bo msg_buf;
563 alloc_resource(&msg_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
564 resources[num_resources++] = msg_buf.handle;
565 resources[num_resources++] = ib_handle;
567 r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
568 CU_ASSERT_EQUAL(r, 0);
570 memset(msg_buf.ptr, 0, 4096);
571 memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg));
574 if (vcn_dec_sw_ring == true)
575 vcn_dec_cmd(msg_buf.addr, 0, &len);
/* Register path: low address dword via data0, high dword via data1. */
577 ib_cpu[len++] = reg[vcn_reg_index].data0;
578 ib_cpu[len++] = msg_buf.addr;
579 ib_cpu[len++] = reg[vcn_reg_index].data1;
580 ib_cpu[len++] = msg_buf.addr >> 32;
581 ib_cpu[len++] = reg[vcn_reg_index].cmd;
584 ib_cpu[len++] = reg[vcn_reg_index].nop;
589 if (vcn_unified_ring) {
590 amdgpu_cs_sq_ib_tail(ib_cpu + len);
591 ip = AMDGPU_HW_IP_VCN_ENC;
593 ip = AMDGPU_HW_IP_VCN_DEC;
597 CU_ASSERT_EQUAL(r, 0);
599 free_resource(&msg_buf);
/*
 * Test case: decode one frame and compare a checksum of the output.
 *
 * A single GTT buffer is carved into consecutive regions: message (4 KiB),
 * feedback (4 KiB), IT scaling table (4096 bytes), bitstream (size of
 * uvd_bitstream rounded up to 4 KiB), DPB (dpb_size rounded up) and
 * decode target (dt_size rounded up). The message, feedback, scaling
 * table and bitstream regions are filled from the corresponding tables;
 * the GPU addresses of all regions are then passed to vcn_dec_cmd().
 * After submission a sum over dt_size bytes is compared with SUM_DECODE.
 *
 * NOTE(review): local declarations, the pointer advances between the
 * memcpy calls, the padding loop, the submit call and the loop body that
 * accumulates sum are not visible in this view.
 */
602 static void amdgpu_cs_vcn_dec_decode(void)
604 const unsigned dpb_size = 15923584, dt_size = 737280;
605 uint64_t msg_addr, fb_addr, bs_addr, dpb_addr, ctx_addr, dt_addr, it_addr, sum;
606 struct amdgpu_vcn_bo dec_buf;
611 size = 4*1024; /* msg */
612 size += 4*1024; /* fb */
613 size += 4096; /*it_scaling_table*/
614 size += ALIGN(sizeof(uvd_bitstream), 4*1024);
615 size += ALIGN(dpb_size, 4*1024);
616 size += ALIGN(dt_size, 4*1024);
619 alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_GTT);
620 resources[num_resources++] = dec_buf.handle;
621 resources[num_resources++] = ib_handle;
623 r = amdgpu_bo_cpu_map(dec_buf.handle, (void **)&dec_buf.ptr);
626 CU_ASSERT_EQUAL(r, 0);
627 memset(dec_buf.ptr, 0, size);
/* The message region holds the generic decode message followed
 * immediately by the AVC-specific message. */
628 memcpy(dec_buf.ptr, vcn_dec_decode_msg, sizeof(vcn_dec_decode_msg));
629 memcpy(dec_buf.ptr + sizeof(vcn_dec_decode_msg),
630 avc_decode_msg, sizeof(avc_decode_msg));
633 memcpy(dec, feedback_msg, sizeof(feedback_msg));
635 memcpy(dec, uvd_it_scaling_table, sizeof(uvd_it_scaling_table));
638 memcpy(dec, uvd_bitstream, sizeof(uvd_bitstream));
640 dec += ALIGN(sizeof(uvd_bitstream), 4*1024);
642 dec += ALIGN(dpb_size, 4*1024);
/* GPU addresses follow the same layout as the CPU-side regions. The
 * context address is placed 0x006B9400 bytes after dpb_addr, rounded up
 * to 4 KiB. */
644 msg_addr = dec_buf.addr;
645 fb_addr = msg_addr + 4*1024;
646 it_addr = fb_addr + 4*1024;
647 bs_addr = it_addr + 4*1024;
648 dpb_addr = ALIGN(bs_addr + sizeof(uvd_bitstream), 4*1024);
649 ctx_addr = ALIGN(dpb_addr + 0x006B9400, 4*1024);
650 dt_addr = ALIGN(dpb_addr + dpb_size, 4*1024);
/* The numeric commands match the DECODE_CMD_* definitions. */
653 vcn_dec_cmd(msg_addr, 0x0, &len);
654 vcn_dec_cmd(dpb_addr, 0x1, &len);
655 vcn_dec_cmd(dt_addr, 0x2, &len);
656 vcn_dec_cmd(fb_addr, 0x3, &len);
657 vcn_dec_cmd(bs_addr, 0x100, &len);
658 vcn_dec_cmd(it_addr, 0x204, &len);
659 vcn_dec_cmd(ctx_addr, 0x206, &len);
661 if (vcn_dec_sw_ring == false) {
662 ib_cpu[len++] = reg[vcn_reg_index].cntl;
665 ib_cpu[len++] = reg[vcn_reg_index].nop;
670 if (vcn_unified_ring) {
671 amdgpu_cs_sq_ib_tail(ib_cpu + len);
672 ip = AMDGPU_HW_IP_VCN_ENC;
674 ip = AMDGPU_HW_IP_VCN_DEC;
677 CU_ASSERT_EQUAL(r, 0);
679 for (i = 0, sum = 0; i < dt_size; ++i)
682 CU_ASSERT_EQUAL(sum, SUM_DECODE);
684 free_resource(&dec_buf);
/*
 * Test case: send the decoder "destroy" message.
 *
 * Mirrors amdgpu_cs_vcn_dec_create() with a 1024-byte message buffer and
 * vcn_dec_destroy_msg as the payload: build the IB through vcn_dec_cmd()
 * or direct register/value pairs, finalize it for the unified ring when
 * needed, check the result and free the message buffer.
 *
 * NOTE(review): local declarations, the else lines, the padding loop
 * around the nop write and the submit call are not visible in this view.
 */
687 static void amdgpu_cs_vcn_dec_destroy(void)
689 struct amdgpu_vcn_bo msg_buf;
694 alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT);
695 resources[num_resources++] = msg_buf.handle;
696 resources[num_resources++] = ib_handle;
698 r = amdgpu_bo_cpu_map(msg_buf.handle, (void **)&msg_buf.ptr);
699 CU_ASSERT_EQUAL(r, 0);
701 memset(msg_buf.ptr, 0, 1024);
702 memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg));
705 if (vcn_dec_sw_ring == true)
706 vcn_dec_cmd(msg_buf.addr, 0, &len);
/* Register path: low address dword via data0, high dword via data1. */
708 ib_cpu[len++] = reg[vcn_reg_index].data0;
709 ib_cpu[len++] = msg_buf.addr;
710 ib_cpu[len++] = reg[vcn_reg_index].data1;
711 ib_cpu[len++] = msg_buf.addr >> 32;
712 ib_cpu[len++] = reg[vcn_reg_index].cmd;
715 ib_cpu[len++] = reg[vcn_reg_index].nop;
720 if (vcn_unified_ring) {
721 amdgpu_cs_sq_ib_tail(ib_cpu + len);
722 ip = AMDGPU_HW_IP_VCN_ENC;
724 ip = AMDGPU_HW_IP_VCN_DEC;
727 CU_ASSERT_EQUAL(r, 0);
729 free_resource(&msg_buf);
/*
 * Test case: create and initialize an H.264 encode session.
 *
 * Allocates the session buffer (enc_buf, 128 KiB) and a second buffer
 * (cpb_buf, twice the aligned frame size), clears both, then writes a
 * sequence of size-prefixed parameter packages into the shared IB and
 * submits it to the VCN ENC ring.
 *
 * Every package follows the same pattern: reserve a size dword through
 * st_size, write the package id and payload, then store the package
 * length in bytes ((len - st_offset) * 4) back into the reserved slot.
 * The task-info package reserves one more slot (p_task_size) that
 * receives (len - task_offset) * 4 at the end.
 *
 * NOTE(review): the assignments to st_offset/task_offset, the firmware
 * version overrides per IP version and several payload dwords are not
 * visible in this view.
 */
732 static void amdgpu_cs_vcn_enc_create(void)
735 uint32_t *p_task_size = NULL;
736 uint32_t task_offset = 0, st_offset;
737 uint32_t *st_size = NULL;
738 unsigned width = 160, height = 128, buf_size;
739 uint32_t fw_maj = 1, fw_min = 9;
741 if (vcn_ip_version_major == 2) {
744 } else if (vcn_ip_version_major == 3) {
/* Frame size with width aligned to 256, height to 32, times 3/2. */
751 buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;
755 alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT);
756 alloc_resource(&cpb_buf, buf_size * 2, AMDGPU_GEM_DOMAIN_GTT);
757 resources[num_resources++] = enc_buf.handle;
758 resources[num_resources++] = cpb_buf.handle;
759 resources[num_resources++] = ib_handle;
761 r = amdgpu_bo_cpu_map(enc_buf.handle, (void**)&enc_buf.ptr);
762 memset(enc_buf.ptr, 0, 128 * 1024);
763 r = amdgpu_bo_cpu_unmap(enc_buf.handle);
/* NOTE(review): the cpb_buf mapping is stored in enc_buf.ptr rather than
 * cpb_buf.ptr, and none of the map/unmap results above or below are
 * checked in the visible lines. */
765 r = amdgpu_bo_cpu_map(cpb_buf.handle, (void**)&enc_buf.ptr);
766 memset(enc_buf.ptr, 0, buf_size * 2);
767 r = amdgpu_bo_cpu_unmap(cpb_buf.handle);
771 if (vcn_unified_ring)
772 amdgpu_cs_sq_head(ib_cpu, &len, true);
/* Session info: firmware interface version and session buffer address. */
776 st_size = &ib_cpu[len++]; /* size */
777 ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */
778 ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
779 ib_cpu[len++] = enc_buf.addr >> 32;
780 ib_cpu[len++] = enc_buf.addr;
781 ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */
782 *st_size = (len - st_offset) * 4;
/* Task info: the task size slot is filled in at the end. */
787 st_size = &ib_cpu[len++]; /* size */
788 ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */
789 p_task_size = &ib_cpu[len++];
790 ib_cpu[len++] = enc_task_id++; /* task_id */
791 ib_cpu[len++] = 0; /* feedback */
792 *st_size = (len - st_offset) * 4;
796 st_size = &ib_cpu[len++]; /* size */
797 ib_cpu[len++] = 0x01000001; /* RENCODE_IB_OP_INITIALIZE */
798 *st_size = (len - st_offset) * 4;
802 st_size = &ib_cpu[len++]; /* size */
803 ib_cpu[len++] = 0x00000003; /* RENCODE_IB_PARAM_SESSION_INIT */
804 ib_cpu[len++] = 1; /* RENCODE_ENCODE_STANDARD_H264 */
805 ib_cpu[len++] = width;
806 ib_cpu[len++] = height;
809 ib_cpu[len++] = 0; /* pre encode mode */
810 ib_cpu[len++] = 0; /* chroma enabled : false */
811 *st_size = (len - st_offset) * 4;
815 st_size = &ib_cpu[len++]; /* size */
816 ib_cpu[len++] = 0x00200001; /* RENCODE_H264_IB_PARAM_SLICE_CONTROL */
817 ib_cpu[len++] = 0; /* RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS */
818 ib_cpu[len++] = ALIGN(width, 16) / 16 * ALIGN(height, 16) / 16;
819 *st_size = (len - st_offset) * 4;
823 st_size = &ib_cpu[len++]; /* size */
824 ib_cpu[len++] = 0x00200002; /* RENCODE_H264_IB_PARAM_SPEC_MISC */
825 ib_cpu[len++] = 0; /* constrained intra pred flag */
826 ib_cpu[len++] = 0; /* cabac enable */
827 ib_cpu[len++] = 0; /* cabac init idc */
828 ib_cpu[len++] = 1; /* half pel enabled */
829 ib_cpu[len++] = 1; /* quarter pel enabled */
830 ib_cpu[len++] = 100; /* BASELINE profile */
831 ib_cpu[len++] = 11; /* level */
/* Two extra dwords are written only when the IP major version is 3. */
832 if (vcn_ip_version_major == 3) {
833 ib_cpu[len++] = 0; /* b_picture_enabled */
834 ib_cpu[len++] = 0; /* weighted_bipred_idc */
836 *st_size = (len - st_offset) * 4;
838 /* deblocking filter */
840 st_size = &ib_cpu[len++]; /* size */
841 ib_cpu[len++] = 0x00200004; /* RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER */
842 ib_cpu[len++] = 0; /* disable deblocking filter idc */
843 ib_cpu[len++] = 0; /* alpha c0 offset */
844 ib_cpu[len++] = 0; /* tc offset */
845 ib_cpu[len++] = 0; /* cb offset */
846 ib_cpu[len++] = 0; /* cr offset */
847 *st_size = (len - st_offset) * 4;
851 st_size = &ib_cpu[len++]; /* size */
852 ib_cpu[len++] = 0x00000004; /* RENCODE_IB_PARAM_LAYER_CONTROL */
853 ib_cpu[len++] = 1; /* max temporal layer */
854 ib_cpu[len++] = 1; /* no of temporal layer */
855 *st_size = (len - st_offset) * 4;
857 /* rc_session init */
859 st_size = &ib_cpu[len++]; /* size */
860 ib_cpu[len++] = 0x00000006; /* RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT */
861 ib_cpu[len++] = 0; /* rate control */
862 ib_cpu[len++] = 48; /* vbv buffer level */
863 *st_size = (len - st_offset) * 4;
867 st_size = &ib_cpu[len++]; /* size */
868 ib_cpu[len++] = 0x00000009; /* RENCODE_IB_PARAM_QUALITY_PARAMS */
869 ib_cpu[len++] = 0; /* vbaq mode */
870 ib_cpu[len++] = 0; /* scene change sensitivity */
871 ib_cpu[len++] = 0; /* scene change min idr interval */
873 if (vcn_ip_version_major == 3)
875 *st_size = (len - st_offset) * 4;
879 st_size = &ib_cpu[len++]; /* size */
880 ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */
881 ib_cpu[len++] = 0; /* temporal layer */
882 *st_size = (len - st_offset) * 4;
886 st_size = &ib_cpu[len++]; /* size */
887 ib_cpu[len++] = 0x00000007; /* RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT */
892 ib_cpu[len++] = 0x01312d00;
896 *st_size = (len - st_offset) * 4;
900 st_size = &ib_cpu[len++]; /* size */
901 ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */
902 ib_cpu[len++] = 0; /* temporal layer */
903 *st_size = (len - st_offset) * 4;
907 st_size = &ib_cpu[len++]; /* size */
908 ib_cpu[len++] = 0x00000008; /* RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE */
916 *st_size = (len - st_offset) * 4;
920 st_size = &ib_cpu[len++]; /* size */
921 ib_cpu[len++] = 0x01000004; /* RENCODE_IB_OP_INIT_RC */
922 *st_size = (len - st_offset) * 4;
926 st_size = &ib_cpu[len++]; /* size */
927 ib_cpu[len++] = 0x01000005; /* RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL */
928 *st_size = (len - st_offset) * 4;
/* Close the task: total bytes written since task_offset. */
930 *p_task_size = (len - task_offset) * 4;
932 if (vcn_unified_ring)
933 amdgpu_cs_sq_ib_tail(ib_cpu + len);
935 r = submit(len, AMDGPU_HW_IP_VCN_ENC);
936 CU_ASSERT_EQUAL(r, 0);
/*
 * Read one signed value from the bitstream.
 *
 * @bufInfo: bit reader state
 *
 * Reads an unsigned code with bs_read_ue(). An even code takes the branch
 * that negates the value (temp = 0 - ret); an odd code returns
 * (ret + 1) >> 1.
 *
 * NOTE(review): the declaration of ret, one statement before the negation
 * and the return of temp are not visible in this view.
 */
939 static int32_t h264_se (bufferInfo * bufInfo)
943 ret = bs_read_ue (bufInfo);
944 if ((ret & 0x1) == 0) {
946 int32_t temp = 0 - ret;
950 return (ret + 1) >> 1;
/*
 * Consume @count bits and report when they are not as expected.
 *
 * @bufInfo: bit reader state
 * @count:   number of bits to read
 *
 * The bits are read with bs_read_u(); the message printed below reports
 * the bit count and the value read.
 * NOTE(review): the condition guarding the printf is not visible in this
 * view (presumably val != 0 - confirm).
 */
953 static void h264_check_0s (bufferInfo * bufInfo, int count)
957 val = bs_read_u (bufInfo, count);
959 printf ("field error - %d bits should be 0 is %x\n", count, val);
/*
 * Test whether the read cursor has reached the end of the buffer.
 *
 * @bufinfo: bit reader state
 *
 * Compares decBuffer with end (cursor at or past the end).
 * NOTE(review): the returned values are not visible in this view; callers
 * treat a non-zero result as "at end".
 */
963 static inline int bs_eof(bufferInfo * bufinfo)
965 if (bufinfo->decBuffer >= bufinfo->end)
/*
 * Read a single bit from the bitstream.
 *
 * @bufinfo: bit reader state
 *
 * Decrements the count of unread bits in the current byte and, unless the
 * cursor is at the end, shifts decData right by that count to bring the
 * wanted bit to the bottom. When the current byte is exhausted, the
 * cursor advances, the next byte is loaded into decData and the bit count
 * is reset to 8.
 *
 * NOTE(review): the local declarations, the masking of temp and the
 * return statement are not visible in this view. The byte reload
 * dereferences decBuffer right after the increment, with no end check in
 * the visible lines.
 */
971 static inline uint32_t bs_read_u1(bufferInfo *bufinfo)
976 bufinfo->numOfBitsInBuffer--;
977 if (! bs_eof(bufinfo)) {
978 temp = (((bufinfo->decData)) >> bufinfo->numOfBitsInBuffer);
982 if (bufinfo->numOfBitsInBuffer == 0) {
983 bufinfo->decBuffer++;
984 bufinfo->decData = *bufinfo->decBuffer;
985 bufinfo->numOfBitsInBuffer = 8;
/*
 * Read @n bits as an unsigned value, most significant bit first.
 *
 * @bufinfo: bit reader state
 * @n:       number of bits to read
 *
 * Each bit returned by bs_read_u1() is shifted to position n - i - 1 and
 * ORed into the result.
 * NOTE(review): the local declarations and the return statement are not
 * visible in this view.
 */
991 static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n)
996 for (i = 0; i < n; i++) {
997 r |= ( bs_read_u1(bufInfo) << ( n - i - 1 ) );
/*
 * Read one unsigned variable-length code from the bitstream.
 *
 * @bufinfo: bit reader state
 *
 * Consumes bits while they are zero (bounded by i < 32 and by the end of
 * the buffer), then reads i further bits with bs_read_u().
 * NOTE(review): the local declarations, the loop body and the statements
 * after the bs_read_u() call are not visible in this view.
 */
1003 static inline uint32_t bs_read_ue(bufferInfo* bufinfo)
1008 while( (bs_read_u1(bufinfo) == 0) && (i < 32) && (!bs_eof(bufinfo))) {
1011 r = bs_read_u(bufinfo, i);
/*
 * Strip the 0x03 byte from every 00 00 03 sequence, in place.
 *
 * @bptr: start of the data to process (modified in place)
 * @len:  number of bytes at @bptr
 *
 * Scans while at least three bytes remain; on a 00 00 03 match the
 * following data is moved down by one byte with memmove().
 * NOTE(review): the pointer/length updates in both branches and the
 * return statement are not visible in this view; verify_checksum() uses
 * the return value as the new length.
 */
1016 static uint32_t remove_03 (uint8_t * bptr, uint32_t len)
1018 uint32_t nal_len = 0;
1019 while (nal_len + 2 < len) {
1020 if (bptr[0] == 0 && bptr[1] == 0 && bptr[2] == 3) {
1024 memmove (bptr, bptr + 1, len - nal_len);
1033 static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo * bufInfo)
1035 uint32_t lastScale = 8, nextScale = 8;
1039 for (jx = 0; jx < sizeOfScalingList; jx++) {
1040 if (nextScale != 0) {
1041 deltaScale = h264_se (bufInfo);
1042 nextScale = (lastScale + deltaScale + 256) % 256;
1044 if (nextScale == 0) {
1045 lastScale = lastScale;
1047 lastScale = nextScale;
/*
 * Parse a sequence parameter set and record the fields the test needs.
 *
 * @dec:     receives profile, level_idc, pic_width and pic_height
 * @bufInfo: bit reader positioned after the NAL header
 *
 * All other syntax elements are read only to advance the bit position.
 * Width and height are computed as the decoded macroblock/map-unit count
 * times 16.
 *
 * NOTE(review): the declaration of temp, the conditions that use temp
 * (scaling list present, cropping present) and the closing braces are not
 * visible in this view.
 */
1052 static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo * bufInfo)
1056 dec->profile = bs_read_u (bufInfo, 8);
1057 bs_read_u (bufInfo, 1); /* constraint_set0_flag */
1058 bs_read_u (bufInfo, 1); /* constraint_set1_flag */
1059 bs_read_u (bufInfo, 1); /* constraint_set2_flag */
1060 bs_read_u (bufInfo, 1); /* constraint_set3_flag */
1061 bs_read_u (bufInfo, 1); /* constraint_set4_flag */
1062 bs_read_u (bufInfo, 1); /* constraint_set5_flag */
1065 h264_check_0s (bufInfo, 2);
1066 dec->level_idc = bs_read_u (bufInfo, 8);
1067 bs_read_ue (bufInfo); /* SPS id*/
/* Only these profile values carry the chroma/bit-depth/scaling fields. */
1069 if (dec->profile == 100 || dec->profile == 110 ||
1070 dec->profile == 122 || dec->profile == 144) {
1071 uint32_t chroma_format_idc = bs_read_ue (bufInfo);
1072 if (chroma_format_idc == 3) {
1073 bs_read_u (bufInfo, 1); /* residual_colour_transform_flag */
1075 bs_read_ue (bufInfo); /* bit_depth_luma_minus8 */
1076 bs_read_ue (bufInfo); /* bit_depth_chroma_minus8 */
1077 bs_read_u (bufInfo, 1); /* qpprime_y_zero_transform_bypass_flag */
1078 uint32_t seq_scaling_matrix_present_flag = bs_read_u (bufInfo, 1);
1080 if (seq_scaling_matrix_present_flag) {
/* Eight lists: the first six have 16 entries, the rest 64. */
1081 for (uint32_t ix = 0; ix < 8; ix++) {
1082 temp = bs_read_u (bufInfo, 1);
1084 scaling_list (ix, ix < 6 ? 16 : 64, bufInfo);
1090 bs_read_ue (bufInfo); /* log2_max_frame_num_minus4 */
1091 uint32_t pic_order_cnt_type = bs_read_ue (bufInfo);
1093 if (pic_order_cnt_type == 0) {
1094 bs_read_ue (bufInfo); /* log2_max_pic_order_cnt_lsb_minus4 */
1095 } else if (pic_order_cnt_type == 1) {
1096 bs_read_u (bufInfo, 1); /* delta_pic_order_always_zero_flag */
1097 h264_se (bufInfo); /* offset_for_non_ref_pic */
1098 h264_se (bufInfo); /* offset_for_top_to_bottom_field */
1099 temp = bs_read_ue (bufInfo);
1100 for (uint32_t ix = 0; ix < temp; ix++) {
1101 h264_se (bufInfo); /* offset_for_ref_frame[index] */
1104 bs_read_ue (bufInfo); /* num_ref_frames */
1105 bs_read_u (bufInfo, 1); /* gaps_in_frame_num_flag */
1106 uint32_t PicWidthInMbs = bs_read_ue (bufInfo) + 1;
1108 dec->pic_width = PicWidthInMbs * 16;
1109 uint32_t PicHeightInMapUnits = bs_read_ue (bufInfo) + 1;
1111 dec->pic_height = PicHeightInMapUnits * 16;
1112 uint32_t frame_mbs_only_flag = bs_read_u (bufInfo, 1);
1113 if (!frame_mbs_only_flag) {
1114 bs_read_u (bufInfo, 1); /* mb_adaptive_frame_field_flag */
1116 bs_read_u (bufInfo, 1); /* direct_8x8_inference_flag */
1117 temp = bs_read_u (bufInfo, 1);
1119 bs_read_ue (bufInfo); /* frame_crop_left_offset */
1120 bs_read_ue (bufInfo); /* frame_crop_right_offset */
1121 bs_read_ue (bufInfo); /* frame_crop_top_offset */
1122 bs_read_ue (bufInfo); /* frame_crop_bottom_offset */
1124 temp = bs_read_u (bufInfo, 1); /* VUI Parameters */
/*
 * Parse the start of a slice header and record the slice type.
 *
 * @dec:     receives slice_type
 * @bufInfo: bit reader positioned after the NAL header
 *
 * The first code (first_mb_in_slice) is discarded. The second code is the
 * slice type; values above 5 are reduced by 5 before being stored.
 * NOTE(review): the declaration of temp is not visible in this view.
 */
1127 static void h264_slice_header (h264_decode * dec, bufferInfo * bufInfo)
1131 bs_read_ue (bufInfo); /* first_mb_in_slice */
1132 temp = bs_read_ue (bufInfo);
1133 dec->slice_type = ((temp > 5) ? (temp - 5) : temp);
/*
 * Parse one NAL unit header and dispatch on its type.
 *
 * @dec:     receives nal_ref_idc and nal_unit_type, plus whatever the
 *           dispatched parser stores
 * @bufInfo: bit reader positioned at the first byte after the start code
 *
 * Header layout as read here: 1 bit checked by h264_check_0s(), 2 bits
 * nal_ref_idc, 5 bits nal_unit_type. Slice types go to
 * h264_slice_header(), sequence parameter sets go to
 * h264_parse_sequence_parameter_set(); a message is printed for a type
 * that is not recognized.
 *
 * NOTE(review): the declaration of type, the switch/break/default lines
 * and the return statement are not visible in this view.
 */
1136 static uint8_t h264_parse_nal (h264_decode * dec, bufferInfo * bufInfo)
1140 h264_check_0s (bufInfo, 1);
1141 dec->nal_ref_idc = bs_read_u (bufInfo, 2);
1142 dec->nal_unit_type = type = bs_read_u (bufInfo, 5);
1145 case H264_NAL_TYPE_NON_IDR_SLICE:
1146 case H264_NAL_TYPE_IDR_SLICE:
1147 h264_slice_header (dec, bufInfo);
1149 case H264_NAL_TYPE_SEQ_PARAM:
1150 h264_parse_sequence_parameter_set (dec, bufInfo);
/* These types are recognized but have no parser call in the visible
 * lines. */
1152 case H264_NAL_TYPE_PIC_PARAM:
1153 case H264_NAL_TYPE_SEI:
1154 case H264_NAL_TYPE_ACCESS_UNIT:
1155 case H264_NAL_TYPE_SEQ_EXTENSION:
1159 printf ("Nal type unknown %d \n ", type);
/*
 * Locate the next start code in a buffer.
 *
 * @pBuf:   data to scan
 * @bufLen: number of bytes at @pBuf
 *
 * First checks whether the buffer itself begins with a 4-byte
 * (00 00 00 01) or 3-byte (00 00 01) start code, then scans while
 * offset < bufLen - 3, comparing an accumulated value against
 * H264_START_CODE both in full and masked to its low 24 bits.
 *
 * NOTE(review): the leading checks read pBuf[0..3] without consulting
 * bufLen, and bufLen - 3 is an unsigned subtraction that wraps for
 * bufLen < 3; callers should confirm that short buffers cannot reach this
 * function. The statements inside the branches and loop, and all return
 * statements, are not visible in this view.
 */
1165 static uint32_t h264_find_next_start_code (uint8_t * pBuf, uint32_t bufLen)
1168 uint32_t offset, startBytes;
1170 offset = startBytes = 0;
1171 if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 0 && pBuf[3] == 1) {
1175 } else if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 1) {
1181 while (offset < bufLen - 3) {
1185 if (val == H264_START_CODE)
1188 if ((val & 0x00ffffff) == H264_START_CODE)
1191 if (bufLen - offset <= 3 && startBytes == 0) {
/*
 * Parse an encoded H.264 buffer and compare it with the expected values.
 *
 * @buffer:      encoded bitstream (modified in place by remove_03())
 * @buffer_size: number of bytes at @buffer
 *
 * Repeatedly finds the next start code, strips 00 00 03 sequences from
 * the NAL unit, sets up a bit reader just past the 3- or 4-byte start
 * code and parses the unit with h264_parse_nal(). After the loop the
 * parsed width, height and slice type are compared against gWidth,
 * gHeight and gSliceType.
 *
 * NOTE(review): the local declarations, the handling of ret, the updates
 * of done and the return statements are not visible in this view;
 * check_result() asserts that the return value is 0.
 */
1199 static int verify_checksum(uint8_t *buffer, uint32_t buffer_size)
1201 uint32_t buffer_pos = 0;
1205 memset(&dec, 0, sizeof(h264_decode));
1209 ret = h264_find_next_start_code (buffer + buffer_pos,
1210 buffer_size - buffer_pos);
1213 if (buffer_pos == 0) {
1215 "couldn't find start code in buffer from 0\n");
1218 /* have a complete NAL from buffer_pos to end */
1223 nal_len = remove_03 (buffer + buffer_pos, ret);
/* The start code is 3 bytes long when the third byte is 1, else 4. */
1224 bufinfo.decBuffer = buffer + buffer_pos + (buffer[buffer_pos + 2] == 1 ? 3 : 4);
1225 bufinfo.decBufferSize = (nal_len - (buffer[buffer_pos + 2] == 1 ? 3 : 4)) * 8;
1226 bufinfo.end = buffer + buffer_pos + nal_len;
1227 bufinfo.numOfBitsInBuffer = 8;
1228 bufinfo.decData = *bufinfo.decBuffer;
1229 h264_parse_nal (&dec, &bufinfo);
1231 buffer_pos += ret; /* buffer_pos points to next code */
1233 } while (done == 0);
1235 if ((dec.pic_width == gWidth) &&
1236 (dec.pic_height == gHeight) &&
1237 (dec.slice_type == gSliceType))
/*
 * Validate the output of an encode job.
 *
 * fb_buf: feedback buffer object; it is CPU-mapped, viewed as uint32_t
 *         words through fb_ptr, and unmapped again.
 * bs_buf: bitstream buffer object; it is CPU-mapped and passed, together
 *         with `size`, to verify_checksum(), whose result is asserted to
 *         be 0.
 * frame_type: not referenced in the lines visible here.
 *
 * Both buffer structs are passed by value, so the .ptr fields written by
 * amdgpu_bo_cpu_map() belong to the local copies only.
 *
 * NOTE(review): the local declarations and the assignment to `size` are
 * outside the lines visible in this excerpt; any read through fb_ptr has to
 * happen before the unmap that follows it.
 */
1243 static void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_buf, int frame_type)
1249 /* uint64_t s[3] = {0, 1121279001727, 1059312481445}; */
/* map the feedback buffer for CPU access */
1251 r = amdgpu_bo_cpu_map(fb_buf.handle, (void **)&fb_buf.ptr);
1252 CU_ASSERT_EQUAL(r, 0);
1253 fb_ptr = (uint32_t*)fb_buf.ptr;
1255 r = amdgpu_bo_cpu_unmap(fb_buf.handle);
1256 CU_ASSERT_EQUAL(r, 0);
/* map the bitstream buffer and run the parser-based check on it */
1257 r = amdgpu_bo_cpu_map(bs_buf.handle, (void **)&bs_buf.ptr);
1258 CU_ASSERT_EQUAL(r, 0);
1260 bs_ptr = (uint8_t*)bs_buf.ptr;
1261 r = verify_checksum(bs_ptr, size);
1262 CU_ASSERT_EQUAL(r, 0);
1263 r = amdgpu_bo_cpu_unmap(bs_buf.handle);
1265 CU_ASSERT_EQUAL(r, 0);
/*
 * Build and submit one VCN encode job for a 160x128 frame, then check the
 * result.
 *
 * frame_type is stored in gSliceType for the later check and written into
 * the encode-params packet. When it equals 2, SPS and PPS NAL packets are
 * emitted as well, and the slice-header packet starts with the
 * 0x65000000 / 0x11040000 dwords.
 *
 * The command stream is assembled dword by dword in ib_cpu[]. Each packet
 * begins with a size dword (st_size) that is back-filled with
 * (len - st_offset) * 4 once the packet is complete; *p_task_size is
 * back-filled with (len - task_offset) * 4 at the end. The IB is submitted
 * to AMDGPU_HW_IP_VCN_ENC, check_result() is run on the feedback and
 * bitstream buffers, and the three buffers allocated here are freed.
 *
 * NOTE(review): the assignments to len, st_offset and task_offset, the
 * `else` lines of the vcn_ip_version_major selections, and the bodies of
 * the version branches near the top are outside the lines visible in this
 * excerpt.
 */
1268 static void amdgpu_cs_vcn_enc_encode_frame(int frame_type)
1270 struct amdgpu_vcn_bo bs_buf, fb_buf, input_buf;
1272 unsigned width = 160, height = 128, buf_size;
1273 uint32_t *p_task_size = NULL;
1274 uint32_t task_offset = 0, st_offset;
1275 uint32_t *st_size = NULL;
1276 uint32_t fw_maj = 1, fw_min = 9;
/* version-specific adjustments; the branch bodies are not visible here */
1278 if (vcn_ip_version_major == 2) {
1281 } else if (vcn_ip_version_major == 3) {
/* remember the expected slice type for verify_checksum() */
1285 gSliceType = frame_type;
/* input size: width aligned to 256, height aligned to 32, times 3/2 (presumably luma plus NV12 chroma -- confirm) */
1286 buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2;
/* per-frame buffers: bitstream, feedback and input image */
1289 alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
1290 alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT);
1291 alloc_resource(&input_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT);
/* record the handle of every buffer this job uses in resources[] */
1292 resources[num_resources++] = enc_buf.handle;
1293 resources[num_resources++] = cpb_buf.handle;
1294 resources[num_resources++] = bs_buf.handle;
1295 resources[num_resources++] = fb_buf.handle;
1296 resources[num_resources++] = input_buf.handle;
1297 resources[num_resources++] = ib_handle;
/*
 * Zero the bitstream and feedback buffers.
 * NOTE(review): the map/unmap results stored in r are not checked here, so
 * memset() runs on the mapped pointer even if the map failed.
 */
1300 r = amdgpu_bo_cpu_map(bs_buf.handle, (void**)&bs_buf.ptr);
1301 memset(bs_buf.ptr, 0, 4096);
1302 r = amdgpu_bo_cpu_unmap(bs_buf.handle);
1304 r = amdgpu_bo_cpu_map(fb_buf.handle, (void**)&fb_buf.ptr);
1305 memset(fb_buf.ptr, 0, 4096);
1306 r = amdgpu_bo_cpu_unmap(fb_buf.handle);
1308 r = amdgpu_bo_cpu_map(input_buf.handle, (void **)&input_buf.ptr);
1309 CU_ASSERT_EQUAL(r, 0);
/* copy the source image `frame` row by row, widening the row stride from width to ALIGN(width, 256) */
1311 for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++)
1312 memcpy(input_buf.ptr + i * ALIGN(width, 256), frame + i * width, width);
1314 r = amdgpu_bo_cpu_unmap(input_buf.handle);
1315 CU_ASSERT_EQUAL(r, 0);
/* unified-ring path: amdgpu_cs_sq_head() (defined elsewhere) writes into the IB first and advances len */
1319 if (vcn_unified_ring)
1320 amdgpu_cs_sq_head(ib_cpu, &len, true);
/* session-info packet: firmware interface version, enc_buf address, engine type */
1324 st_size = &ib_cpu[len++]; /* size */
1325 ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */
1326 ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
1327 ib_cpu[len++] = enc_buf.addr >> 32;
1328 ib_cpu[len++] = enc_buf.addr;
1329 ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE */;
1330 *st_size = (len - st_offset) * 4;
/* task-info packet: the task-size dword is reserved here and filled in at the end */
1335 st_size = &ib_cpu[len++]; /* size */
1336 ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */
1337 p_task_size = &ib_cpu[len++];
1338 ib_cpu[len++] = enc_task_id++; /* task_id */
1339 ib_cpu[len++] = 1; /* feedback */
1340 *st_size = (len - st_offset) * 4;
/* frame_type 2 (the IDR case used by amdgpu_cs_vcn_enc_encode): emit SPS and PPS NAL units */
1342 if (frame_type == 2) {
/* SPS as a direct-output NAL unit; the packet ID depends on the VCN major version */
1345 st_size = &ib_cpu[len++]; /* size */
1346 if(vcn_ip_version_major == 1)
1347 ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */
1349 ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3 */
1350 ib_cpu[len++] = 0x00000002; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */
1351 ib_cpu[len++] = 0x00000011; /* sps len */
1352 ib_cpu[len++] = 0x00000001; /* start code */
1353 ib_cpu[len++] = 0x6764440b;
1354 ib_cpu[len++] = 0xac54c284;
1355 ib_cpu[len++] = 0x68078442;
1356 ib_cpu[len++] = 0x37000000;
1357 *st_size = (len - st_offset) * 4;
/* PPS as a direct-output NAL unit */
1361 st_size = &ib_cpu[len++]; /* size */
1362 if(vcn_ip_version_major == 1)
1363 ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/
1365 ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3*/
1366 ib_cpu[len++] = 0x00000003; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */
1367 ib_cpu[len++] = 0x00000008; /* pps len */
1368 ib_cpu[len++] = 0x00000001; /* start code */
1369 ib_cpu[len++] = 0x68ce3c80;
1370 *st_size = (len - st_offset) * 4;
/* slice-header packet: two leading dwords chosen by frame_type, then fixed data; NOTE(review): dword layout is defined by the firmware interface, not by this file */
1375 st_size = &ib_cpu[len++]; /* size */
1376 if(vcn_ip_version_major == 1)
1377 ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */
1379 ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 2,3 */
1380 if (frame_type == 2) {
1381 ib_cpu[len++] = 0x65000000;
1382 ib_cpu[len++] = 0x11040000;
1384 ib_cpu[len++] = 0x41000000;
1385 ib_cpu[len++] = 0x34210000;
1387 ib_cpu[len++] = 0xe0000000;
1388 for(i = 0; i < 13; i++)
1389 ib_cpu[len++] = 0x00000000;
1391 ib_cpu[len++] = 0x00000001;
1392 ib_cpu[len++] = 0x00000008;
1393 ib_cpu[len++] = 0x00020000;
1394 ib_cpu[len++] = 0x00000000;
1395 ib_cpu[len++] = 0x00000001;
1396 ib_cpu[len++] = 0x00000015;
1397 ib_cpu[len++] = 0x00020001;
1398 ib_cpu[len++] = 0x00000000;
1399 ib_cpu[len++] = 0x00000001;
1400 ib_cpu[len++] = 0x00000003;
1401 for(i = 0; i < 22; i++)
1402 ib_cpu[len++] = 0x00000000;
1404 *st_size = (len - st_offset) * 4;
/* encode-params packet: frame type and the input buffer address, followed by the address offset by ALIGN(width, 256) * ALIGN(height, 32) */
1408 st_size = &ib_cpu[len++]; /* size */
1409 if(vcn_ip_version_major == 1)
1410 ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1*/
1412 ib_cpu[len++] = 0x0000000f; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 2,3*/
1413 ib_cpu[len++] = frame_type;
1414 ib_cpu[len++] = 0x0001f000;
1415 ib_cpu[len++] = input_buf.addr >> 32;
1416 ib_cpu[len++] = input_buf.addr;
1417 ib_cpu[len++] = (input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32;
1418 ib_cpu[len++] = input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32);
1419 ib_cpu[len++] = 0x00000100;
1420 ib_cpu[len++] = 0x00000080;
1421 ib_cpu[len++] = 0x00000000;
1422 ib_cpu[len++] = 0xffffffff;
1423 ib_cpu[len++] = 0x00000000;
1424 *st_size = (len - st_offset) * 4;
1426 /* encode params h264 */
1428 st_size = &ib_cpu[len++]; /* size */
1429 ib_cpu[len++] = 0x00200003; /* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */
/* the payload differs for VCN major version 3; the else line separating the two layouts is not visible here */
1430 if (vcn_ip_version_major != 3) {
1431 ib_cpu[len++] = 0x00000000;
1432 ib_cpu[len++] = 0x00000000;
1433 ib_cpu[len++] = 0x00000000;
1434 ib_cpu[len++] = 0xffffffff;
1436 ib_cpu[len++] = 0x00000000;
1437 ib_cpu[len++] = 0x00000000;
1438 ib_cpu[len++] = 0x00000000;
1439 ib_cpu[len++] = 0x00000000;
1440 ib_cpu[len++] = 0x00000000;
1441 ib_cpu[len++] = 0x00000000;
1442 ib_cpu[len++] = 0x00000000;
1443 ib_cpu[len++] = 0xffffffff;
1444 ib_cpu[len++] = 0x00000000;
1445 ib_cpu[len++] = 0x00000000;
1446 ib_cpu[len++] = 0x00000000;
1447 ib_cpu[len++] = 0x00000000;
1448 ib_cpu[len++] = 0xffffffff;
1449 ib_cpu[len++] = 0x00000000;
1450 ib_cpu[len++] = 0x00000000;
1451 ib_cpu[len++] = 0x00000000;
1452 ib_cpu[len++] = 0x00000000;
1454 *st_size = (len - st_offset) * 4;
1456 /* encode context */
1458 st_size = &ib_cpu[len++]; /* size */
1459 if(vcn_ip_version_major == 1)
1460 ib_cpu[len++] = 0x0000000d; /* ENCODE_CONTEXT_BUFFER vcn 1 */
1462 ib_cpu[len++] = 0x00000011; /* ENCODE_CONTEXT_BUFFER vcn 2,3 */
1463 ib_cpu[len++] = cpb_buf.addr >> 32;
1464 ib_cpu[len++] = cpb_buf.addr;
1465 ib_cpu[len++] = 0x00000000; /* swizzle mode */
1466 ib_cpu[len++] = 0x00000100; /* luma pitch */
1467 ib_cpu[len++] = 0x00000100; /* chroma pitch */
1468 ib_cpu[len++] = 0x00000003; /* no reconstructed picture */
1469 ib_cpu[len++] = 0x00000000; /* reconstructed pic 1 luma offset */
1470 ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32); /* pic1 chroma offset */
1471 ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; /* pic2 luma offset */
1472 ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2; /* pic2 chroma offset */
/* the remaining 136 dwords of the packet are written as zero */
1474 for (int i = 0; i < 136; i++)
1475 ib_cpu[len++] = 0x00000000;
1476 *st_size = (len - st_offset) * 4;
1478 /* bitstream buffer */
1480 st_size = &ib_cpu[len++]; /* size */
1481 if(vcn_ip_version_major == 1)
1482 ib_cpu[len++] = 0x0000000e; /* VIDEO_BITSTREAM_BUFFER vcn 1 */
1484 ib_cpu[len++] = 0x00000012; /* VIDEO_BITSTREAM_BUFFER vcn 2,3 */
1485 ib_cpu[len++] = 0x00000000; /* mode */
1486 ib_cpu[len++] = bs_buf.addr >> 32;
1487 ib_cpu[len++] = bs_buf.addr;
1488 ib_cpu[len++] = 0x0001f000;
1489 ib_cpu[len++] = 0x00000000;
1490 *st_size = (len - st_offset) * 4;
/* feedback-buffer packet: address of fb_buf, which check_result() reads afterwards */
1494 st_size = &ib_cpu[len++]; /* size */
1495 if(vcn_ip_version_major == 1)
1496 ib_cpu[len++] = 0x00000010; /* FEEDBACK_BUFFER vcn 1 */
1498 ib_cpu[len++] = 0x00000015; /* FEEDBACK_BUFFER vcn 2,3 */
1499 ib_cpu[len++] = 0x00000000;
1500 ib_cpu[len++] = fb_buf.addr >> 32;
1501 ib_cpu[len++] = fb_buf.addr;
1502 ib_cpu[len++] = 0x00000010;
1503 ib_cpu[len++] = 0x00000028;
1504 *st_size = (len - st_offset) * 4;
/* intra-refresh packet with an all-zero payload */
1508 st_size = &ib_cpu[len++];
1509 if(vcn_ip_version_major == 1)
1510 ib_cpu[len++] = 0x0000000c; /* INTRA_REFRESH vcn 1 */
1512 ib_cpu[len++] = 0x00000010; /* INTRA_REFRESH vcn 2,3 */
1513 ib_cpu[len++] = 0x00000000;
1514 ib_cpu[len++] = 0x00000000;
1515 ib_cpu[len++] = 0x00000000;
1516 *st_size = (len - st_offset) * 4;
/* two extra packets, emitted only when the VCN major version is not 1 */
1518 if(vcn_ip_version_major != 1) {
/* packet 0x0000000c -- presumably the input format description (the ID is not named in this file); confirm */
1521 st_size = &ib_cpu[len++];
1522 ib_cpu[len++] = 0x0000000c;
1523 ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */
1524 ib_cpu[len++] = 0x00000000;
1525 ib_cpu[len++] = 0x00000000;
1526 ib_cpu[len++] = 0x00000000;
1527 ib_cpu[len++] = 0x00000000;
1528 ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */
1529 ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_PACKING_FORMAT_NV12 */
1530 *st_size = (len - st_offset) * 4;
/* packet 0x0000000d -- presumably the output format description; confirm */
1534 st_size = &ib_cpu[len++];
1535 ib_cpu[len++] = 0x0000000d;
1536 ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */
1537 ib_cpu[len++] = 0x00000000;
1538 ib_cpu[len++] = 0x00000000;
1539 ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */
1540 *st_size = (len - st_offset) * 4;
/* operation packets carry no payload beyond size and ID */
1544 st_size = &ib_cpu[len++];
1545 ib_cpu[len++] = 0x01000006; /* SPEED_ENCODING_MODE */
1546 *st_size = (len - st_offset) * 4;
/* 0x01000003 is presumably the encode operation -- the ID is not named in this file; confirm */
1550 st_size = &ib_cpu[len++];
1551 ib_cpu[len++] = 0x01000003;
1552 *st_size = (len - st_offset) * 4;
/* back-fill the task-size dword reserved in the task-info packet */
1554 *p_task_size = (len - task_offset) * 4;
/* unified-ring path: amdgpu_cs_sq_ib_tail() (defined elsewhere) is given the current end of the IB */
1556 if (vcn_unified_ring)
1557 amdgpu_cs_sq_ib_tail(ib_cpu + len);
1559 r = submit(len, AMDGPU_HW_IP_VCN_ENC);
1560 CU_ASSERT_EQUAL(r, 0);
/* inspect the feedback and bitstream buffers written by the job */
1563 check_result(fb_buf, bs_buf, frame_type);
/* release only the per-frame buffers; enc_buf and cpb_buf are not freed in this function */
1565 free_resource(&fb_buf);
1566 free_resource(&bs_buf);
1567 free_resource(&input_buf);
/*
 * Encode step of the VCN encoder test: runs amdgpu_cs_vcn_enc_encode_frame()
 * with frame_type 2, i.e. an IDR frame. No other frame type is submitted in
 * the lines visible here.
 */
1570 static void amdgpu_cs_vcn_enc_encode(void)
1572 amdgpu_cs_vcn_enc_encode_frame(2); /* IDR frame */
/*
 * Close the encoder session and release the session-wide buffers.
 *
 * Builds an IB consisting of a session-info packet, a task-info packet with
 * the feedback dword set to 0, and a close-session operation, submits it to
 * AMDGPU_HW_IP_VCN_ENC, and then frees cpb_buf and enc_buf.
 *
 * As in the encode path, every packet starts with a size dword (st_size)
 * that is back-filled with (len - st_offset) * 4, and *p_task_size is
 * back-filled with (len - task_offset) * 4.
 *
 * NOTE(review): the declarations/assignments of len, r, st_offset and
 * task_offset, the bodies of the version branches, and anything after the
 * last free_resource() call are outside the lines visible in this excerpt.
 */
1575 static void amdgpu_cs_vcn_enc_destroy(void)
1578 uint32_t *p_task_size = NULL;
1579 uint32_t task_offset = 0, st_offset;
1580 uint32_t *st_size = NULL;
1581 uint32_t fw_maj = 1, fw_min = 9;
/* version-specific adjustments; the branch bodies are not visible here */
1583 if (vcn_ip_version_major == 2) {
1586 } else if (vcn_ip_version_major == 3) {
1592 /* alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); */
/* only enc_buf and the IB itself are recorded in resources[] for this job */
1593 resources[num_resources++] = enc_buf.handle;
1594 resources[num_resources++] = ib_handle;
/* unified-ring path: amdgpu_cs_sq_head() (defined elsewhere) writes into the IB first and advances len */
1596 if (vcn_unified_ring)
1597 amdgpu_cs_sq_head(ib_cpu, &len, true);
/* session-info packet: firmware interface version, enc_buf address, engine type */
1601 st_size = &ib_cpu[len++]; /* size */
1602 ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */
1603 ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0));
1604 ib_cpu[len++] = enc_buf.addr >> 32;
1605 ib_cpu[len++] = enc_buf.addr;
1606 ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */
1607 *st_size = (len - st_offset) * 4;
/* task-info packet: the task-size dword is reserved here and filled in below */
1612 st_size = &ib_cpu[len++]; /* size */
1613 ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */
1614 p_task_size = &ib_cpu[len++];
1615 ib_cpu[len++] = enc_task_id++; /* task_id */
1616 ib_cpu[len++] = 0; /* feedback */
1617 *st_size = (len - st_offset) * 4;
/* close-session operation: size and ID only */
1621 st_size = &ib_cpu[len++];
1622 ib_cpu[len++] = 0x01000002; /* RENCODE_IB_OP_CLOSE_SESSION */
1623 *st_size = (len - st_offset) * 4;
/* back-fill the task-size dword reserved in the task-info packet */
1625 *p_task_size = (len - task_offset) * 4;
/* unified-ring path: amdgpu_cs_sq_ib_tail() (defined elsewhere) is given the current end of the IB */
1627 if (vcn_unified_ring)
1628 amdgpu_cs_sq_ib_tail(ib_cpu + len);
1630 r = submit(len, AMDGPU_HW_IP_VCN_ENC);
1631 CU_ASSERT_EQUAL(r, 0);
/* release the buffers that outlive individual encode jobs */
1633 free_resource(&cpb_buf);
1634 free_resource(&enc_buf);