2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
27 #include <sys/types.h>
28 #ifdef MAJOR_IN_SYSMACROS
29 #include <sys/sysmacros.h>
38 #include "CUnit/Basic.h"
40 #include "amdgpu_test.h"
41 #include "amdgpu_drm.h"
42 #include "amdgpu_internal.h"
43 #include "util_math.h"
/* State shared by every test in this suite; set up in suite_basic_tests_init()
 * and torn down in suite_basic_tests_clean(). */
static amdgpu_device_handle device_handle;	/* device under test */
static uint32_t major_version;			/* DRM interface major version */
static uint32_t minor_version;			/* DRM interface minor version */
static uint32_t family_id;			/* ASIC family; selects packet formats (SI vs newer) */
/* Test entry points registered in basic_tests[] below. */
static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);

/* Helpers shared by the GFX/compute/SDMA submission tests; ip_type is one of
 * the AMDGPU_HW_IP_* values. */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
/* NOTE(review): call sites pass an ip_type argument between context_handle and
 * instance; the corresponding parameter line appears to be missing from this
 * view of the file — confirm against the full source. */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);
/* CUnit registration table for the "Basic Tests" suite. */
CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
/* Default test buffer size: at least 8 KiB, rounded up to the page size. */
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))

/* SDMA packet header: the opcode lives in the low byte of the first dword. */
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)

/* SDMA opcodes (post-SI packet format). */
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)

/* Build an SDMA packet header: extra/e in [31:16], sub-op in [15:8], op in [7:0]. */
#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) |	\
				    (((sub_op) & 0xFF) << 8) |	\
				    (((op) & 0xFF) << 0))

#define SDMA_OPCODE_WRITE  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR 0
	/* Historical misspelling kept for source compatibility; prefer the
	 * correctly spelled alias right below it. */
#       define SDMA_WRTIE_SUB_OPCODE_TILED 1
#       define SDMA_WRITE_SUB_OPCODE_TILED SDMA_WRTIE_SUB_OPCODE_TILED

#define SDMA_OPCODE_COPY  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR 0

#define SDMA_OPCODE_ATOMIC 10
#       define SDMA_ATOMIC_LOOP(x) ((x) << 0)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied. */
#       define SDMA_ATOMIC_TMZ(x) ((x) << 2)
#       define SDMA_ATOMIC_OPCODE(x) ((x) << 9)
	/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008 */

/* NOP for the GFX/compute CP rings (post-SI). */
#define GFX_COMPUTE_NOP  0xffff1000
/* PM4 packet types (bits [31:30] of the header dword). */
#define PACKET_TYPE0 0
#define PACKET_TYPE1 1
#define PACKET_TYPE2 2
#define PACKET_TYPE3 3

/* Field extractors for a PM4 header dword. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/* The packet-type field is shifted as unsigned: shifting a value into bit 30
 * of a signed int is undefined behavior. */
#define PACKET0(reg, n) (((unsigned)PACKET_TYPE0 << 30) |	\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2 0x80000000
#define PACKET2_PAD_SHIFT 0
#define PACKET2_PAD_MASK (0x3fffffff << 0)

#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n) (((unsigned)PACKET_TYPE3 << 30) |	\
			(((op) & 0xFF) << 8) |			\
			((n) & 0x3FFF) << 16)
/* Expansion is fully parenthesized: the previous unparenthesized form broke
 * when the macro was used under operators binding tighter than '|'. */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

#define	PACKET3_NOP 0x10
#define	PACKET3_WRITE_DATA 0x37
#define		WRITE_DATA_DST_SEL(x) ((x) << 8)
		/* dst_sel values (partial list preserved from original):
		 * 1 - memory (sync - via GRBM)
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR (1 << 16)
#define		WR_CONFIRM (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
		/* Unsigned shift: producing bit 30 of a signed int with '<<'
		 * is undefined behavior. */
#define		WRITE_DATA_ENGINE_SEL(x) ((unsigned)(x) << 30)

#define	PACKET3_ATOMIC_MEM 0x1E
#define     TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
#define     ATOMIC_MEM_COMMAND(x) ((x) << 8)
	    /* 0 - single_pass_atomic.
	     * 1 - loop_until_compare_satisfied. */
	    /* Historical misspelling kept for source compatibility; prefer
	     * the correctly spelled alias right below it. */
#define     ATOMIC_MEM_CACHEPOLICAY(x) ((x) << 25)
#define     ATOMIC_MEM_CACHEPOLICY(x) ATOMIC_MEM_CACHEPOLICAY(x)
#define     ATOMIC_MEM_ENGINESEL(x) ((unsigned)(x) << 30)
#define	PACKET3_DMA_DATA 0x50
/* DMA_DATA packet body (dwords after the header):
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
#              define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
	       /* 0 - DST_ADDR using DAS
		* 3 - DST_ADDR using L2
		*/
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
	       /* 0 - SRC_ADDR using SAS
		* 3 - SRC_ADDR using L2
		*/
	       /* 1u: a plain (1 << 31) overflows a signed int (UB). */
#              define PACKET3_DMA_DATA_CP_SYNC (1u << 31)
#              define PACKET3_DMA_DATA_DIS_WC (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
#              define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
#              define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
#              define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)

/* SI-generation SDMA packet header: op [31:28], count [19:0].  The opcode is
 * masked/shifted as unsigned so opcodes >= 8 (e.g. CONSTANT_FILL_SI = 13)
 * cannot overflow a signed int. */
#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((unsigned)(op) & 0xF) << 28) |	\
					  (((b) & 0x1) << 26) |			\
					  (((t) & 0x1) << 23) |			\
					  (((s) & 0x1) << 22) |			\
					  (((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI  3
#define SDMA_OPCODE_CONSTANT_FILL_SI  13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI  0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27)
#              define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20)
	       /* 0 - DST_ADDR using DAS
		* 3 - DST_ADDR using L2
		*/
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29)
	       /* 0 - SRC_ADDR using SAS
		* 3 - SRC_ADDR using L2
		*/
#              define PACKET3_DMA_DATA_SI_CP_SYNC (1u << 31)
#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START	0x00002c00

#define	PACKET3_DISPATCH_DIRECT			0x15
#define PACKET3_EVENT_WRITE			0x46
#define PACKET3_ACQUIRE_MEM			0x58
#define PACKET3_SET_CONTEXT_REG		0x69
#define PACKET3_SET_UCONFIG_REG		0x79
#define PACKET3_DRAW_INDEX_AUTO		0x2D

/* Compute-dispatch register offsets (SET_SH_REG address space). */
#define mmCOMPUTE_PGM_LO		0x2e0c
#define mmCOMPUTE_PGM_RSRC1		0x2e12
#define mmCOMPUTE_TMPRING_SIZE		0x2e18
#define mmCOMPUTE_USER_DATA_0		0x2e40
#define mmCOMPUTE_USER_DATA_1		0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS	0x2e15
#define mmCOMPUTE_NUM_THREAD_X		0x2e07

/* Byte-swap a 32-bit word.  The argument is parenthesized so expression
 * arguments expand correctly (the old form broke on e.g. SWAP_32(a + b)),
 * and the low-byte mask is unsigned so '<< 24' cannot overflow a signed int. */
#define SWAP_32(num) ((((num) & 0xff000000u) >> 24) |	\
		      (((num) & 0x0000ff00u) << 8) |	\
		      (((num) & 0x00ff0000u) >> 8) |	\
		      (((num) & 0x000000ffu) << 24))
/* Hand-written shader used by the userptr/write tests.  Source fragment
 * preserved from the original comment:
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 * Each dword below is passed through SWAP_32 (see the macro above). */
static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)

/* Byte offsets of the code and data regions inside the test BO. */
#define CODE_OFFSET 512
#define DATA_OFFSET 1024
/* GFX9 compute shader binary used by the "buffer clear" dispatch test. */
static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
	0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,

/* SET_SH_REG writes ({offset, value}) programming the dispatch above. */
static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 },
	{0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
	{0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }

/* Number of entries in the register table above. */
static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

/* GFX9 compute shader binary used by the "buffer copy" dispatch test. */
static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
	0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000

/* PM4 stream (SET_CONTEXT_REG 0x69 / SET_UCONFIG_REG 0x79 packets — see the
 * PACKET3 encoding above) replayed as the GFX9 draw-state preamble. */
static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
/* GFX9 pixel shader used by the draw tests (constant-color variant). */
static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000

/* Patchable dwords per entry in the patch-info table below. */
static const uint32_t ps_const_shader_patchinfo_code_gfx9_size = 6; // NOTE(review): name per next line
static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

/* Export-instruction variants patched into the shader per render-target
 * format (one 6-dword sequence per variant). */
static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }

/* Dword offset(s) of the patch point inside ps_const_shader_gfx9. */
static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {

static const uint32_t ps_num_sh_registers_gfx9 = 2;

/* SET_SH_REG writes ({offset, value}) for the constant-color PS. */
static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
	{0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }

static const uint32_t ps_num_context_registers_gfx9 = 7;

/* SET_CONTEXT_REG writes ({offset, value}) for the constant-color PS. */
static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
	{0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 },
	{0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F },
	{0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 },
	{0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
	{0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
/* GFX9 pixel shader used by the draw tests (textured variant). */
static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000

/* Dword offset(s) of the patch point inside ps_tex_shader_gfx9. */
static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {

/* Patchable dwords per entry in the patch-info table below. */
static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

/* Export-instruction variants patched into the textured shader. */
static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }

/* SET_SH_REG writes ({offset, value}) for the textured PS. */
static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
	{0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }

/* SET_CONTEXT_REG writes ({offset, value}) for the textured PS. */
static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
	{0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 },
	{0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F },
	{0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 },
	{0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
	{0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
/* GFX9 vertex shader ("RectPosTexFast") used by the draw tests. */
static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000

/* PM4 draw-state stream replayed from cache by the GFX9 draw tests. */
static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0

/* Pixel shader used to provoke a GPU hang.  NOTE(review): the leading
 * 0xFFFFFFFF dword looks intentionally invalid — confirm against the hang
 * test that consumes this array. */
unsigned int memcpy_ps_hang[] = {
	0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
	0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
	0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
	0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
	0xF800180F, 0x03020100, 0xBF810000
/* Descriptor pairing a shader binary with its section lengths (in dwords).
 * NOTE(review): the pointer member holding the code array is not visible in
 * this chunk; the initializers below start with the codes array. */
struct amdgpu_test_shader {
	uint32_t header_length;	/* dwords before the body section */
	uint32_t body_length;	/* dwords in the (repeatable) body section */
	uint32_t foot_length;	/* trailing dwords */

/* Compute shader used by the slow-hang test on AI (gfx9) parts. */
unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
        memcpy_cs_hang_slow_ai_codes,

/* Compute shader used by the slow-hang test on Raven (rv) parts. */
unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
        memcpy_cs_hang_slow_rv_codes,

/* Pixel shader used by the slow-hang test on AI (gfx9) parts. */
unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
        0x03020100, 0xbf810000

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
        memcpy_ps_hang_slow_ai_codes,
558 int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
559 unsigned alignment, unsigned heap, uint64_t alloc_flags,
560 uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
561 uint64_t *mc_address,
562 amdgpu_va_handle *va_handle)
564 struct amdgpu_bo_alloc_request request = {};
565 amdgpu_bo_handle buf_handle;
566 amdgpu_va_handle handle;
570 request.alloc_size = size;
571 request.phys_alignment = alignment;
572 request.preferred_heap = heap;
573 request.flags = alloc_flags;
575 r = amdgpu_bo_alloc(dev, &request, &buf_handle);
579 r = amdgpu_va_range_alloc(dev,
580 amdgpu_gpu_va_range_general,
581 size, alignment, 0, &vmc_addr,
586 r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
587 AMDGPU_VM_PAGE_READABLE |
588 AMDGPU_VM_PAGE_WRITEABLE |
589 AMDGPU_VM_PAGE_EXECUTABLE |
595 r = amdgpu_bo_cpu_map(buf_handle, cpu);
600 *mc_address = vmc_addr;
606 amdgpu_bo_cpu_unmap(buf_handle);
609 amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
612 amdgpu_bo_free(buf_handle);
618 CU_BOOL suite_basic_tests_enable(void)
622 if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
623 &minor_version, &device_handle))
626 asic_id = device_handle->info.asic_id;
628 if (amdgpu_device_deinitialize(device_handle))
631 /* disable gfx engine basic test cases for Arturus due to no CPG */
632 if (asic_is_arcturus(asic_id)) {
633 if (amdgpu_set_test_active("Basic Tests",
634 "Command submission Test (GFX)",
636 fprintf(stderr, "test deactivation failed - %s\n",
639 if (amdgpu_set_test_active("Basic Tests",
640 "Command submission Test (Multi-Fence)",
642 fprintf(stderr, "test deactivation failed - %s\n",
645 if (amdgpu_set_test_active("Basic Tests",
646 "Sync dependency Test",
648 fprintf(stderr, "test deactivation failed - %s\n",
655 int suite_basic_tests_init(void)
657 struct amdgpu_gpu_info gpu_info = {0};
660 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
661 &minor_version, &device_handle);
664 if ((r == -EACCES) && (errno == EACCES))
665 printf("\n\nError:%s. "
666 "Hint:Try to run this test program as root.",
668 return CUE_SINIT_FAILED;
671 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
673 return CUE_SINIT_FAILED;
675 family_id = gpu_info.family_id;
680 int suite_basic_tests_clean(void)
682 int r = amdgpu_device_deinitialize(device_handle);
687 return CUE_SCLEAN_FAILED;
690 static void amdgpu_query_info_test(void)
692 struct amdgpu_gpu_info gpu_info = {0};
693 uint32_t version, feature;
696 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
697 CU_ASSERT_EQUAL(r, 0);
699 r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
700 0, &version, &feature);
701 CU_ASSERT_EQUAL(r, 0);
/* Submit a CE IB and a DE IB as two separate IB buffers in one GFX
 * submission, then wait for the fence.
 * NOTE(review): several lines of this function (opening brace, declarations
 * of ptr/i/r/expired, some packet dwords and ib sizes) are not visible in
 * this chunk of the file. */
static void amdgpu_command_submission_gfx_separate_ibs(void)
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* IB buffer for the DE stream */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* IB buffer for the CE stream */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
	ptr[i++] = 0xc0008400;

	/* First IB: the CE stream (flagged AMDGPU_IB_FLAG_CE). */
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	/* Second IB: the DE stream waiting on the CE counter. */
	ib_info[1].ib_mc_address = ib_result_mc_address;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	/* Block until the submission retires. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
	CU_ASSERT_EQUAL(r, 0);

	/* Tear down in reverse order of creation. */
	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
/* Submit a CE IB and a DE IB that live in the SAME buffer object (DE stream
 * starts 16 bytes / 4 dwords in), then wait for the fence.
 * NOTE(review): several lines (opening brace, ptr/i/r/expired declarations,
 * some packet dwords and ib sizes) are not visible in this chunk. */
static void amdgpu_command_submission_gfx_shared_ib(void)
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* One buffer shared by both IBs. */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
	ptr[i++] = 0xc0008400;

	/* First IB: CE stream at the start of the buffer. */
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* Second IB: DE stream 4 dwords (16 bytes) into the same buffer. */
	ptr = (uint32_t *)ib_result_cpu + 4;

	ib_info[1].ib_mc_address = ib_result_mc_address + 16;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	/* Block until the submission retires. */
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
	CU_ASSERT_EQUAL(r, 0);

	/* Tear down in reverse order of creation. */
	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
874 static void amdgpu_command_submission_gfx_cp_write_data(void)
876 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
879 static void amdgpu_command_submission_gfx_cp_const_fill(void)
881 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
884 static void amdgpu_command_submission_gfx_cp_copy_data(void)
886 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
/* Allocate max-size BOs in VRAM and GTT, then run SDMA copies between two
 * small GTT BOs with those huge BOs on the resource list, forcing the kernel
 * to evict/move buffers between submissions.
 * NOTE(review): several lines of this function (opening brace, the pm4
 * declaration, loop headers, else-branches, some call arguments and frees)
 * are not visible in this chunk of the file. */
static void amdgpu_bo_eviction_test(void)
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* Two max-allocation BOs per domain so the small test BOs cannot stay
	 * resident across submissions. */
	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	/* run all cached/USWC mapping combinations (loop1 x loop2) */
	/* allocate UC bo1 for sDMA use */
	r = amdgpu_bo_alloc_and_map(device_handle,
				    sdma_write_length, 4096,
				    AMDGPU_GEM_DOMAIN_GTT,
				    gtt_flags[loop1], &bo1,
				    (void**)&bo1_cpu, &bo1_mc,
	CU_ASSERT_EQUAL(r, 0);

	/* fill bo1 with the pattern the copy must reproduce */
	memset((void*)bo1_cpu, 0xaa, sdma_write_length);

	/* allocate UC bo2 for sDMA use */
	r = amdgpu_bo_alloc_and_map(device_handle,
				    sdma_write_length, 4096,
				    AMDGPU_GEM_DOMAIN_GTT,
				    gtt_flags[loop2], &bo2,
				    (void**)&bo2_cpu, &bo2_mc,
	CU_ASSERT_EQUAL(r, 0);

	/* clear bo2 so a missed copy is detected */
	memset((void*)bo2_cpu, 0, sdma_write_length);

	resources[2] = vram_max[loop2];
	resources[3] = gtt_max[loop2];

	/* fulfill PM4: test DMA copy linear */
	if (family_id == AMDGPU_FAMILY_SI) {
		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
		pm4[i++] = 0xffffffff & bo2_mc;
		pm4[i++] = 0xffffffff & bo1_mc;
		pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
		pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
		/* AI and newer encode the byte count as length - 1 */
		if (family_id >= AMDGPU_FAMILY_AI)
			pm4[i++] = sdma_write_length - 1;
			pm4[i++] = sdma_write_length;
		pm4[i++] = 0xffffffff & bo1_mc;
		pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
		pm4[i++] = 0xffffffff & bo2_mc;
		pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   ib_info, ibs_request);

	/* verify if SDMA test result meets with expected */
	while(i < sdma_write_length) {
		CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
	r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
/* Top-level GFX command-submission test: runs every GFX sub-case in order. */
static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}
1053 static void amdgpu_semaphore_test(void)
1055 amdgpu_context_handle context_handle[2];
1056 amdgpu_semaphore_handle sem;
1057 amdgpu_bo_handle ib_result_handle[2];
1058 void *ib_result_cpu[2];
1059 uint64_t ib_result_mc_address[2];
1060 struct amdgpu_cs_request ibs_request[2] = {0};
1061 struct amdgpu_cs_ib_info ib_info[2] = {0};
1062 struct amdgpu_cs_fence fence_status = {0};
1065 uint32_t sdma_nop, gfx_nop;
1066 amdgpu_bo_list_handle bo_list[2];
1067 amdgpu_va_handle va_handle[2];
1070 if (family_id == AMDGPU_FAMILY_SI) {
1071 sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1072 gfx_nop = GFX_COMPUTE_NOP_SI;
1074 sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1075 gfx_nop = GFX_COMPUTE_NOP;
1078 r = amdgpu_cs_create_semaphore(&sem);
1079 CU_ASSERT_EQUAL(r, 0);
1080 for (i = 0; i < 2; i++) {
1081 r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
1082 CU_ASSERT_EQUAL(r, 0);
1084 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1085 AMDGPU_GEM_DOMAIN_GTT, 0,
1086 &ib_result_handle[i], &ib_result_cpu[i],
1087 &ib_result_mc_address[i], &va_handle[i]);
1088 CU_ASSERT_EQUAL(r, 0);
1090 r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
1092 CU_ASSERT_EQUAL(r, 0);
1095 /* 1. same context different engine */
1096 ptr = ib_result_cpu[0];
1098 ib_info[0].ib_mc_address = ib_result_mc_address[0];
1099 ib_info[0].size = 1;
1101 ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
1102 ibs_request[0].number_of_ibs = 1;
1103 ibs_request[0].ibs = &ib_info[0];
1104 ibs_request[0].resources = bo_list[0];
1105 ibs_request[0].fence_info.handle = NULL;
1106 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1107 CU_ASSERT_EQUAL(r, 0);
1108 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
1109 CU_ASSERT_EQUAL(r, 0);
1111 r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
1112 CU_ASSERT_EQUAL(r, 0);
1113 ptr = ib_result_cpu[1];
1115 ib_info[1].ib_mc_address = ib_result_mc_address[1];
1116 ib_info[1].size = 1;
1118 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
1119 ibs_request[1].number_of_ibs = 1;
1120 ibs_request[1].ibs = &ib_info[1];
1121 ibs_request[1].resources = bo_list[1];
1122 ibs_request[1].fence_info.handle = NULL;
1124 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
1125 CU_ASSERT_EQUAL(r, 0);
1127 fence_status.context = context_handle[0];
1128 fence_status.ip_type = AMDGPU_HW_IP_GFX;
1129 fence_status.ip_instance = 0;
1130 fence_status.fence = ibs_request[1].seq_no;
1131 r = amdgpu_cs_query_fence_status(&fence_status,
1132 500000000, 0, &expired);
1133 CU_ASSERT_EQUAL(r, 0);
1134 CU_ASSERT_EQUAL(expired, true);
1136 /* 2. same engine different context */
1137 ptr = ib_result_cpu[0];
1139 ib_info[0].ib_mc_address = ib_result_mc_address[0];
1140 ib_info[0].size = 1;
1142 ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
1143 ibs_request[0].number_of_ibs = 1;
1144 ibs_request[0].ibs = &ib_info[0];
1145 ibs_request[0].resources = bo_list[0];
1146 ibs_request[0].fence_info.handle = NULL;
1147 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1148 CU_ASSERT_EQUAL(r, 0);
1149 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
1150 CU_ASSERT_EQUAL(r, 0);
1152 r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
1153 CU_ASSERT_EQUAL(r, 0);
1154 ptr = ib_result_cpu[1];
1156 ib_info[1].ib_mc_address = ib_result_mc_address[1];
1157 ib_info[1].size = 1;
1159 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
1160 ibs_request[1].number_of_ibs = 1;
1161 ibs_request[1].ibs = &ib_info[1];
1162 ibs_request[1].resources = bo_list[1];
1163 ibs_request[1].fence_info.handle = NULL;
1164 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
1166 CU_ASSERT_EQUAL(r, 0);
1168 fence_status.context = context_handle[1];
1169 fence_status.ip_type = AMDGPU_HW_IP_GFX;
1170 fence_status.ip_instance = 0;
1171 fence_status.fence = ibs_request[1].seq_no;
1172 r = amdgpu_cs_query_fence_status(&fence_status,
1173 500000000, 0, &expired);
1174 CU_ASSERT_EQUAL(r, 0);
1175 CU_ASSERT_EQUAL(expired, true);
1177 for (i = 0; i < 2; i++) {
1178 r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
1179 ib_result_mc_address[i], 4096);
1180 CU_ASSERT_EQUAL(r, 0);
1182 r = amdgpu_bo_list_destroy(bo_list[i]);
1183 CU_ASSERT_EQUAL(r, 0);
1185 r = amdgpu_cs_ctx_free(context_handle[i]);
1186 CU_ASSERT_EQUAL(r, 0);
1189 r = amdgpu_cs_destroy_semaphore(sem);
1190 CU_ASSERT_EQUAL(r, 0);
1193 static void amdgpu_command_submission_compute_nop(void)
/* Submit a single NOP IB on every available compute ring and wait for its
 * fence, verifying that basic compute command submission completes. */
1195 amdgpu_context_handle context_handle;
1196 amdgpu_bo_handle ib_result_handle;
1197 void *ib_result_cpu;
1198 uint64_t ib_result_mc_address;
1199 struct amdgpu_cs_request ibs_request;
1200 struct amdgpu_cs_ib_info ib_info;
1201 struct amdgpu_cs_fence fence_status;
1205 amdgpu_bo_list_handle bo_list;
1206 amdgpu_va_handle va_handle;
1207 struct drm_amdgpu_info_hw_ip info;
/* Ask the kernel which compute rings exist (info.available_rings bitmask). */
1209 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1210 CU_ASSERT_EQUAL(r, 0);
1212 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1213 CU_ASSERT_EQUAL(r, 0);
/* Iterate over each set bit of the ring mask, i.e. each usable ring. */
1215 for (instance = 0; (1 << instance) & info.available_rings; instance++) {
/* GTT-backed, CPU-mapped buffer that holds the indirect buffer (IB). */
1216 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1217 AMDGPU_GEM_DOMAIN_GTT, 0,
1218 &ib_result_handle, &ib_result_cpu,
1219 &ib_result_mc_address, &va_handle);
1220 CU_ASSERT_EQUAL(r, 0);
1222 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1224 CU_ASSERT_EQUAL(r, 0);
/* Write a PM4 NOP packet into the IB. */
1226 ptr = ib_result_cpu;
1228 ptr[0]=PACKET3(PACKET3_NOP, 14);
1230 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1231 ib_info.ib_mc_address = ib_result_mc_address;
/* Build the submission request targeting this compute ring. */
1234 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1235 ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
1236 ibs_request.ring = instance;
1237 ibs_request.number_of_ibs = 1;
1238 ibs_request.ibs = &ib_info;
1239 ibs_request.resources = bo_list;
1240 ibs_request.fence_info.handle = NULL;
1242 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1243 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
1244 CU_ASSERT_EQUAL(r, 0);
/* Block until the submitted IB's fence signals. */
1246 fence_status.context = context_handle;
1247 fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
1248 fence_status.ip_instance = 0;
1249 fence_status.ring = instance;
1250 fence_status.fence = ibs_request.seq_no;
1252 r = amdgpu_cs_query_fence_status(&fence_status,
1253 AMDGPU_TIMEOUT_INFINITE,
1255 CU_ASSERT_EQUAL(r, 0);
/* Per-ring cleanup: BO list first, then unmap/free the IB buffer. */
1257 r = amdgpu_bo_list_destroy(bo_list);
1258 CU_ASSERT_EQUAL(r, 0);
1260 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1261 ib_result_mc_address, 4096);
1262 CU_ASSERT_EQUAL(r, 0);
1265 r = amdgpu_cs_ctx_free(context_handle);
1266 CU_ASSERT_EQUAL(r, 0);
1269 static void amdgpu_command_submission_compute_cp_write_data(void)
/* Run the shared write-linear test on the compute (CP) engine. */
1271 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
1274 static void amdgpu_command_submission_compute_cp_const_fill(void)
/* Run the shared const-fill test on the compute (CP) engine. */
1276 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
1279 static void amdgpu_command_submission_compute_cp_copy_data(void)
/* Run the shared copy-linear test on the compute (CP) engine. */
1281 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
1284 static void amdgpu_command_submission_compute(void)
/* Top-level compute submission test: exercises CP write, fill, copy and
 * a plain NOP submission on the compute IP block. */
1286 /* write data using the CP */
1287 amdgpu_command_submission_compute_cp_write_data();
1288 /* const fill using the CP */
1289 amdgpu_command_submission_compute_cp_const_fill();
1290 /* copy data using the CP */
1291 amdgpu_command_submission_compute_cp_copy_data();
/* NOP submission across all compute rings */
1293 amdgpu_command_submission_compute_nop();
1297 * The caller must create and release:
1298 * pm4_src, resources, ib_info, and ibs_request.
1299 * Submits the command stream described in ibs_request and waits until this IB has completed.
1302 amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1303 amdgpu_context_handle context_handle,
1304 unsigned ip_type, int instance, int pm4_dw,
1305 uint32_t *pm4_src, int res_cnt,
1306 amdgpu_bo_handle *resources,
1307 struct amdgpu_cs_ib_info *ib_info,
1308 struct amdgpu_cs_request *ibs_request,
/* Shared CS execution helper: copies the caller-built PM4 stream into a
 * freshly allocated IB, submits it on (ip_type, instance) and waits for
 * the fence. The caller owns pm4_src/resources/ib_info/ibs_request. */
1314 amdgpu_bo_handle ib_result_handle;
1315 void *ib_result_cpu;
1316 uint64_t ib_result_mc_address;
1317 struct amdgpu_cs_fence fence_status = {0};
/* res_cnt + 1: room for the caller's resources plus the IB BO itself. */
1318 amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1319 amdgpu_va_handle va_handle;
/* Sanity-check caller-provided arguments before touching hardware. */
1322 CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1323 CU_ASSERT_NOT_EQUAL(resources, NULL);
1324 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1325 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1326 CU_ASSERT_TRUE(pm4_dw <= 1024);
/* Allocate and CPU-map the IB buffer in GTT. */
1329 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1330 AMDGPU_GEM_DOMAIN_GTT, 0,
1331 &ib_result_handle, &ib_result_cpu,
1332 &ib_result_mc_address, &va_handle);
1333 CU_ASSERT_EQUAL(r, 0);
1335 /* copy PM4 packet to ring from caller */
1336 ring_ptr = ib_result_cpu;
1337 memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1339 ib_info->ib_mc_address = ib_result_mc_address;
1340 ib_info->size = pm4_dw;
/* Secure submissions mark the IB as TMZ-protected. */
1342 ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1344 ibs_request->ip_type = ip_type;
1345 ibs_request->ring = instance;
1346 ibs_request->number_of_ibs = 1;
1347 ibs_request->ibs = ib_info;
1348 ibs_request->fence_info.handle = NULL;
/* BO list = caller resources + the IB BO appended at the end. */
1350 memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1351 all_res[res_cnt] = ib_result_handle;
1353 r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1354 NULL, &ibs_request->resources);
1355 CU_ASSERT_EQUAL(r, 0);
1357 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1360 r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1361 CU_ASSERT_EQUAL(r, 0);
/* The kernel keeps its own reference; the list can be destroyed now. */
1363 r = amdgpu_bo_list_destroy(ibs_request->resources);
1364 CU_ASSERT_EQUAL(r, 0);
1366 fence_status.ip_type = ip_type;
1367 fence_status.ip_instance = 0;
1368 fence_status.ring = ibs_request->ring;
1369 fence_status.context = context_handle;
1370 fence_status.fence = ibs_request->seq_no;
1372 /* wait for IB accomplished */
1373 r = amdgpu_cs_query_fence_status(&fence_status,
1374 AMDGPU_TIMEOUT_INFINITE,
1376 CU_ASSERT_EQUAL(r, 0);
1377 CU_ASSERT_EQUAL(expired, true);
1379 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1380 ib_result_mc_address, 4096);
1381 CU_ASSERT_EQUAL(r, 0);
1385 amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
1386 unsigned ip_type, int instance, int pm4_dw,
1387 uint32_t *pm4_src, int res_cnt,
1388 amdgpu_bo_handle *resources,
1389 struct amdgpu_cs_ib_info *ib_info,
1390 struct amdgpu_cs_request *ibs_request)
/* Convenience wrapper around the raw helper: uses the file-scope
 * device_handle and a non-secure (TMZ off) submission. */
1392 amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
1393 ip_type, instance, pm4_dw, pm4_src,
1394 res_cnt, resources, ib_info,
1395 ibs_request, false);
1399 amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
1401 ip_type, bool secure)
/* Write-linear test: for each available ring of ip_type, build a PM4/SDMA
 * write-linear packet that fills a GTT buffer with 0xdeadbeaf, submit it
 * via the shared exec helper, and verify the CPU-visible contents.
 * When 'secure' is set, buffers are allocated encrypted (TMZ) and extra
 * atomic-compare-swap packets exercise the secure path. */
1403 const int sdma_write_length = 128;
1404 const int pm4_dw = 256;
1405 amdgpu_context_handle context_handle;
1406 amdgpu_bo_handle bo;
1407 amdgpu_bo_handle *resources;
1409 struct amdgpu_cs_ib_info *ib_info;
1410 struct amdgpu_cs_request *ibs_request;
1412 volatile uint32_t *bo_cpu;
1413 uint32_t bo_cpu_origin;
1414 int i, j, r, loop, ring_id;
/* Two GTT flag variants: cached (0) and write-combined (USWC). */
1415 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1416 amdgpu_va_handle va_handle;
1417 struct drm_amdgpu_info_hw_ip hw_ip_info;
1419 pm4 = calloc(pm4_dw, sizeof(*pm4));
1420 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1422 ib_info = calloc(1, sizeof(*ib_info));
1423 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1425 ibs_request = calloc(1, sizeof(*ibs_request));
1426 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1428 r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1429 CU_ASSERT_EQUAL(r, 0);
/* Secure mode requests encrypted (TMZ) buffers for both flag variants. */
1431 for (i = 0; secure && (i < 2); i++)
1432 gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
1434 r = amdgpu_cs_ctx_create(device, &context_handle);
1436 CU_ASSERT_EQUAL(r, 0);
1438 /* prepare resource */
1439 resources = calloc(1, sizeof(amdgpu_bo_handle));
1440 CU_ASSERT_NOT_EQUAL(resources, NULL);
/* Iterate over every available ring of this IP block. */
1442 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1445 /* allocate UC bo for sDMA use */
1446 r = amdgpu_bo_alloc_and_map(device,
1447 sdma_write_length * sizeof(uint32_t),
1448 4096, AMDGPU_GEM_DOMAIN_GTT,
1449 gtt_flags[loop], &bo, (void**)&bo_cpu,
1450 &bo_mc, &va_handle);
1451 CU_ASSERT_EQUAL(r, 0);
/* Zero the destination so the post-submit verification is meaningful. */
1454 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1458 /* fulfill PM4: test DMA write-linear */
1460 if (ip_type == AMDGPU_HW_IP_DMA) {
1461 if (family_id == AMDGPU_FAMILY_SI)
1462 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1465 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1466 SDMA_WRITE_SUB_OPCODE_LINEAR,
1467 secure ? SDMA_ATOMIC_TMZ(1) : 0);
/* Destination address, split into low (dword-aligned) and high parts. */
1468 pm4[i++] = 0xfffffffc & bo_mc;
1469 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
/* Count encoding differs per family: AI uses count-1, SI has none here. */
1470 if (family_id >= AMDGPU_FAMILY_AI)
1471 pm4[i++] = sdma_write_length - 1;
1472 else if (family_id != AMDGPU_FAMILY_SI)
1473 pm4[i++] = sdma_write_length;
1474 while(j++ < sdma_write_length)
1475 pm4[i++] = 0xdeadbeaf;
1476 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1477 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
/* GFX/compute path: PM4 WRITE_DATA to memory with write confirm. */
1478 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1479 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1480 pm4[i++] = 0xfffffffc & bo_mc;
1481 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1482 while(j++ < sdma_write_length)
1483 pm4[i++] = 0xdeadbeaf;
1486 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1487 ip_type, ring_id, i, pm4,
1488 1, resources, ib_info,
1489 ibs_request, secure);
1491 /* verify if SDMA test result meets with expected */
1494 while(i < sdma_write_length) {
1495 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1497 } else if (ip_type == AMDGPU_HW_IP_GFX) {
/* Secure GFX path: ATOMIC_MEM compare-swap against the TMZ buffer. */
1498 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1499 pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1500 /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1501 * command, 1-loop_until_compare_satisfied.
1502 * single_pass_atomic, 0-lru
1503 * engine_sel, 0-micro_engine
1505 pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1506 ATOMIC_MEM_COMMAND(1) |
1507 ATOMIC_MEM_CACHEPOLICAY(0) |
1508 ATOMIC_MEM_ENGINESEL(0));
1509 pm4[i++] = 0xfffffffc & bo_mc;
1510 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1511 pm4[i++] = 0x12345678;
1513 pm4[i++] = 0xdeadbeaf;
1516 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1517 ip_type, ring_id, i, pm4,
1518 1, resources, ib_info,
1520 } else if (ip_type == AMDGPU_HW_IP_DMA) {
1521 /* restore the bo_cpu to compare */
1522 bo_cpu_origin = bo_cpu[0];
1523 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1524 /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1525 * loop, 1-loop_until_compare_satisfied.
1526 * single_pass_atomic, 0-lru
1528 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1530 SDMA_ATOMIC_LOOP(1) |
1531 SDMA_ATOMIC_TMZ(1) |
1532 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1533 pm4[i++] = 0xfffffffc & bo_mc;
1534 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1535 pm4[i++] = 0x12345678;
1537 pm4[i++] = 0xdeadbeaf;
1540 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1541 ip_type, ring_id, i, pm4,
1542 1, resources, ib_info,
1544 /* DMA's atomic behavior is unlike GFX.
1545 * If the comparing data is not equal to destination data,
1546 * For GFX, loop again till gfx timeout(system hang).
1547 * For DMA, loop again till timer expired and then send interrupt.
1548 * So testcase can't use interrupt mechanism.
1549 * We take another way to verify. When the comparing data is not
1550 * equal to destination data, overwrite the source data to the destination
1551 * buffer. Otherwise, original destination data unchanged.
1552 * So if the bo_cpu data is overwritten, the result is passed.
1554 CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1556 /* compare again for the case of dest_data != cmp_data */
1558 /* restore again, here dest_data should be */
1559 bo_cpu_origin = bo_cpu[0];
1560 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1561 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1563 SDMA_ATOMIC_LOOP(1) |
1564 SDMA_ATOMIC_TMZ(1) |
1565 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1566 pm4[i++] = 0xfffffffc & bo_mc;
1567 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
/* Deliberately mismatching compare value: destination must stay intact. */
1568 pm4[i++] = 0x87654321;
1570 pm4[i++] = 0xdeadbeaf;
1573 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1574 ip_type, ring_id, i, pm4,
1575 1, resources, ib_info,
1577 /* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
1578 CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1581 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1582 sdma_write_length * sizeof(uint32_t));
1583 CU_ASSERT_EQUAL(r, 0);
1587 /* clean resources */
1594 r = amdgpu_cs_ctx_free(context_handle);
1595 CU_ASSERT_EQUAL(r, 0);
1598 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
/* Non-secure (TMZ off) wrapper around the write-linear test using the
 * file-scope device_handle. */
1600 amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1605 static void amdgpu_command_submission_sdma_write_linear(void)
/* Run the write-linear test on the SDMA engine. */
1607 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1610 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
/* Const-fill test: for each available ring of ip_type, build a const-fill
 * (SDMA) or DMA_DATA (GFX/compute) packet that fills a 1 MiB GTT buffer
 * with 0xdeadbeaf, submit it, and verify the buffer contents. */
1612 const int sdma_write_length = 1024 * 1024;
1613 const int pm4_dw = 256;
1614 amdgpu_context_handle context_handle;
1615 amdgpu_bo_handle bo;
1616 amdgpu_bo_handle *resources;
1618 struct amdgpu_cs_ib_info *ib_info;
1619 struct amdgpu_cs_request *ibs_request;
1621 volatile uint32_t *bo_cpu;
1622 int i, j, r, loop, ring_id;
/* Two GTT flag variants: cached (0) and write-combined (USWC). */
1623 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1624 amdgpu_va_handle va_handle;
1625 struct drm_amdgpu_info_hw_ip hw_ip_info;
1627 pm4 = calloc(pm4_dw, sizeof(*pm4));
1628 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1630 ib_info = calloc(1, sizeof(*ib_info));
1631 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1633 ibs_request = calloc(1, sizeof(*ibs_request));
1634 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1636 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1637 CU_ASSERT_EQUAL(r, 0);
1639 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1640 CU_ASSERT_EQUAL(r, 0);
1642 /* prepare resource */
1643 resources = calloc(1, sizeof(amdgpu_bo_handle));
1644 CU_ASSERT_NOT_EQUAL(resources, NULL);
/* Iterate over every available ring of this IP block. */
1646 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1649 /* allocate UC bo for sDMA use */
1650 r = amdgpu_bo_alloc_and_map(device_handle,
1651 sdma_write_length, 4096,
1652 AMDGPU_GEM_DOMAIN_GTT,
1653 gtt_flags[loop], &bo, (void**)&bo_cpu,
1654 &bo_mc, &va_handle);
1655 CU_ASSERT_EQUAL(r, 0);
/* Zero the destination so the post-submit verification is meaningful. */
1658 memset((void*)bo_cpu, 0, sdma_write_length);
1662 /* fulfill PM4: test DMA const fill */
1664 if (ip_type == AMDGPU_HW_IP_DMA) {
1665 if (family_id == AMDGPU_FAMILY_SI) {
/* SI encodes the fill length in the packet header (in dwords). */
1666 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1668 sdma_write_length / 4);
1669 pm4[i++] = 0xfffffffc & bo_mc;
1670 pm4[i++] = 0xdeadbeaf;
1671 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1673 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1674 SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1675 pm4[i++] = 0xffffffff & bo_mc;
1676 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1677 pm4[i++] = 0xdeadbeaf;
/* AI and later use count-1 byte-count encoding. */
1678 if (family_id >= AMDGPU_FAMILY_AI)
1679 pm4[i++] = sdma_write_length - 1;
1681 pm4[i++] = sdma_write_length;
1683 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1684 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1685 if (family_id == AMDGPU_FAMILY_SI) {
/* SI variant of the CP DMA_DATA packet (src_sel=2: fill with data). */
1686 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1687 pm4[i++] = 0xdeadbeaf;
1688 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1689 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1690 PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1691 PACKET3_DMA_DATA_SI_CP_SYNC;
1692 pm4[i++] = 0xffffffff & bo_mc;
1693 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1694 pm4[i++] = sdma_write_length;
1696 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1697 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1698 PACKET3_DMA_DATA_DST_SEL(0) |
1699 PACKET3_DMA_DATA_SRC_SEL(2) |
1700 PACKET3_DMA_DATA_CP_SYNC;
1701 pm4[i++] = 0xdeadbeaf;
1703 pm4[i++] = 0xfffffffc & bo_mc;
1704 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1705 pm4[i++] = sdma_write_length;
1709 amdgpu_test_exec_cs_helper(context_handle,
1713 ib_info, ibs_request);
1715 /* verify if SDMA test result meets with expected */
/* Buffer size is in bytes; compare dword by dword. */
1717 while(i < (sdma_write_length / 4)) {
1718 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1721 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1723 CU_ASSERT_EQUAL(r, 0);
1727 /* clean resources */
1734 r = amdgpu_cs_ctx_free(context_handle);
1735 CU_ASSERT_EQUAL(r, 0);
1738 static void amdgpu_command_submission_sdma_const_fill(void)
/* Run the const-fill test on the SDMA engine. */
1740 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1743 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
/* Copy-linear test: fill source buffer bo1 with 0xaa, issue a GPU copy
 * (SDMA COPY or CP DMA_DATA) into bo2, and verify bo2's contents, for
 * each available ring and each combination of GTT caching flags. */
1745 const int sdma_write_length = 1024;
1746 const int pm4_dw = 256;
1747 amdgpu_context_handle context_handle;
1748 amdgpu_bo_handle bo1, bo2;
1749 amdgpu_bo_handle *resources;
1751 struct amdgpu_cs_ib_info *ib_info;
1752 struct amdgpu_cs_request *ibs_request;
1753 uint64_t bo1_mc, bo2_mc;
1754 volatile unsigned char *bo1_cpu, *bo2_cpu;
1755 int i, j, r, loop1, loop2, ring_id;
/* Two GTT flag variants: cached (0) and write-combined (USWC). */
1756 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1757 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1758 struct drm_amdgpu_info_hw_ip hw_ip_info;
1760 pm4 = calloc(pm4_dw, sizeof(*pm4));
1761 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1763 ib_info = calloc(1, sizeof(*ib_info));
1764 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1766 ibs_request = calloc(1, sizeof(*ibs_request));
1767 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1769 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1770 CU_ASSERT_EQUAL(r, 0);
1772 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1773 CU_ASSERT_EQUAL(r, 0);
1775 /* prepare resource */
/* Two entries: source and destination BOs. */
1776 resources = calloc(2, sizeof(amdgpu_bo_handle));
1777 CU_ASSERT_NOT_EQUAL(resources, NULL);
1779 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1781 /* run 9 circle to test all mapping combination */
1784 /* allocate UC bo1for sDMA use */
1785 r = amdgpu_bo_alloc_and_map(device_handle,
1786 sdma_write_length, 4096,
1787 AMDGPU_GEM_DOMAIN_GTT,
1788 gtt_flags[loop1], &bo1,
1789 (void**)&bo1_cpu, &bo1_mc,
1791 CU_ASSERT_EQUAL(r, 0);
/* Known source pattern to check after the copy. */
1794 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1796 /* allocate UC bo2 for sDMA use */
1797 r = amdgpu_bo_alloc_and_map(device_handle,
1798 sdma_write_length, 4096,
1799 AMDGPU_GEM_DOMAIN_GTT,
1800 gtt_flags[loop2], &bo2,
1801 (void**)&bo2_cpu, &bo2_mc,
1803 CU_ASSERT_EQUAL(r, 0);
/* Clear the destination so stale data cannot fake a pass. */
1806 memset((void*)bo2_cpu, 0, sdma_write_length);
1811 /* fulfill PM4: test DMA copy linear */
1813 if (ip_type == AMDGPU_HW_IP_DMA) {
1814 if (family_id == AMDGPU_FAMILY_SI) {
/* SI packet order: dst low, src low, dst high, src high. */
1815 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1818 pm4[i++] = 0xffffffff & bo2_mc;
1819 pm4[i++] = 0xffffffff & bo1_mc;
1820 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1821 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1823 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1824 SDMA_COPY_SUB_OPCODE_LINEAR,
/* AI and later use count-1 byte-count encoding. */
1826 if (family_id >= AMDGPU_FAMILY_AI)
1827 pm4[i++] = sdma_write_length - 1;
1829 pm4[i++] = sdma_write_length;
1831 pm4[i++] = 0xffffffff & bo1_mc;
1832 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1833 pm4[i++] = 0xffffffff & bo2_mc;
1834 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1836 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1837 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1838 if (family_id == AMDGPU_FAMILY_SI) {
/* SI CP DMA_DATA: src_sel=0 selects memory-to-memory copy. */
1839 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1840 pm4[i++] = 0xfffffffc & bo1_mc;
1841 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1842 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1843 PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1844 PACKET3_DMA_DATA_SI_CP_SYNC |
1845 (0xffff00000000 & bo1_mc) >> 32;
1846 pm4[i++] = 0xfffffffc & bo2_mc;
1847 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1848 pm4[i++] = sdma_write_length;
1850 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1851 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1852 PACKET3_DMA_DATA_DST_SEL(0) |
1853 PACKET3_DMA_DATA_SRC_SEL(0) |
1854 PACKET3_DMA_DATA_CP_SYNC;
1855 pm4[i++] = 0xfffffffc & bo1_mc;
1856 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1857 pm4[i++] = 0xfffffffc & bo2_mc;
1858 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1859 pm4[i++] = sdma_write_length;
1863 amdgpu_test_exec_cs_helper(context_handle,
1867 ib_info, ibs_request);
1869 /* verify if SDMA test result meets with expected */
1871 while(i < sdma_write_length) {
1872 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1874 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1876 CU_ASSERT_EQUAL(r, 0);
1877 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1879 CU_ASSERT_EQUAL(r, 0);
1885 /* clean resources */
1892 r = amdgpu_cs_ctx_free(context_handle);
1893 CU_ASSERT_EQUAL(r, 0);
1896 static void amdgpu_command_submission_sdma_copy_linear(void)
/* Run the copy-linear test on the SDMA engine. */
1898 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
1901 static void amdgpu_command_submission_sdma(void)
/* Top-level SDMA submission test: write-linear, const-fill and copy-linear. */
1903 amdgpu_command_submission_sdma_write_linear();
1904 amdgpu_command_submission_sdma_const_fill();
1905 amdgpu_command_submission_sdma_copy_linear();
1908 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
/* Submit two GFX requests (each with a CE + DE IB pair) and wait on both
 * fences with amdgpu_cs_wait_fences(); wait_all selects wait-for-all vs
 * wait-for-any semantics. */
1910 amdgpu_context_handle context_handle;
1911 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1912 void *ib_result_cpu, *ib_result_ce_cpu;
1913 uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1914 struct amdgpu_cs_request ibs_request[2] = {0};
1915 struct amdgpu_cs_ib_info ib_info[2];
1916 struct amdgpu_cs_fence fence_status[2] = {0};
1919 amdgpu_bo_list_handle bo_list;
1920 amdgpu_va_handle va_handle, va_handle_ce;
1922 int i = 0, ib_cs_num = 2;
1924 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1925 CU_ASSERT_EQUAL(r, 0);
/* One buffer for the DE IB ... */
1927 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1928 AMDGPU_GEM_DOMAIN_GTT, 0,
1929 &ib_result_handle, &ib_result_cpu,
1930 &ib_result_mc_address, &va_handle);
1931 CU_ASSERT_EQUAL(r, 0);
/* ... and one for the CE IB. */
1933 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1934 AMDGPU_GEM_DOMAIN_GTT, 0,
1935 &ib_result_ce_handle, &ib_result_ce_cpu,
1936 &ib_result_ce_mc_address, &va_handle_ce);
1937 CU_ASSERT_EQUAL(r, 0);
1939 r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1940 ib_result_ce_handle, &bo_list);
1941 CU_ASSERT_EQUAL(r, 0);
1943 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1945 /* IT_SET_CE_DE_COUNTERS */
1946 ptr = ib_result_ce_cpu;
/* SI uses a different opcode for the counter-set packet. */
1947 if (family_id != AMDGPU_FAMILY_SI) {
1948 ptr[i++] = 0xc0008900;
1951 ptr[i++] = 0xc0008400;
1953 ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1954 ib_info[0].size = i;
1955 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1957 /* IT_WAIT_ON_CE_COUNTER */
1958 ptr = ib_result_cpu;
1959 ptr[0] = 0xc0008600;
1960 ptr[1] = 0x00000001;
1961 ib_info[1].ib_mc_address = ib_result_mc_address;
1962 ib_info[1].size = 2;
/* Both requests share the same CE/DE IB pair and BO list. */
1964 for (i = 0; i < ib_cs_num; i++) {
1965 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1966 ibs_request[i].number_of_ibs = 2;
1967 ibs_request[i].ibs = ib_info;
1968 ibs_request[i].resources = bo_list;
1969 ibs_request[i].fence_info.handle = NULL;
1972 r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1974 CU_ASSERT_EQUAL(r, 0);
1976 for (i = 0; i < ib_cs_num; i++) {
1977 fence_status[i].context = context_handle;
1978 fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1979 fence_status[i].fence = ibs_request[i].seq_no;
/* Wait on both fences at once; semantics depend on wait_all. */
1982 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1983 AMDGPU_TIMEOUT_INFINITE,
1985 CU_ASSERT_EQUAL(r, 0);
1987 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1988 ib_result_mc_address, 4096);
1989 CU_ASSERT_EQUAL(r, 0);
1991 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1992 ib_result_ce_mc_address, 4096);
1993 CU_ASSERT_EQUAL(r, 0);
1995 r = amdgpu_bo_list_destroy(bo_list);
1996 CU_ASSERT_EQUAL(r, 0);
1998 r = amdgpu_cs_ctx_free(context_handle);
1999 CU_ASSERT_EQUAL(r, 0);
2002 static void amdgpu_command_submission_multi_fence(void)
/* Exercise amdgpu_cs_wait_fences() in both wait-all and wait-any modes. */
2004 amdgpu_command_submission_multi_fence_wait_all(true);
2005 amdgpu_command_submission_multi_fence_wait_all(false);
2008 static void amdgpu_userptr_test(void)
/* Userptr test: import page-aligned CPU memory as a BO, map it into the
 * GPU VA space, have SDMA write 0xdeadbeaf into it, then verify the
 * writes are visible through the original CPU pointer. */
2011 uint32_t *pm4 = NULL;
2015 int sdma_write_length = 4;
2016 amdgpu_bo_handle handle;
2017 amdgpu_context_handle context_handle;
2018 struct amdgpu_cs_ib_info *ib_info;
2019 struct amdgpu_cs_request *ibs_request;
2020 amdgpu_bo_handle buf_handle;
2021 amdgpu_va_handle va_handle;
2023 pm4 = calloc(pm4_dw, sizeof(*pm4));
2024 CU_ASSERT_NOT_EQUAL(pm4, NULL);
2026 ib_info = calloc(1, sizeof(*ib_info));
2027 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2029 ibs_request = calloc(1, sizeof(*ibs_request));
2030 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2032 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2033 CU_ASSERT_EQUAL(r, 0);
/* Userptr import requires page-aligned CPU memory. */
2035 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2036 CU_ASSERT_NOT_EQUAL(ptr, NULL);
2037 memset(ptr, 0, BUFFER_SIZE);
2039 r = amdgpu_create_bo_from_user_mem(device_handle,
2040 ptr, BUFFER_SIZE, &buf_handle);
2041 CU_ASSERT_EQUAL(r, 0);
/* Reserve a GPU VA range and map the imported BO there. */
2043 r = amdgpu_va_range_alloc(device_handle,
2044 amdgpu_gpu_va_range_general,
2045 BUFFER_SIZE, 1, 0, &bo_mc,
2047 CU_ASSERT_EQUAL(r, 0);
2049 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2050 CU_ASSERT_EQUAL(r, 0);
2052 handle = buf_handle;
/* Build an SDMA write-linear packet targeting the userptr BO. */
2056 if (family_id == AMDGPU_FAMILY_SI)
2057 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2060 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2061 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2062 pm4[i++] = 0xffffffff & bo_mc;
2063 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
/* Count encoding differs per family: AI uses count-1, SI has none here. */
2064 if (family_id >= AMDGPU_FAMILY_AI)
2065 pm4[i++] = sdma_write_length - 1;
2066 else if (family_id != AMDGPU_FAMILY_SI)
2067 pm4[i++] = sdma_write_length;
2069 while (j++ < sdma_write_length)
2070 pm4[i++] = 0xdeadbeaf;
2077 amdgpu_test_exec_cs_helper(context_handle,
2078 AMDGPU_HW_IP_DMA, 0,
2081 ib_info, ibs_request);
/* GPU writes must be visible through the original CPU mapping. */
2083 while (i < sdma_write_length) {
2084 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
/* Teardown: unmap VA, free VA range, then free the imported BO. */
2090 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2091 CU_ASSERT_EQUAL(r, 0);
2092 r = amdgpu_va_range_free(va_handle);
2093 CU_ASSERT_EQUAL(r, 0);
2094 r = amdgpu_bo_free(buf_handle);
2095 CU_ASSERT_EQUAL(r, 0);
2098 r = amdgpu_cs_ctx_free(context_handle);
2099 CU_ASSERT_EQUAL(r, 0);
2104 static void amdgpu_sync_dependency_test(void)
/* Sync-dependency test: submit a compute-shader dispatch on context[1],
 * then submit a second IB on context[0] that declares an explicit
 * dependency on the first submission's fence. After waiting on the
 * second fence, the shader's output must already be in memory, proving
 * the dependency ordered the two submissions. */
2106 amdgpu_context_handle context_handle[2];
2107 amdgpu_bo_handle ib_result_handle;
2108 void *ib_result_cpu;
2109 uint64_t ib_result_mc_address;
2110 struct amdgpu_cs_request ibs_request;
2111 struct amdgpu_cs_ib_info ib_info;
2112 struct amdgpu_cs_fence fence_status;
2115 amdgpu_bo_list_handle bo_list;
2116 amdgpu_va_handle va_handle;
2117 static uint32_t *ptr;
2120 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2121 CU_ASSERT_EQUAL(r, 0);
2122 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2123 CU_ASSERT_EQUAL(r, 0);
/* 8 KiB buffer: holds both IBs, the shader code and the data word. */
2125 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2126 AMDGPU_GEM_DOMAIN_GTT, 0,
2127 &ib_result_handle, &ib_result_cpu,
2128 &ib_result_mc_address, &va_handle);
2129 CU_ASSERT_EQUAL(r, 0);
2131 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2133 CU_ASSERT_EQUAL(r, 0);
2135 ptr = ib_result_cpu;
/* Place the shader binary at CODE_OFFSET dwords into the buffer. */
2138 memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
2140 /* Dispatch minimal init config and verify it's executed */
2141 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2142 ptr[i++] = 0x80000000;
2143 ptr[i++] = 0x80000000;
2145 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2146 ptr[i++] = 0x80000000;
2149 /* Program compute regs */
/* Shader program address, split into 256-byte-aligned low/high parts. */
2150 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2151 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2152 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2153 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2156 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2157 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2159 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
2162 FLOAT_MODE = 192 (0xc0)
2171 ptr[i++] = 0x002c0040;
2175 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2188 ptr[i++] = 0x00000010;
2192 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2196 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2197 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2198 ptr[i++] = 0x00000100;
/* Pass the data word's GPU address to the shader via USER_DATA_0/1. */
2200 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2201 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2202 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2203 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2205 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2206 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2209 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2210 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2217 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2221 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2225 ptr[i++] = 0xffff1000; /* type3 nop packet */
2227 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2228 ib_info.ib_mc_address = ib_result_mc_address;
2231 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2232 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2233 ibs_request.ring = 0;
2234 ibs_request.number_of_ibs = 1;
2235 ibs_request.ibs = &ib_info;
2236 ibs_request.resources = bo_list;
2237 ibs_request.fence_info.handle = NULL;
/* First submission: the dispatch, on context[1]. */
2239 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
2240 CU_ASSERT_EQUAL(r, 0);
2241 seq_no = ibs_request.seq_no;
2245 /* Prepare second command with dependency on the first */
/* WRITE_DATA into the same DATA_OFFSET word the shader writes. */
2247 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2248 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2249 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2250 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2254 ptr[i++] = 0xffff1000; /* type3 nop packet */
2256 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
/* The second IB starts at dword j within the same buffer. */
2257 ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2258 ib_info.size = i - j;
2260 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2261 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2262 ibs_request.ring = 0;
2263 ibs_request.number_of_ibs = 1;
2264 ibs_request.ibs = &ib_info;
2265 ibs_request.resources = bo_list;
2266 ibs_request.fence_info.handle = NULL;
/* Explicit dependency on the first submission's fence. */
2268 ibs_request.number_of_dependencies = 1;
2270 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2271 ibs_request.dependencies[0].context = context_handle[1];
2272 ibs_request.dependencies[0].ip_instance = 0;
2273 ibs_request.dependencies[0].ring = 0;
2274 ibs_request.dependencies[0].fence = seq_no;
/* Second submission: on context[0], gated by the dependency above. */
2277 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
2278 CU_ASSERT_EQUAL(r, 0);
2281 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2282 fence_status.context = context_handle[0];
2283 fence_status.ip_type = AMDGPU_HW_IP_GFX;
2284 fence_status.ip_instance = 0;
2285 fence_status.ring = 0;
2286 fence_status.fence = ibs_request.seq_no;
2288 r = amdgpu_cs_query_fence_status(&fence_status,
2289 AMDGPU_TIMEOUT_INFINITE,0, &expired);
2290 CU_ASSERT_EQUAL(r, 0);
2292 /* Expect the second command to wait for shader to complete */
2293 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2295 r = amdgpu_bo_list_destroy(bo_list);
2296 CU_ASSERT_EQUAL(r, 0);
2298 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2299 ib_result_mc_address, 4096);
2300 CU_ASSERT_EQUAL(r, 0);
2302 r = amdgpu_cs_ctx_free(context_handle[0]);
2303 CU_ASSERT_EQUAL(r, 0);
2304 r = amdgpu_cs_ctx_free(context_handle[1]);
2305 CU_ASSERT_EQUAL(r, 0);
2307 free(ibs_request.dependencies);
/* Build a deliberately huge ("slow hang") compute shader in @ptr by
 * replicating the body section of a per-family template shader 0x10000
 * times between its header and footer.
 * NOTE(review): this excerpt is an elided listing — the switch braces,
 * break statements, default case and return are not visible here. */
2310 static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2312 struct amdgpu_test_shader *shader;
2313 int i, loop = 0x10000;
/* Pick the hang-slow memcpy CS template matching the GPU family. */
2316 case AMDGPU_FAMILY_AI:
2317 shader = &memcpy_cs_hang_slow_ai;
2319 case AMDGPU_FAMILY_RV:
2320 shader = &memcpy_cs_hang_slow_rv;
/* Copy the shader header once... */
2327 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
/* ...then repeat the body section `loop` times... */
2329 for (i = 0; i < loop; i++)
2330 memcpy(ptr + shader->header_length + shader->body_length * i,
2331 shader->shader + shader->header_length,
2332 shader->body_length * sizeof(uint32_t));
/* ...and terminate with the footer section after the last body copy. */
2334 memcpy(ptr + shader->header_length + shader->body_length * loop,
2335 shader->shader + shader->header_length + shader->body_length,
2336 shader->foot_length * sizeof(uint32_t));
/* Copy one of the prebuilt gfx9 compute shader binaries into @ptr,
 * selected by the cs_type parameter (elided from this listing):
 * buffer-clear, buffer-copy, or the hanging memcpy shader.
 * NOTE(review): switch braces/breaks and the return are not visible in
 * this elided excerpt. */
2341 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2344 uint32_t shader_size;
2345 const uint32_t *shader;
2348 case CS_BUFFERCLEAR:
2349 shader = bufferclear_cs_shader_gfx9;
2350 shader_size = sizeof(bufferclear_cs_shader_gfx9);
/* presumably the CS_BUFFERCOPY case — label line elided */
2353 shader = buffercopy_cs_shader_gfx9;
2354 shader_size = sizeof(buffercopy_cs_shader_gfx9);
/* presumably the CS_HANG case — label line elided */
2357 shader = memcpy_ps_hang;
2358 shader_size = sizeof(memcpy_ps_hang);
2365 memcpy(ptr, shader, shader_size);
/* Emit the dispatch-preamble PM4 packets into @ptr and return the number
 * of dwords written (return statement elided from this listing).
 * On the GFX ring a CONTEXT_CONTROL packet is required first; compute
 * rings skip it. */
2369 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2373 /* Write context control and load shadowing register if necessary */
2374 if (ip_type == AMDGPU_HW_IP_GFX) {
2375 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2376 ptr[i++] = 0x80000000;
2377 ptr[i++] = 0x80000000;
2380 /* Issue commands to set default compute state. */
2381 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2382 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2386 /* clear mmCOMPUTE_TMPRING_SIZE */
2387 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
/* Emit SET_SH_REG packets that enable all CUs (mask 0xffffffff) on all
 * four shader engines for the upcoming dispatch; returns the dword count
 * (return statement elided from this listing). */
2394 static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2398 /* Issue commands to set cu mask used in current dispatch */
2399 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2400 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2402 ptr[i++] = 0xffffffff;
2403 ptr[i++] = 0xffffffff;
2404 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2405 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2407 ptr[i++] = 0xffffffff;
2408 ptr[i++] = 0xffffffff;
/* Program the compute shader program address (256-byte aligned, hence the
 * >>8 / >>40 split into PGM_LO/PGM_HI) and the table of gfx9 CS state
 * registers; returns the dword count (return elided from this listing). */
2413 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2419 /* Writes shader state to HW */
2420 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2421 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2423 ptr[i++] = (shader_addr >> 8);
2424 ptr[i++] = (shader_addr >> 40);
/* Replay the canned register/value pairs; 0x2c00 is the SH register
 * base (Gfx9ShRegBase) subtracted to get the packet's register offset. */
2426 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2427 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2428 /* - Gfx9ShRegBase */
2429 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2430 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
/* Dispatch the buffer-clear compute shader on the given ip_type/ring and
 * verify the 16 KiB destination buffer was filled with 0x22.
 * Flow: create context -> allocate cmd/shader/dst BOs -> build the PM4
 * stream -> submit -> wait on the fence -> byte-check dst -> tear down.
 * NOTE(review): this listing is elided; some declarations (r, i, ptr_cmd,
 * ptr_shader, expired) and a few packet payload lines are not visible. */
2436 static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2440 amdgpu_context_handle context_handle;
2441 amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2442 volatile unsigned char *ptr_dst;
2445 uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2446 amdgpu_va_handle va_dst, va_shader, va_cmd;
2448 int bo_dst_size = 16384;
2449 int bo_shader_size = 4096;
2450 int bo_cmd_size = 4096;
2451 struct amdgpu_cs_request ibs_request = {0};
2452 struct amdgpu_cs_ib_info ib_info= {0};
2453 amdgpu_bo_list_handle bo_list;
2454 struct amdgpu_cs_fence fence_status = {0};
2457 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2458 CU_ASSERT_EQUAL(r, 0);
/* Command buffer lives in GTT; shader and destination in VRAM. */
2460 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2461 AMDGPU_GEM_DOMAIN_GTT, 0,
2462 &bo_cmd, (void **)&ptr_cmd,
2463 &mc_address_cmd, &va_cmd);
2464 CU_ASSERT_EQUAL(r, 0);
2465 memset(ptr_cmd, 0, bo_cmd_size);
2467 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2468 AMDGPU_GEM_DOMAIN_VRAM, 0,
2469 &bo_shader, &ptr_shader,
2470 &mc_address_shader, &va_shader);
2471 CU_ASSERT_EQUAL(r, 0);
2472 memset(ptr_shader, 0, bo_shader_size);
2474 r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
2475 CU_ASSERT_EQUAL(r, 0);
2477 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2478 AMDGPU_GEM_DOMAIN_VRAM, 0,
2479 &bo_dst, (void **)&ptr_dst,
2480 &mc_address_dst, &va_dst);
2481 CU_ASSERT_EQUAL(r, 0);
/* Assemble the PM4 command stream via the shared dispatch helpers. */
2484 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2486 /* Issue commands to set cu mask used in current dispatch */
2487 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2489 /* Writes shader state to HW */
2490 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2492 /* Write constant data */
2493 /* Writes the UAV constant data to the SGPRs. */
2494 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2495 ptr_cmd[i++] = 0x240;
2496 ptr_cmd[i++] = mc_address_dst;
2497 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2498 ptr_cmd[i++] = 0x400;
2499 ptr_cmd[i++] = 0x74fac;
2501 /* Sets a range of pixel shader constants */
/* 0x22222222 is the fill pattern the shader writes; checked below. */
2502 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2503 ptr_cmd[i++] = 0x244;
2504 ptr_cmd[i++] = 0x22222222;
2505 ptr_cmd[i++] = 0x22222222;
2506 ptr_cmd[i++] = 0x22222222;
2507 ptr_cmd[i++] = 0x22222222;
2509 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2510 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2511 ptr_cmd[i++] = 0x215;
2514 /* dispatch direct command */
2515 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2516 ptr_cmd[i++] = 0x10;
/* Pad the IB out with type-3 NOPs (alignment loop elided here). */
2522 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2524 resources[0] = bo_dst;
2525 resources[1] = bo_shader;
2526 resources[2] = bo_cmd;
2527 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2528 CU_ASSERT_EQUAL(r, 0);
2530 ib_info.ib_mc_address = mc_address_cmd;
2532 ibs_request.ip_type = ip_type;
2533 ibs_request.ring = ring;
2534 ibs_request.resources = bo_list;
2535 ibs_request.number_of_ibs = 1;
2536 ibs_request.ibs = &ib_info;
2537 ibs_request.fence_info.handle = NULL;
2540 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2541 CU_ASSERT_EQUAL(r, 0);
2543 r = amdgpu_bo_list_destroy(bo_list);
2544 CU_ASSERT_EQUAL(r, 0);
2546 fence_status.ip_type = ip_type;
2547 fence_status.ip_instance = 0;
2548 fence_status.ring = ring;
2549 fence_status.context = context_handle;
2550 fence_status.fence = ibs_request.seq_no;
2552 /* wait for IB accomplished */
2553 r = amdgpu_cs_query_fence_status(&fence_status,
2554 AMDGPU_TIMEOUT_INFINITE,
2556 CU_ASSERT_EQUAL(r, 0);
2557 CU_ASSERT_EQUAL(expired, true);
2559 /* verify if memset test result meets with expected */
/* Every byte of the destination must equal 0x22 after the dispatch. */
2561 while(i < bo_dst_size) {
2562 CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
/* Teardown: free all BOs and the context. */
2565 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2566 CU_ASSERT_EQUAL(r, 0);
2568 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2569 CU_ASSERT_EQUAL(r, 0);
2571 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2572 CU_ASSERT_EQUAL(r, 0);
2574 r = amdgpu_cs_ctx_free(context_handle);
2575 CU_ASSERT_EQUAL(r, 0);
/* Dispatch the buffer-copy compute shader (or the hanging shader when
 * @hang is set) and verify dst matches src (0x55 fill).  With @hang the
 * test instead expects the context to report AMDGPU_CTX_UNKNOWN_RESET.
 * NOTE(review): elided listing — some declarations (r, i, ptr_cmd,
 * ptr_shader), the hang/no-hang branch lines and a few packet words are
 * not visible here. */
2578 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2583 amdgpu_context_handle context_handle;
2584 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2585 volatile unsigned char *ptr_dst;
2587 unsigned char *ptr_src;
2589 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2590 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2592 int bo_dst_size = 16384;
2593 int bo_shader_size = 4096;
2594 int bo_cmd_size = 4096;
2595 struct amdgpu_cs_request ibs_request = {0};
2596 struct amdgpu_cs_ib_info ib_info= {0};
2597 uint32_t expired, hang_state, hangs;
2598 enum cs_type cs_type;
2599 amdgpu_bo_list_handle bo_list;
2600 struct amdgpu_cs_fence fence_status = {0};
2602 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2603 CU_ASSERT_EQUAL(r, 0);
2605 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2606 AMDGPU_GEM_DOMAIN_GTT, 0,
2607 &bo_cmd, (void **)&ptr_cmd,
2608 &mc_address_cmd, &va_cmd);
2609 CU_ASSERT_EQUAL(r, 0);
2610 memset(ptr_cmd, 0, bo_cmd_size);
2612 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2613 AMDGPU_GEM_DOMAIN_VRAM, 0,
2614 &bo_shader, &ptr_shader,
2615 &mc_address_shader, &va_shader);
2616 CU_ASSERT_EQUAL(r, 0);
2617 memset(ptr_shader, 0, bo_shader_size);
/* Hang mode loads the faulting shader instead of the copy shader. */
2619 cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2620 r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
2621 CU_ASSERT_EQUAL(r, 0);
2623 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2624 AMDGPU_GEM_DOMAIN_VRAM, 0,
2625 &bo_src, (void **)&ptr_src,
2626 &mc_address_src, &va_src);
2627 CU_ASSERT_EQUAL(r, 0);
2629 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2630 AMDGPU_GEM_DOMAIN_VRAM, 0,
2631 &bo_dst, (void **)&ptr_dst,
2632 &mc_address_dst, &va_dst);
2633 CU_ASSERT_EQUAL(r, 0);
/* Known source pattern, compared against dst after the copy. */
2635 memset(ptr_src, 0x55, bo_dst_size);
2638 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2640 /* Issue commands to set cu mask used in current dispatch */
2641 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2643 /* Writes shader state to HW */
2644 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2646 /* Write constant data */
2647 /* Writes the texture resource constants data to the SGPRs */
2648 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2649 ptr_cmd[i++] = 0x240;
2650 ptr_cmd[i++] = mc_address_src;
2651 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2652 ptr_cmd[i++] = 0x400;
2653 ptr_cmd[i++] = 0x74fac;
2655 /* Writes the UAV constant data to the SGPRs. */
2656 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2657 ptr_cmd[i++] = 0x244;
2658 ptr_cmd[i++] = mc_address_dst;
2659 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2660 ptr_cmd[i++] = 0x400;
2661 ptr_cmd[i++] = 0x74fac;
2663 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2664 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2665 ptr_cmd[i++] = 0x215;
2668 /* dispatch direct command */
2669 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2670 ptr_cmd[i++] = 0x10;
/* Pad with type-3 NOPs (alignment loop elided here). */
2676 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2678 resources[0] = bo_shader;
2679 resources[1] = bo_src;
2680 resources[2] = bo_dst;
2681 resources[3] = bo_cmd;
2682 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2683 CU_ASSERT_EQUAL(r, 0);
2685 ib_info.ib_mc_address = mc_address_cmd;
2687 ibs_request.ip_type = ip_type;
2688 ibs_request.ring = ring;
2689 ibs_request.resources = bo_list;
2690 ibs_request.number_of_ibs = 1;
2691 ibs_request.ibs = &ib_info;
2692 ibs_request.fence_info.handle = NULL;
2693 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2694 CU_ASSERT_EQUAL(r, 0);
2696 fence_status.ip_type = ip_type;
2697 fence_status.ip_instance = 0;
2698 fence_status.ring = ring;
2699 fence_status.context = context_handle;
2700 fence_status.fence = ibs_request.seq_no;
2702 /* wait for IB accomplished */
2703 r = amdgpu_cs_query_fence_status(&fence_status,
2704 AMDGPU_TIMEOUT_INFINITE,
/* presumably the non-hang branch: wait must succeed and expire */
2708 CU_ASSERT_EQUAL(r, 0);
2709 CU_ASSERT_EQUAL(expired, true);
2711 /* verify if memcpy test result meets with expected */
2713 while(i < bo_dst_size) {
2714 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
/* presumably the hang branch: the context must report a GPU reset */
2718 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2719 CU_ASSERT_EQUAL(r, 0);
2720 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2723 r = amdgpu_bo_list_destroy(bo_list);
2724 CU_ASSERT_EQUAL(r, 0);
2726 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2727 CU_ASSERT_EQUAL(r, 0);
2728 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2729 CU_ASSERT_EQUAL(r, 0);
2731 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2732 CU_ASSERT_EQUAL(r, 0);
2734 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2735 CU_ASSERT_EQUAL(r, 0);
2737 r = amdgpu_cs_ctx_free(context_handle);
2738 CU_ASSERT_EQUAL(r, 0);
/* CUnit entry: run the memset and memcpy dispatch tests on every
 * available compute ring; skip (with a message) when none exist. */
2741 static void amdgpu_compute_dispatch_test(void)
2744 struct drm_amdgpu_info_hw_ip info;
2747 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2748 CU_ASSERT_EQUAL(r, 0);
2749 if (!info.available_rings)
2750 printf("SKIP ... as there's no compute ring\n");
/* available_rings is a bitmask: iterate rings while the bit is set. */
2752 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2753 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2754 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
/* CUnit entry: same as the compute variant but on the GFX rings. */
2758 static void amdgpu_gfx_dispatch_test(void)
2761 struct drm_amdgpu_info_hw_ip info;
2764 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2765 CU_ASSERT_EQUAL(r, 0);
2766 if (!info.available_rings)
2767 printf("SKIP ... as there's no graphics ring\n");
/* available_rings is a bitmask: iterate rings while the bit is set. */
2769 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2770 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2771 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
/* On each available ring of @ip_type, sandwich a hanging dispatch
 * (hang=1) between two good dispatches (hang=0) to verify the GPU
 * recovers and subsequent submissions still work. */
2775 void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2778 struct drm_amdgpu_info_hw_ip info;
2781 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2782 CU_ASSERT_EQUAL(r, 0);
2783 if (!info.available_rings)
2784 printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2786 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2787 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2788 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
2789 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
/* Variant of the memcpy dispatch test that uses the huge "hang slow"
 * shader (4 MiB shader BO, 64 MiB buffers, 0x10000-group dispatch) to
 * provoke a timeout-style hang; expects AMDGPU_CTX_UNKNOWN_RESET.
 * NOTE(review): elided listing — some declarations (r, i, ptr_cmd,
 * ptr_shader) and a few packet words are not visible here. */
2793 static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
2794 uint32_t ip_type, uint32_t ring)
2796 amdgpu_context_handle context_handle;
2797 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2798 volatile unsigned char *ptr_dst;
2800 unsigned char *ptr_src;
2802 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2803 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2805 int bo_dst_size = 0x4000000;
2806 int bo_shader_size = 0x400000;
2807 int bo_cmd_size = 4096;
2808 struct amdgpu_cs_request ibs_request = {0};
2809 struct amdgpu_cs_ib_info ib_info= {0};
2810 uint32_t hang_state, hangs, expired;
2811 struct amdgpu_gpu_info gpu_info = {0};
2812 amdgpu_bo_list_handle bo_list;
2813 struct amdgpu_cs_fence fence_status = {0};
/* family_id selects which per-family hang-slow shader template to use. */
2815 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
2816 CU_ASSERT_EQUAL(r, 0);
2818 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2819 CU_ASSERT_EQUAL(r, 0);
2821 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2822 AMDGPU_GEM_DOMAIN_GTT, 0,
2823 &bo_cmd, (void **)&ptr_cmd,
2824 &mc_address_cmd, &va_cmd);
2825 CU_ASSERT_EQUAL(r, 0);
2826 memset(ptr_cmd, 0, bo_cmd_size);
2828 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2829 AMDGPU_GEM_DOMAIN_VRAM, 0,
2830 &bo_shader, &ptr_shader,
2831 &mc_address_shader, &va_shader);
2832 CU_ASSERT_EQUAL(r, 0);
2833 memset(ptr_shader, 0, bo_shader_size);
2835 r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
2836 CU_ASSERT_EQUAL(r, 0);
2838 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2839 AMDGPU_GEM_DOMAIN_VRAM, 0,
2840 &bo_src, (void **)&ptr_src,
2841 &mc_address_src, &va_src);
2842 CU_ASSERT_EQUAL(r, 0);
2844 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2845 AMDGPU_GEM_DOMAIN_VRAM, 0,
2846 &bo_dst, (void **)&ptr_dst,
2847 &mc_address_dst, &va_dst);
2848 CU_ASSERT_EQUAL(r, 0);
2850 memset(ptr_src, 0x55, bo_dst_size);
2853 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2855 /* Issue commands to set cu mask used in current dispatch */
2856 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2858 /* Writes shader state to HW */
2859 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2861 /* Write constant data */
2862 /* Writes the texture resource constants data to the SGPRs */
2863 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2864 ptr_cmd[i++] = 0x240;
2865 ptr_cmd[i++] = mc_address_src;
2866 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
/* 0x400000 here (vs 0x400 in the normal test): much larger buffer view. */
2867 ptr_cmd[i++] = 0x400000;
2868 ptr_cmd[i++] = 0x74fac;
2870 /* Writes the UAV constant data to the SGPRs. */
2871 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2872 ptr_cmd[i++] = 0x244;
2873 ptr_cmd[i++] = mc_address_dst;
2874 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2875 ptr_cmd[i++] = 0x400000;
2876 ptr_cmd[i++] = 0x74fac;
2878 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2879 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2880 ptr_cmd[i++] = 0x215;
2883 /* dispatch direct command */
/* 0x10000 thread groups (vs 0x10) — this is what makes it "slow". */
2884 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2885 ptr_cmd[i++] = 0x10000;
2891 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2893 resources[0] = bo_shader;
2894 resources[1] = bo_src;
2895 resources[2] = bo_dst;
2896 resources[3] = bo_cmd;
2897 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2898 CU_ASSERT_EQUAL(r, 0);
2900 ib_info.ib_mc_address = mc_address_cmd;
2902 ibs_request.ip_type = ip_type;
2903 ibs_request.ring = ring;
2904 ibs_request.resources = bo_list;
2905 ibs_request.number_of_ibs = 1;
2906 ibs_request.ibs = &ib_info;
2907 ibs_request.fence_info.handle = NULL;
2908 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2909 CU_ASSERT_EQUAL(r, 0);
2911 fence_status.ip_type = ip_type;
2912 fence_status.ip_instance = 0;
2913 fence_status.ring = ring;
2914 fence_status.context = context_handle;
2915 fence_status.fence = ibs_request.seq_no;
2917 /* wait for IB accomplished */
2918 r = amdgpu_cs_query_fence_status(&fence_status,
2919 AMDGPU_TIMEOUT_INFINITE,
/* The hang must be detected as a GPU reset on this context. */
2922 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2923 CU_ASSERT_EQUAL(r, 0);
2924 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2926 r = amdgpu_bo_list_destroy(bo_list);
2927 CU_ASSERT_EQUAL(r, 0);
2929 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2930 CU_ASSERT_EQUAL(r, 0);
2931 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2932 CU_ASSERT_EQUAL(r, 0);
2934 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2935 CU_ASSERT_EQUAL(r, 0);
2937 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2938 CU_ASSERT_EQUAL(r, 0);
2940 r = amdgpu_cs_ctx_free(context_handle);
2941 CU_ASSERT_EQUAL(r, 0);
/* On each available ring of @ip_type, run a good dispatch, then the
 * slow-hang dispatch, then another good dispatch to confirm recovery. */
2944 void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2947 struct drm_amdgpu_info_hw_ip info;
2950 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2951 CU_ASSERT_EQUAL(r, 0);
2952 if (!info.available_rings)
2953 printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2955 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2956 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2957 amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
2958 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
/* Build the huge "slow hang" pixel shader: header once, body repeated
 * 0x40000 times, footer once (same scheme as the CS hang-slow loader).
 * NOTE(review): switch braces, default case and return are elided here. */
2962 static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2964 struct amdgpu_test_shader *shader;
2965 int i, loop = 0x40000;
/* AI and RV share one hang-slow PS template. */
2968 case AMDGPU_FAMILY_AI:
2969 case AMDGPU_FAMILY_RV:
2970 shader = &memcpy_ps_hang_slow_ai;
2977 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2979 for (i = 0; i < loop; i++)
2980 memcpy(ptr + shader->header_length + shader->body_length * i,
2981 shader->shader + shader->header_length,
2982 shader->body_length * sizeof(uint32_t));
2984 memcpy(ptr + shader->header_length + shader->body_length * loop,
2985 shader->shader + shader->header_length + shader->body_length,
2986 shader->foot_length * sizeof(uint32_t));
/* Load a gfx9 pixel shader into @ptr by ps_type (PS_CONST / PS_TEX /
 * hang).  For non-hang shaders, ten copies of the program are written at
 * 256-byte strides and each copy gets its patch-info code spliced in at
 * the recorded patch offset.
 * NOTE(review): elided listing — switch labels/braces, the hang early
 * return and the final return are not visible here. */
2991 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
2994 uint32_t shader_offset= 256;
2995 uint32_t mem_offset, patch_code_offset;
2996 uint32_t shader_size, patchinfo_code_size;
2997 const uint32_t *shader;
2998 const uint32_t *patchinfo_code;
2999 const uint32_t *patchcode_offset;
/* presumably PS_CONST case */
3003 shader = ps_const_shader_gfx9;
3004 shader_size = sizeof(ps_const_shader_gfx9);
3005 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3006 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3007 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
/* presumably PS_TEX case */
3010 shader = ps_tex_shader_gfx9;
3011 shader_size = sizeof(ps_tex_shader_gfx9);
3012 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3013 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3014 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
/* presumably the hang case: single copy, no patching */
3017 shader = memcpy_ps_hang;
3018 shader_size = sizeof(memcpy_ps_hang);
3020 memcpy(ptr, shader, shader_size);
3027 /* write main shader program */
3028 for (i = 0 ; i < 10; i++) {
3029 mem_offset = i * shader_offset;
3030 memcpy(ptr + mem_offset, shader, shader_size);
3033 /* overwrite patch codes */
3034 for (i = 0 ; i < 10; i++) {
3035 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3036 patch_code_offset = i * patchinfo_code_size;
3037 memcpy(ptr + mem_offset,
3038 patchinfo_code + patch_code_offset,
3039 patchinfo_code_size * sizeof(uint32_t));
3045 /* load RectPosTexFast_VS */
/* Copy the canned gfx9 RectPosTexFast vertex shader binary into @ptr.
 * (Return statement elided from this listing.) */
3046 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
3048 const uint32_t *shader;
3049 uint32_t shader_size;
3051 shader = vs_RectPosTexFast_shader_gfx9;
3052 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3054 memcpy(ptr, shader, shader_size);
/* Emit the draw preamble: CONTEXT_CONTROL followed by the canned gfx9
 * preamble register cache; returns the total dword count written. */
3059 static int amdgpu_draw_init(uint32_t *ptr)
3062 const uint32_t *preamblecache_ptr;
3063 uint32_t preamblecache_size;
3065 /* Write context control and load shadowing register if necessary */
3066 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3067 ptr[i++] = 0x80000000;
3068 ptr[i++] = 0x80000000;
3070 preamblecache_ptr = preamblecache_gfx9;
3071 preamblecache_size = sizeof(preamblecache_gfx9);
3073 memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
/* preamblecache_size is in bytes; convert to a dword count. */
3074 return i + preamblecache_size/sizeof(uint32_t);
/* Program the render-target (CB_COLOR0_*) and depth-buffer context
 * registers for the draw/blit; @hang_slow selects a much larger surface.
 * Returns the dword count (elided from this listing along with several
 * payload words). */
3077 static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
3083 /* setup color buffer */
3085 0xA318 CB_COLOR0_BASE
3086 0xA319 CB_COLOR0_BASE_EXT
3087 0xA31A CB_COLOR0_ATTRIB2
3088 0xA31B CB_COLOR0_VIEW
3089 0xA31C CB_COLOR0_INFO
3090 0xA31D CB_COLOR0_ATTRIB
3091 0xA31E CB_COLOR0_DCC_CONTROL
3092 0xA31F CB_COLOR0_CMASK
3093 0xA320 CB_COLOR0_CMASK_BASE_EXT
3094 0xA321 CB_COLOR0_FMASK
3095 0xA322 CB_COLOR0_FMASK_BASE_EXT
3096 0xA323 CB_COLOR0_CLEAR_WORD0
3097 0xA324 CB_COLOR0_CLEAR_WORD1
3098 0xA325 CB_COLOR0_DCC_BASE
3099 0xA326 CB_COLOR0_DCC_BASE_EXT */
3100 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
/* Color buffer base is 256-byte aligned: split into >>8 low, >>40 high. */
3102 ptr[i++] = dst_addr >> 8;
3103 ptr[i++] = dst_addr >> 40;
3104 ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
3107 ptr[i++] = 0x10140000;
3110 /* mmCB_MRT0_EPITCH */
3111 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3113 ptr[i++] = hang_slow ? 0x7ff : 0x1f;
3115 /* 0xA32B CB_COLOR1_BASE */
3116 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3120 /* 0xA33A CB_COLOR1_BASE */
3121 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3125 /* SPI_SHADER_COL_FORMAT */
3126 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3130 /* Setup depth buffer */
3132 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
/* Emit fixed draw state (tile steering, AA sample locations, centroid
 * priority) plus the canned gfx9 command cache; patches one cached dword
 * when @hang_slow needs a larger scissor/extent.  Returns the dword
 * count (return line elided from this listing). */
3139 static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
3142 const uint32_t *cached_cmd_ptr;
3143 uint32_t cached_cmd_size;
3145 /* mmPA_SC_TILE_STEERING_OVERRIDE */
3146 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3150 ptr[i++] = 0xffff1000;
3151 ptr[i++] = 0xc0021000;
3153 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3157 /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
3158 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
3162 /* mmPA_SC_CENTROID_PRIORITY_0 */
3163 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3167 cached_cmd_ptr = cached_cmd_gfx9;
3168 cached_cmd_size = sizeof(cached_cmd_gfx9);
3170 memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
/* Overwrite dword 12 of the cached commands for the hang-slow geometry
 * (guard condition elided from this listing). */
3172 *(ptr + i + 12) = 0x8000800;
3173 i += cached_cmd_size/sizeof(uint32_t);
/* Program the RectPosTexFast vertex shader: PGM_LO/HI address (256-byte
 * aligned split), RSRC registers, output config, and the user-SGPR
 * constants that define the rectangle (larger extents for @hang_slow).
 * Returns the dword count (return and several payload words elided). */
3178 static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
3180 uint64_t shader_addr,
3185 /* mmPA_CL_VS_OUT_CNTL */
3186 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3190 /* mmSPI_SHADER_PGM_RSRC3_VS */
3191 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3195 /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
3196 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3198 ptr[i++] = shader_addr >> 8;
3199 ptr[i++] = shader_addr >> 40;
3201 /* mmSPI_SHADER_PGM_RSRC1_VS */
3202 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3205 /* mmSPI_SHADER_PGM_RSRC2_VS */
3206 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3210 /* mmSPI_VS_OUT_CONFIG */
3211 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3215 /* mmSPI_SHADER_POS_FORMAT */
3216 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3220 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
/* Rect extent as floats: 0x45... (2048.0) for hang_slow, 0x42... (32.0)
 * otherwise. */
3223 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3224 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3226 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3229 if (ps_type == PS_CONST) {
3231 } else if (ps_type == PS_TEX) {
/* 0x3f800000 == 1.0f texture coordinate */
3232 ptr[i++] = 0x3f800000;
3233 ptr[i++] = 0x3f800000;
3236 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
/* Program the pixel shader: PGM address plus the canned SH and context
 * register tables for PS_CONST or PS_TEX.  Register 0xA1C5 is skipped in
 * the generic loop and 0xA1B4 gets special handling.  Returns the dword
 * count (return and some payload words elided from this listing). */
3243 static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3245 uint64_t shader_addr)
3248 const uint32_t *sh_registers;
3249 const uint32_t *context_registers;
3250 uint32_t num_sh_reg, num_context_reg;
3252 if (ps_type == PS_CONST) {
3253 sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3254 context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3255 num_sh_reg = ps_num_sh_registers_gfx9;
3256 num_context_reg = ps_num_context_registers_gfx9;
3257 } else if (ps_type == PS_TEX) {
3258 sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3259 context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3260 num_sh_reg = ps_num_sh_registers_gfx9;
3261 num_context_reg = ps_num_context_registers_gfx9;
3266 /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS
3267 0x2c08 SPI_SHADER_PGM_LO_PS
3268 0x2c09 SPI_SHADER_PGM_HI_PS */
/* The PS binary lives at copy #9 of the 256-byte-strided copies written
 * by amdgpu_draw_load_ps_shader. */
3269 shader_addr += 256 * 9;
3270 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3273 ptr[i++] = shader_addr >> 8;
3274 ptr[i++] = shader_addr >> 40;
/* Replay SH register pairs; 0x2c00 is the SH register base offset. */
3276 for (j = 0; j < num_sh_reg; j++) {
3277 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3278 ptr[i++] = sh_registers[j * 2] - 0x2c00;
3279 ptr[i++] = sh_registers[j * 2 + 1];
/* Replay context register pairs; 0xa000 is the context register base. */
3282 for (j = 0; j < num_context_reg; j++) {
3283 if (context_registers[j * 2] != 0xA1C5) {
3284 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3285 ptr[i++] = context_registers[j * 2] - 0xa000;
3286 ptr[i++] = context_registers[j * 2 + 1];
3289 if (context_registers[j * 2] == 0xA1B4) {
3290 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
/* Emit the actual draw call: set IA_MULTI_VGT_PARAM and the primitive
 * type, then DRAW_INDEX_AUTO.  Returns the dword count (return and the
 * draw payload words elided from this listing). */
3299 static int amdgpu_draw_draw(uint32_t *ptr)
3303 /* mmIA_MULTI_VGT_PARAM */
3304 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3305 ptr[i++] = 0x40000258;
3308 /* mmVGT_PRIMITIVE_TYPE */
3309 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3310 ptr[i++] = 0x10000242;
3313 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
/* Draw a full-screen rect with the PS_CONST pixel shader so every byte
 * of the 16 KiB render target becomes 0x33, then verify it.
 * Flow mirrors the dispatch tests: context -> BOs -> PM4 draw stream ->
 * submit on the GFX ring -> fence wait -> byte-check -> teardown.
 * NOTE(review): elided listing — some declarations (r, i, ptr_cmd,
 * expired) and a few packet words are not visible here. */
3320 void amdgpu_memset_draw(amdgpu_device_handle device_handle,
3321 amdgpu_bo_handle bo_shader_ps,
3322 amdgpu_bo_handle bo_shader_vs,
3323 uint64_t mc_address_shader_ps,
3324 uint64_t mc_address_shader_vs,
3327 amdgpu_context_handle context_handle;
3328 amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
3329 volatile unsigned char *ptr_dst;
3331 uint64_t mc_address_dst, mc_address_cmd;
3332 amdgpu_va_handle va_dst, va_cmd;
3334 int bo_dst_size = 16384;
3335 int bo_cmd_size = 4096;
3336 struct amdgpu_cs_request ibs_request = {0};
3337 struct amdgpu_cs_ib_info ib_info = {0};
3338 struct amdgpu_cs_fence fence_status = {0};
3340 amdgpu_bo_list_handle bo_list;
3342 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3343 CU_ASSERT_EQUAL(r, 0);
3345 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3346 AMDGPU_GEM_DOMAIN_GTT, 0,
3347 &bo_cmd, (void **)&ptr_cmd,
3348 &mc_address_cmd, &va_cmd);
3349 CU_ASSERT_EQUAL(r, 0);
3350 memset(ptr_cmd, 0, bo_cmd_size);
3352 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3353 AMDGPU_GEM_DOMAIN_VRAM, 0,
3354 &bo_dst, (void **)&ptr_dst,
3355 &mc_address_dst, &va_dst);
3356 CU_ASSERT_EQUAL(r, 0);
/* Assemble the draw command stream via the shared draw helpers. */
3359 i += amdgpu_draw_init(ptr_cmd + i);
3361 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3363 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3365 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
3367 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
/* 0x33333333 is the constant color the PS writes; checked below. */
3369 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3371 ptr_cmd[i++] = 0x33333333;
3372 ptr_cmd[i++] = 0x33333333;
3373 ptr_cmd[i++] = 0x33333333;
3374 ptr_cmd[i++] = 0x33333333;
3376 i += amdgpu_draw_draw(ptr_cmd + i);
/* Pad with type-3 NOPs (alignment loop elided here). */
3379 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3381 resources[0] = bo_dst;
3382 resources[1] = bo_shader_ps;
3383 resources[2] = bo_shader_vs;
3384 resources[3] = bo_cmd;
3385 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3386 CU_ASSERT_EQUAL(r, 0);
3388 ib_info.ib_mc_address = mc_address_cmd;
3390 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3391 ibs_request.ring = ring_id;
3392 ibs_request.resources = bo_list;
3393 ibs_request.number_of_ibs = 1;
3394 ibs_request.ibs = &ib_info;
3395 ibs_request.fence_info.handle = NULL;
3398 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3399 CU_ASSERT_EQUAL(r, 0);
3401 r = amdgpu_bo_list_destroy(bo_list);
3402 CU_ASSERT_EQUAL(r, 0);
3404 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3405 fence_status.ip_instance = 0;
3406 fence_status.ring = ring_id;
3407 fence_status.context = context_handle;
3408 fence_status.fence = ibs_request.seq_no;
3410 /* wait for IB accomplished */
3411 r = amdgpu_cs_query_fence_status(&fence_status,
3412 AMDGPU_TIMEOUT_INFINITE,
3414 CU_ASSERT_EQUAL(r, 0);
3415 CU_ASSERT_EQUAL(expired, true);
3417 /* verify if memset test result meets with expected */
3419 while(i < bo_dst_size) {
3420 CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
3423 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3424 CU_ASSERT_EQUAL(r, 0);
3426 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3427 CU_ASSERT_EQUAL(r, 0);
3429 r = amdgpu_cs_ctx_free(context_handle);
3430 CU_ASSERT_EQUAL(r, 0);
/* Wrapper for amdgpu_memset_draw: allocate VRAM BOs for the PS and VS,
 * load the PS_CONST pixel shader and RectPosTexFast vertex shader, run
 * the draw-based memset test, then free the shader BOs. */
3433 static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
3436 amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3437 void *ptr_shader_ps;
3438 void *ptr_shader_vs;
3439 uint64_t mc_address_shader_ps, mc_address_shader_vs;
3440 amdgpu_va_handle va_shader_ps, va_shader_vs;
3442 int bo_shader_size = 4096;
3444 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3445 AMDGPU_GEM_DOMAIN_VRAM, 0,
3446 &bo_shader_ps, &ptr_shader_ps,
3447 &mc_address_shader_ps, &va_shader_ps);
3448 CU_ASSERT_EQUAL(r, 0);
3449 memset(ptr_shader_ps, 0, bo_shader_size);
3451 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3452 AMDGPU_GEM_DOMAIN_VRAM, 0,
3453 &bo_shader_vs, &ptr_shader_vs,
3454 &mc_address_shader_vs, &va_shader_vs);
3455 CU_ASSERT_EQUAL(r, 0);
3456 memset(ptr_shader_vs, 0, bo_shader_size);
3458 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
3459 CU_ASSERT_EQUAL(r, 0);
3461 r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3462 CU_ASSERT_EQUAL(r, 0);
3464 amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
3465 mc_address_shader_ps, mc_address_shader_vs, ring);
3467 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3468 CU_ASSERT_EQUAL(r, 0);
3470 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3471 CU_ASSERT_EQUAL(r, 0);
/*
 * amdgpu_memcpy_draw - copy a 16 KiB buffer via a textured draw.
 *
 * Builds a PM4 command stream that draws a quad sampling from a VRAM
 * source BO (pre-filled with 0x55) into a VRAM destination BO using the
 * caller-supplied PS/VS shaders, submits it on the given GFX ring,
 * waits for the fence, verifies dst == src byte-for-byte, and cleans
 * up.  When 'hang' is non-zero the caller has loaded a faulting pixel
 * shader and the context is expected to report
 * AMDGPU_CTX_UNKNOWN_RESET.
 *
 * NOTE(review): this extracted listing dropped lines — local
 * declarations (r, i, ptr_cmd, expired), some PM4 register-offset
 * dwords, the "if (!hang)"/"else" guards that upstream places around
 * the verify loop vs. the reset-state check, and closing braces.  The
 * left-hand numbers are original file line numbers; confirm structure
 * against upstream before relying on it.
 */
3474 static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
3475 amdgpu_bo_handle bo_shader_ps,
3476 amdgpu_bo_handle bo_shader_vs,
3477 uint64_t mc_address_shader_ps,
3478 uint64_t mc_address_shader_vs,
3479 uint32_t ring, int hang)
3481 amdgpu_context_handle context_handle;
3482 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
/* volatile: dst is written by the GPU, re-read by the CPU for verify */
3483 volatile unsigned char *ptr_dst;
3484 unsigned char *ptr_src;
3486 uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
3487 amdgpu_va_handle va_dst, va_src, va_cmd;
3489 int bo_size = 16384;
3490 int bo_cmd_size = 4096;
3491 struct amdgpu_cs_request ibs_request = {0};
3492 struct amdgpu_cs_ib_info ib_info= {0};
3493 uint32_t hang_state, hangs;
3495 amdgpu_bo_list_handle bo_list;
3496 struct amdgpu_cs_fence fence_status = {0};
3498 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3499 CU_ASSERT_EQUAL(r, 0);
/* GTT-resident command buffer; ptr_cmd is the CPU-visible PM4 stream. */
3501 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3502 AMDGPU_GEM_DOMAIN_GTT, 0,
3503 &bo_cmd, (void **)&ptr_cmd,
3504 &mc_address_cmd, &va_cmd);
3505 CU_ASSERT_EQUAL(r, 0);
3506 memset(ptr_cmd, 0, bo_cmd_size);
/* Source and destination buffers, both 16 KiB in VRAM. */
3508 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3509 AMDGPU_GEM_DOMAIN_VRAM, 0,
3510 &bo_src, (void **)&ptr_src,
3511 &mc_address_src, &va_src);
3512 CU_ASSERT_EQUAL(r, 0);
3514 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3515 AMDGPU_GEM_DOMAIN_VRAM, 0,
3516 &bo_dst, (void **)&ptr_dst,
3517 &mc_address_dst, &va_dst);
3518 CU_ASSERT_EQUAL(r, 0);
/* Known pattern in the source so the copy can be verified below. */
3520 memset(ptr_src, 0x55, bo_size);
/* Emit the PM4 stream: pipeline init, render-target surface pointing
 * at bo_dst, draw state, then VS/PS register setup for PS_TEX. */
3523 i += amdgpu_draw_init(ptr_cmd + i);
3525 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3527 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3529 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);
3531 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
/* Texture resource descriptor (8 SH-reg dwords) pointing the pixel
 * shader's sampled image at bo_src; remaining dwords are descriptor
 * fields (size/format/swizzle) — NOTE(review): some descriptor dwords
 * and the register-offset dword appear dropped from this listing. */
3533 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3535 ptr_cmd[i++] = mc_address_src >> 8;
3536 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3537 ptr_cmd[i++] = 0x7c01f;
3538 ptr_cmd[i++] = 0x90500fac;
3539 ptr_cmd[i++] = 0x3e000;
/* Sampler state (4 SH-reg dwords) — presumably the sampler descriptor
 * for the texture fetch; verify values against upstream. */
3542 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3543 ptr_cmd[i++] = 0x14;
3544 ptr_cmd[i++] = 0x92;
/* Single context-register write at offset 0x191. */
3547 ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3548 ptr_cmd[i++] = 0x191;
/* The draw packet itself, then pad the IB with type-3 NOPs. */
3551 i += amdgpu_draw_draw(ptr_cmd + i);
3554 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
/* Every BO the submission touches must be in the BO list. */
3556 resources[0] = bo_dst;
3557 resources[1] = bo_src;
3558 resources[2] = bo_shader_ps;
3559 resources[3] = bo_shader_vs;
3560 resources[4] = bo_cmd;
3561 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3562 CU_ASSERT_EQUAL(r, 0);
3564 ib_info.ib_mc_address = mc_address_cmd;
3566 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3567 ibs_request.ring = ring;
3568 ibs_request.resources = bo_list;
3569 ibs_request.number_of_ibs = 1;
3570 ibs_request.ibs = &ib_info;
3571 ibs_request.fence_info.handle = NULL;
3572 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3573 CU_ASSERT_EQUAL(r, 0);
/* Fence identifying this submission: same ip/ring plus the seq_no
 * returned by amdgpu_cs_submit. */
3575 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3576 fence_status.ip_instance = 0;
3577 fence_status.ring = ring;
3578 fence_status.context = context_handle;
3579 fence_status.fence = ibs_request.seq_no;
3581 /* wait for IB accomplished */
3582 r = amdgpu_cs_query_fence_status(&fence_status,
3583 AMDGPU_TIMEOUT_INFINITE,
3586 CU_ASSERT_EQUAL(r, 0);
3587 CU_ASSERT_EQUAL(expired, true);
3589 /* verify if memcpy test result meets with expected */
3591 while(i < bo_size) {
3592 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
/* Hang path: after a faulting shader the context should report a
 * reset.  NOTE(review): upstream guards the verify loop with
 * "if (!hang)" and this check with "else"; those guard lines are
 * missing from this listing. */
3596 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3597 CU_ASSERT_EQUAL(r, 0);
3598 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
/* Cleanup: BO list, data BOs, command BO, then the context. */
3601 r = amdgpu_bo_list_destroy(bo_list);
3602 CU_ASSERT_EQUAL(r, 0);
3604 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
3605 CU_ASSERT_EQUAL(r, 0);
3606 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
3607 CU_ASSERT_EQUAL(r, 0);
3609 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3610 CU_ASSERT_EQUAL(r, 0);
3612 r = amdgpu_cs_ctx_free(context_handle);
3613 CU_ASSERT_EQUAL(r, 0);
/*
 * amdgpu_memcpy_draw_test - entry point for the draw-engine memcpy test.
 *
 * Non-static: also driven by the deadlock/hang test suite.  Allocates
 * two 4 KiB VRAM shader BOs, loads either the normal texture-sampling
 * pixel shader (PS_TEX) or a hanging one (PS_HANG) depending on 'hang',
 * loads the vertex shader, runs amdgpu_memcpy_draw() on the given ring,
 * and frees the shader BOs.
 *
 * NOTE(review): the extracted listing drops a few original lines (the
 * "int hang" parameter line, opening brace, "int r;"); left-hand
 * numbers are the original file's line numbers.
 */
3616 void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
3619 amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3620 void *ptr_shader_ps;
3621 void *ptr_shader_vs;
3622 uint64_t mc_address_shader_ps, mc_address_shader_vs;
3623 amdgpu_va_handle va_shader_ps, va_shader_vs;
3624 int bo_shader_size = 4096;
/* A hanging PS is substituted when the caller requests a GPU hang. */
3625 enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
3628 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3629 AMDGPU_GEM_DOMAIN_VRAM, 0,
3630 &bo_shader_ps, &ptr_shader_ps,
3631 &mc_address_shader_ps, &va_shader_ps);
3632 CU_ASSERT_EQUAL(r, 0);
3633 memset(ptr_shader_ps, 0, bo_shader_size);
3635 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3636 AMDGPU_GEM_DOMAIN_VRAM, 0,
3637 &bo_shader_vs, &ptr_shader_vs,
3638 &mc_address_shader_vs, &va_shader_vs);
3639 CU_ASSERT_EQUAL(r, 0);
3640 memset(ptr_shader_vs, 0, bo_shader_size);
/* Upload shader binaries into the freshly zeroed BOs. */
3642 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
3643 CU_ASSERT_EQUAL(r, 0);
3645 r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3646 CU_ASSERT_EQUAL(r, 0);
/* Perform the actual copy-by-draw (and hang detection if requested). */
3648 amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
3649 mc_address_shader_ps, mc_address_shader_vs, ring, hang);
3651 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3652 CU_ASSERT_EQUAL(r, 0);
3654 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3655 CU_ASSERT_EQUAL(r, 0);
/*
 * amdgpu_draw_test - CUnit test driver for the GFX draw tests.
 *
 * Queries the GFX HW IP block; if no graphics rings are available only
 * a SKIP message is printed.  Otherwise runs the memset-draw and
 * (non-hang) memcpy-draw tests once per available ring, iterating over
 * the available_rings bitmask.
 *
 * NOTE(review): extraction dropped the opening brace, local
 * declarations ("int r;", ring counter) and closing braces; left-hand
 * numbers are original file line numbers.
 */
3658 static void amdgpu_draw_test(void)
3661 struct drm_amdgpu_info_hw_ip info;
3664 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3665 CU_ASSERT_EQUAL(r, 0);
3666 if (!info.available_rings)
3667 printf("SKIP ... as there's no graphics ring\n");
/* available_rings is a bitmask; bit N set means ring N exists. */
3669 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3670 amdgpu_memset_draw_test(device_handle, ring_id);
3671 amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
/*
 * amdgpu_memcpy_draw_hang_slow_test - provoke a slow GPU hang via a
 * long-running draw.
 *
 * Non-static: driven by the hang test suite.  Same structure as the
 * normal memcpy-draw path but scaled up — 0x4000000-byte (64 MiB)
 * src/dst buffers and a 0x400000-byte (4 MiB) pixel shader loaded with
 * amdgpu_draw_load_ps_shader_hang_slow() (family-specific binary, hence
 * the amdgpu_query_gpu_info() call).  After submission it waits on the
 * fence and then expects the context to report
 * AMDGPU_CTX_UNKNOWN_RESET, i.e. the kernel recovered from the hang.
 *
 * NOTE(review): this extracted listing dropped lines — local
 * declarations (r, i, ptr_cmd), some PM4 descriptor dwords, the
 * fence-query trailing arguments, and closing braces; left-hand
 * numbers are the original file's line numbers.
 */
3675 void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
3677 amdgpu_context_handle context_handle;
3678 amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3679 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
3680 void *ptr_shader_ps;
3681 void *ptr_shader_vs;
/* volatile: dst is written by the GPU behind the CPU's back */
3682 volatile unsigned char *ptr_dst;
3683 unsigned char *ptr_src;
3685 uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
3686 uint64_t mc_address_shader_ps, mc_address_shader_vs;
3687 amdgpu_va_handle va_shader_ps, va_shader_vs;
3688 amdgpu_va_handle va_dst, va_src, va_cmd;
3689 struct amdgpu_gpu_info gpu_info = {0};
/* Oversized buffers/shader make the draw long enough to trip the
 * kernel's GPU-hang timeout. */
3691 int bo_size = 0x4000000;
3692 int bo_shader_ps_size = 0x400000;
3693 int bo_shader_vs_size = 4096;
3694 int bo_cmd_size = 4096;
3695 struct amdgpu_cs_request ibs_request = {0};
3696 struct amdgpu_cs_ib_info ib_info= {0};
3697 uint32_t hang_state, hangs, expired;
3698 amdgpu_bo_list_handle bo_list;
3699 struct amdgpu_cs_fence fence_status = {0};
/* family_id from gpu_info selects the hang-slow PS binary below. */
3701 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
3702 CU_ASSERT_EQUAL(r, 0);
3704 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3705 CU_ASSERT_EQUAL(r, 0);
/* GTT command buffer holding the PM4 stream. */
3707 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3708 AMDGPU_GEM_DOMAIN_GTT, 0,
3709 &bo_cmd, (void **)&ptr_cmd,
3710 &mc_address_cmd, &va_cmd);
3711 CU_ASSERT_EQUAL(r, 0);
3712 memset(ptr_cmd, 0, bo_cmd_size);
/* 4 MiB pixel-shader BO for the deliberately slow shader. */
3714 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
3715 AMDGPU_GEM_DOMAIN_VRAM, 0,
3716 &bo_shader_ps, &ptr_shader_ps,
3717 &mc_address_shader_ps, &va_shader_ps);
3718 CU_ASSERT_EQUAL(r, 0);
3719 memset(ptr_shader_ps, 0, bo_shader_ps_size);
3721 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
3722 AMDGPU_GEM_DOMAIN_VRAM, 0,
3723 &bo_shader_vs, &ptr_shader_vs,
3724 &mc_address_shader_vs, &va_shader_vs);
3725 CU_ASSERT_EQUAL(r, 0);
3726 memset(ptr_shader_vs, 0, bo_shader_vs_size);
/* Family-specific slow-hang PS binary; normal VS. */
3728 r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
3729 CU_ASSERT_EQUAL(r, 0);
3731 r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3732 CU_ASSERT_EQUAL(r, 0);
/* 64 MiB source/destination buffers in VRAM. */
3734 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3735 AMDGPU_GEM_DOMAIN_VRAM, 0,
3736 &bo_src, (void **)&ptr_src,
3737 &mc_address_src, &va_src);
3738 CU_ASSERT_EQUAL(r, 0);
3740 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3741 AMDGPU_GEM_DOMAIN_VRAM, 0,
3742 &bo_dst, (void **)&ptr_dst,
3743 &mc_address_dst, &va_dst);
3744 CU_ASSERT_EQUAL(r, 0);
3746 memset(ptr_src, 0x55, bo_size);
/* Emit the PM4 stream (last arg 1 = hang-slow sizing variant of the
 * surf-info/state/VS helpers). */
3749 i += amdgpu_draw_init(ptr_cmd + i);
3751 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);
3753 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);
3755 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
3756 mc_address_shader_vs, 1);
3758 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
/* Texture resource descriptor pointing at the 64 MiB source buffer;
 * size/format dwords differ from the 16 KiB case accordingly.
 * NOTE(review): some descriptor dwords appear dropped from this
 * listing. */
3760 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3762 ptr_cmd[i++] = mc_address_src >> 8;
3763 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3764 ptr_cmd[i++] = 0x1ffc7ff;
3765 ptr_cmd[i++] = 0x90500fac;
3766 ptr_cmd[i++] = 0xffe000;
/* Sampler state — presumably matches the non-hang path; verify. */
3769 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3770 ptr_cmd[i++] = 0x14;
3771 ptr_cmd[i++] = 0x92;
3774 ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3775 ptr_cmd[i++] = 0x191;
3778 i += amdgpu_draw_draw(ptr_cmd + i);
3781 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
/* BO list must cover every BO the GPU will touch. */
3783 resources[0] = bo_dst;
3784 resources[1] = bo_src;
3785 resources[2] = bo_shader_ps;
3786 resources[3] = bo_shader_vs;
3787 resources[4] = bo_cmd;
3788 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3789 CU_ASSERT_EQUAL(r, 0);
3791 ib_info.ib_mc_address = mc_address_cmd;
3793 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3794 ibs_request.ring = ring;
3795 ibs_request.resources = bo_list;
3796 ibs_request.number_of_ibs = 1;
3797 ibs_request.ibs = &ib_info;
3798 ibs_request.fence_info.handle = NULL;
3799 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3800 CU_ASSERT_EQUAL(r, 0);
3802 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3803 fence_status.ip_instance = 0;
3804 fence_status.ring = ring;
3805 fence_status.context = context_handle;
3806 fence_status.fence = ibs_request.seq_no;
3808 /* wait for IB accomplished */
/* The fence result is intentionally not asserted here — the hang is
 * expected to end in a GPU reset, checked immediately below. */
3809 r = amdgpu_cs_query_fence_status(&fence_status,
3810 AMDGPU_TIMEOUT_INFINITE,
3813 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3814 CU_ASSERT_EQUAL(r, 0);
3815 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
/* Cleanup: BO list, data BOs, command BO, shader BOs, then context. */
3817 r = amdgpu_bo_list_destroy(bo_list);
3818 CU_ASSERT_EQUAL(r, 0);
3820 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
3821 CU_ASSERT_EQUAL(r, 0);
3822 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
3823 CU_ASSERT_EQUAL(r, 0);
3825 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3826 CU_ASSERT_EQUAL(r, 0);
3828 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
3829 CU_ASSERT_EQUAL(r, 0);
3830 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
3831 CU_ASSERT_EQUAL(r, 0);
3833 r = amdgpu_cs_ctx_free(context_handle);
3834 CU_ASSERT_EQUAL(r, 0);
3837 static void amdgpu_gpu_reset_test(void)
3840 char debugfs_path[256], tmp[10];
3843 amdgpu_context_handle context_handle;
3844 uint32_t hang_state, hangs;
3846 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3847 CU_ASSERT_EQUAL(r, 0);
3849 r = fstat(drm_amdgpu[0], &sbuf);
3850 CU_ASSERT_EQUAL(r, 0);
3852 sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
3853 fd = open(debugfs_path, O_RDONLY);
3856 r = read(fd, tmp, sizeof(tmp)/sizeof(char));
3859 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3860 CU_ASSERT_EQUAL(r, 0);
3861 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3864 r = amdgpu_cs_ctx_free(context_handle);
3865 CU_ASSERT_EQUAL(r, 0);
3867 amdgpu_compute_dispatch_test();
3868 amdgpu_gfx_dispatch_test();