tests/amdgpu: add atomic dma command to verify the secure buffer (v2)
tests/amdgpu/basic_tests.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "util_math.h"

static  amdgpu_device_handle device_handle;
static  uint32_t  major_version;
static  uint32_t  minor_version;
static  uint32_t  family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
                                       unsigned ip_type,
                                       int instance, int pm4_dw, uint32_t *pm4_src,
                                       int res_cnt, amdgpu_bo_handle *resources,
                                       struct amdgpu_cs_ib_info *ib_info,
                                       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
        { "Query Info Test",  amdgpu_query_info_test },
        { "Userptr Test",  amdgpu_userptr_test },
        { "bo eviction Test",  amdgpu_bo_eviction_test },
        { "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
        { "Command submission Test (Compute)", amdgpu_command_submission_compute },
        { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
        { "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
        { "SW semaphore Test",  amdgpu_semaphore_test },
        { "Sync dependency Test",  amdgpu_sync_dependency_test },
        { "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
        { "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
        { "Draw Test",  amdgpu_draw_test },
        { "GPU reset Test", amdgpu_gpu_reset_test },
        CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
        /* 0 = byte fill
         * 2 = DW fill
         */
#define SDMA_PACKET(op, sub_op, e)      ((((e) & 0xFFFF) << 16) |       \
                                        (((sub_op) & 0xFF) << 8) |      \
                                        (((op) & 0xFF) << 0))
#define SDMA_OPCODE_WRITE                                 2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define SDMA_OPCODE_COPY                                  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
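/*
 * Example (see amdgpu_bo_eviction_test() below): the header dword for a
 * linear SDMA copy on non-SI parts is assembled as
 * SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0), i.e.
 * opcode in bits [7:0], sub-opcode in bits [15:8], extra field zero,
 * yielding 0x00000001.
 */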

#define SDMA_OPCODE_ATOMIC                                10
#               define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
        /* 0 - single_pass_atomic.
         * 1 - loop_until_compare_satisfied.
         */
#               define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
                /* 0 - non-TMZ.
                 * 1 - TMZ.
                 */
#               define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
                /* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
                 * same as Packet 3
                 */
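/*
 * Illustrative sketch only (built from the fields above): for the secure
 * buffer verification this change is about, an SDMA atomic compare-swap
 * header on a TMZ buffer would combine the fields as
 *
 *   SDMA_PKT_HEADER_OP(SDMA_OPCODE_ATOMIC) |
 *   SDMA_ATOMIC_LOOP(1) |      (loop until compare satisfied)
 *   SDMA_ATOMIC_TMZ(1) |       (operate on a TMZ, i.e. secure, buffer)
 *   SDMA_ATOMIC_OPCODE(0x8)    (TC_OP_ATOMIC_CMPSWAP_RTN_32)
 */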

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define PACKET_TYPE0    0
#define PACKET_TYPE1    1
#define PACKET_TYPE2    2
#define PACKET_TYPE3    3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
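/*
 * Example: decoding the header 0xc0008900 (IT_SET_CE_DE_COUNTERS, emitted
 * in the CE/DE tests below): CP_PACKET_GET_TYPE() yields 3 (a type-3
 * packet), CP_PACKET3_GET_OPCODE() yields 0x89, and CP_PACKET_GET_COUNT()
 * yields 0 (the count field encodes payload dwords minus one).
 */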
#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) |                         \
                         ((reg) & 0xFFFF) |                     \
                         ((n) & 0x3FFF) << 16)
#define CP_PACKET2                      0x80000000
#define         PACKET2_PAD_SHIFT               0
#define         PACKET2_PAD_MASK                (0x3fffffff << 0)

#define PACKET2(v)      (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)  ((PACKET_TYPE3 << 30) |                         \
                         (((op) & 0xFF) << 8) |                         \
                         ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1)
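/*
 * Example (used by amdgpu_command_submission_compute_nop() below): a NOP
 * header padding a 16-dword IB is PACKET3(PACKET3_NOP, 14) == 0xc00e1000,
 * i.e. type 3 in bits [31:30], count 14 in [29:16], opcode 0x10 in [15:8].
 */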

/* Packet 3 types */
#define PACKET3_NOP                                     0x10

#define PACKET3_WRITE_DATA                              0x37
#define         WRITE_DATA_DST_SEL(x)                   ((x) << 8)
                /* 0 - register
                 * 1 - memory (sync - via GRBM)
                 * 2 - gl2
                 * 3 - gds
                 * 4 - reserved
                 * 5 - memory (async - direct)
                 */
#define         WR_ONE_ADDR                             (1 << 16)
#define         WR_CONFIRM                              (1 << 20)
#define         WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
                /* 0 - LRU
                 * 1 - Stream
                 */
#define         WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
                /* 0 - me
                 * 1 - pfp
                 * 2 - ce
                 */

#define PACKET3_ATOMIC_MEM                              0x1E
#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
            /* 0 - single_pass_atomic.
             * 1 - loop_until_compare_satisfied.
             */
#define     ATOMIC_MEM_CACHEPOLICY(x)           ((x) << 25)
            /* 0 - lru.
             * 1 - stream.
             */
#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
            /* 0 - micro_engine.
             */

#define PACKET3_DMA_DATA                                0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
                /* 0 - ME
                 * 1 - PFP
                 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
                /* 0 - LRU
                 * 1 - Stream
                 * 2 - Bypass
                 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
                /* 0 - DST_ADDR using DAS
                 * 1 - GDS
                 * 3 - DST_ADDR using L2
                 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
                /* 0 - LRU
                 * 1 - Stream
                 * 2 - Bypass
                 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
                /* 0 - SRC_ADDR using SAS
                 * 1 - GDS
                 * 2 - DATA
                 * 3 - SRC_ADDR using L2
                 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
                /* 0 - none
                 * 1 - 8 in 16
                 * 2 - 8 in 32
                 * 3 - 8 in 64
                 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
                /* 0 - none
                 * 1 - 8 in 16
                 * 2 - 8 in 32
                 * 3 - 8 in 64
                 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
                /* 0 - memory
                 * 1 - register
                 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
                /* 0 - memory
                 * 1 - register
                 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
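/*
 * Illustrative sketch only (field values depend on the test): a CONTROL
 * dword for an ME-engine, memory-to-memory DMA_DATA transfer with both
 * addresses routed through L2 and CP synchronization could be assembled as
 *
 *   PACKET3_DMA_DATA_ENGINE(0) |
 *   PACKET3_DMA_DATA_DST_SEL(3) |
 *   PACKET3_DMA_DATA_SRC_SEL(3) |
 *   PACKET3_DMA_DATA_CP_SYNC
 */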

#define SDMA_PACKET_SI(op, b, t, s, cnt)        ((((op) & 0xF) << 28) | \
                                                (((b) & 0x1) << 26) |           \
                                                (((t) & 0x1) << 23) |           \
                                                (((s) & 0x1) << 22) |           \
                                                (((cnt) & 0xFFFFF) << 0))
#define SDMA_OPCODE_COPY_SI     3
#define SDMA_OPCODE_CONSTANT_FILL_SI    13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define PACKET3_DMA_DATA_SI     0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
                /* 0 - ME
                 * 1 - PFP
                 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
                /* 0 - DST_ADDR using DAS
                 * 1 - GDS
                 * 3 - DST_ADDR using L2
                 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
                /* 0 - SRC_ADDR using SAS
                 * 1 - GDS
                 * 2 - DATA
                 * 3 - SRC_ADDR using L2
                 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)

#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define         PACKET3_SET_SH_REG_START                        0x00002c00

#define PACKET3_DISPATCH_DIRECT                         0x15
#define PACKET3_EVENT_WRITE                             0x46
#define PACKET3_ACQUIRE_MEM                             0x58
#define PACKET3_SET_CONTEXT_REG                         0x69
#define PACKET3_SET_UCONFIG_REG                         0x79
#define PACKET3_DRAW_INDEX_AUTO                         0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07

#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
                      ((num & 0x0000ff00) << 8) | \
                      ((num & 0x00ff0000) >> 8) | \
                      ((num & 0x000000ff) << 24))
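/* Example: SWAP_32(0x11223344) == 0x44332211 (full 32-bit byte reversal). */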

/* Shader code
 * void main()
 * {
 *      float x = some_input;
 *      for (unsigned i = 0; i < 1000000; i++)
 *              x = sin(x);
 *
 *      u[0] = 42u;
 * }
 */

static  uint32_t shader_bin[] = {
        SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
        SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
        SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
        SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
        CS_BUFFERCLEAR,
        CS_BUFFERCOPY,
        CS_HANG,
        CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
    0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
        {0x2e12, 0x000C0041},   //{ mmCOMPUTE_PGM_RSRC1,          0x000C0041 },
        {0x2e13, 0x00000090},   //{ mmCOMPUTE_PGM_RSRC2,          0x00000090 },
        {0x2e07, 0x00000040},   //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
        {0x2e08, 0x00000001},   //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
        {0x2e09, 0x00000001},   //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
};

static const uint32_t preamblecache_gfx9[] = {
        0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
        0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
        0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
        0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
        0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
        0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
        0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
        0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
        0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
        0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
        0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
        0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
        0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
        0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
        0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
        0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
        0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
        0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
        0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
        0xc0017900, 0x24b, 0x0
};

enum ps_type {
        PS_CONST,
        PS_TEX,
        PS_HANG,
        PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
        0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
        0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
        0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
        0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
        0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
        0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
        0xc0026900, 0x292, 0x20, 0x60201b8,
        0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
        0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
        uint32_t *shader;
        uint32_t header_length;
        uint32_t body_length;
        uint32_t foot_length;
};

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
        memcpy_cs_hang_slow_ai_codes,
        4,
        3,
        1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
        memcpy_cs_hang_slow_rv_codes,
        4,
        3,
        1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
        0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
        memcpy_ps_hang_slow_ai_codes,
        7,
        2,
        9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
                        unsigned alignment, unsigned heap, uint64_t alloc_flags,
                        uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
                        uint64_t *mc_address,
                        amdgpu_va_handle *va_handle)
{
        struct amdgpu_bo_alloc_request request = {};
        amdgpu_bo_handle buf_handle;
        amdgpu_va_handle handle;
        uint64_t vmc_addr;
        int r;

        request.alloc_size = size;
        request.phys_alignment = alignment;
        request.preferred_heap = heap;
        request.flags = alloc_flags;

        r = amdgpu_bo_alloc(dev, &request, &buf_handle);
        if (r)
                return r;

        r = amdgpu_va_range_alloc(dev,
                                  amdgpu_gpu_va_range_general,
                                  size, alignment, 0, &vmc_addr,
                                  &handle, 0);
        if (r)
                goto error_va_alloc;

        r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
                                   AMDGPU_VM_PAGE_READABLE |
                                   AMDGPU_VM_PAGE_WRITEABLE |
                                   AMDGPU_VM_PAGE_EXECUTABLE |
                                   mapping_flags,
                                   AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;

        r = amdgpu_bo_cpu_map(buf_handle, cpu);
        if (r)
                goto error_cpu_map;

        *bo = buf_handle;
        *mc_address = vmc_addr;
        *va_handle = handle;

        return 0;

 error_cpu_map:
        amdgpu_bo_cpu_unmap(buf_handle);

 error_va_map:
        amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

 error_va_alloc:
        amdgpu_bo_free(buf_handle);
        return r;
}
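/*
 * Minimal usage sketch (illustrative only; error checks elided): allocate
 * a 4 KiB CPU-visible GTT buffer with no extra alloc/mapping flags and
 * release it with the matching helper used throughout these tests:
 *
 *   amdgpu_bo_handle bo;
 *   amdgpu_va_handle va;
 *   uint64_t mc;
 *   void *cpu;
 *
 *   r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
 *                                   AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *                                   &bo, &cpu, &mc, &va);
 *   ...
 *   r = amdgpu_bo_unmap_and_free(bo, va, mc, 4096);
 */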

CU_BOOL suite_basic_tests_enable(void)
{
        uint32_t asic_id;

        if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
                                             &minor_version, &device_handle))
                return CU_FALSE;

        asic_id = device_handle->info.asic_id;

        if (amdgpu_device_deinitialize(device_handle))
                return CU_FALSE;

        /* disable gfx engine basic test cases for Arcturus due to no CPG */
        if (asic_is_arcturus(asic_id)) {
                if (amdgpu_set_test_active("Basic Tests",
                                        "Command submission Test (GFX)",
                                        CU_FALSE))
                        fprintf(stderr, "test deactivation failed - %s\n",
                                CU_get_error_msg());

                if (amdgpu_set_test_active("Basic Tests",
                                        "Command submission Test (Multi-Fence)",
                                        CU_FALSE))
                        fprintf(stderr, "test deactivation failed - %s\n",
                                CU_get_error_msg());

                if (amdgpu_set_test_active("Basic Tests",
                                        "Sync dependency Test",
                                        CU_FALSE))
                        fprintf(stderr, "test deactivation failed - %s\n",
                                CU_get_error_msg());
        }

        return CU_TRUE;
}

int suite_basic_tests_init(void)
{
        struct amdgpu_gpu_info gpu_info = {0};
        int r;

        r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
                                   &minor_version, &device_handle);

        if (r) {
                if ((r == -EACCES) && (errno == EACCES))
                        printf("\n\nError: %s. "
                                "Hint: try running this test program as root.\n",
                                strerror(errno));
                return CUE_SINIT_FAILED;
        }

        r = amdgpu_query_gpu_info(device_handle, &gpu_info);
        if (r)
                return CUE_SINIT_FAILED;

        family_id = gpu_info.family_id;

        return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
        int r = amdgpu_device_deinitialize(device_handle);

        if (r == 0)
                return CUE_SUCCESS;
        else
                return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
        struct amdgpu_gpu_info gpu_info = {0};
        uint32_t version, feature;
        int r;

        r = amdgpu_query_gpu_info(device_handle, &gpu_info);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
                                          0, &version, &feature);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
        void *ib_result_cpu, *ib_result_ce_cpu;
        uint64_t ib_result_mc_address, ib_result_ce_mc_address;
        struct amdgpu_cs_request ibs_request = {0};
        struct amdgpu_cs_ib_info ib_info[2];
        struct amdgpu_cs_fence fence_status = {0};
        uint32_t *ptr;
        uint32_t expired;
        amdgpu_bo_list_handle bo_list;
        amdgpu_va_handle va_handle, va_handle_ce;
        int r, i = 0;

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &ib_result_handle, &ib_result_cpu,
                                    &ib_result_mc_address, &va_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &ib_result_ce_handle, &ib_result_ce_cpu,
                                    &ib_result_ce_mc_address, &va_handle_ce);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_get_bo_list(device_handle, ib_result_handle,
                               ib_result_ce_handle, &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

        /* IT_SET_CE_DE_COUNTERS */
        ptr = ib_result_ce_cpu;
        if (family_id != AMDGPU_FAMILY_SI) {
                ptr[i++] = 0xc0008900;
                ptr[i++] = 0;
        }
        ptr[i++] = 0xc0008400;
        ptr[i++] = 1;
        ib_info[0].ib_mc_address = ib_result_ce_mc_address;
        ib_info[0].size = i;
        ib_info[0].flags = AMDGPU_IB_FLAG_CE;

        /* IT_WAIT_ON_CE_COUNTER */
        ptr = ib_result_cpu;
        ptr[0] = 0xc0008600;
        ptr[1] = 0x00000001;
        ib_info[1].ib_mc_address = ib_result_mc_address;
        ib_info[1].size = 2;

        ibs_request.ip_type = AMDGPU_HW_IP_GFX;
        ibs_request.number_of_ibs = 2;
        ibs_request.ibs = ib_info;
        ibs_request.resources = bo_list;
        ibs_request.fence_info.handle = NULL;

        r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.context = context_handle;
        fence_status.ip_type = AMDGPU_HW_IP_GFX;
        fence_status.ip_instance = 0;
        fence_status.fence = ibs_request.seq_no;

        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE,
                                         0, &expired);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
                                     ib_result_mc_address, 4096);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
                                     ib_result_ce_mc_address, 4096);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle ib_result_handle;
        void *ib_result_cpu;
        uint64_t ib_result_mc_address;
        struct amdgpu_cs_request ibs_request = {0};
        struct amdgpu_cs_ib_info ib_info[2];
        struct amdgpu_cs_fence fence_status = {0};
        uint32_t *ptr;
        uint32_t expired;
        amdgpu_bo_list_handle bo_list;
        amdgpu_va_handle va_handle;
        int r, i = 0;

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &ib_result_handle, &ib_result_cpu,
                                    &ib_result_mc_address, &va_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
                               &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

        /* IT_SET_CE_DE_COUNTERS */
        ptr = ib_result_cpu;
        if (family_id != AMDGPU_FAMILY_SI) {
                ptr[i++] = 0xc0008900;
                ptr[i++] = 0;
        }
        ptr[i++] = 0xc0008400;
        ptr[i++] = 1;
        ib_info[0].ib_mc_address = ib_result_mc_address;
        ib_info[0].size = i;
        ib_info[0].flags = AMDGPU_IB_FLAG_CE;

        ptr = (uint32_t *)ib_result_cpu + 4;
        ptr[0] = 0xc0008600;
        ptr[1] = 0x00000001;
        ib_info[1].ib_mc_address = ib_result_mc_address + 16;
        ib_info[1].size = 2;

        ibs_request.ip_type = AMDGPU_HW_IP_GFX;
        ibs_request.number_of_ibs = 2;
        ibs_request.ibs = ib_info;
        ibs_request.resources = bo_list;
        ibs_request.fence_info.handle = NULL;

        r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.context = context_handle;
        fence_status.ip_type = AMDGPU_HW_IP_GFX;
        fence_status.ip_instance = 0;
        fence_status.fence = ibs_request.seq_no;

        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE,
                                         0, &expired);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
                                     ib_result_mc_address, 4096);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
        amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
        amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
        amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
        const int sdma_write_length = 1024;
        const int pm4_dw = 256;
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
        amdgpu_bo_handle *resources;
        uint32_t *pm4;
        struct amdgpu_cs_ib_info *ib_info;
        struct amdgpu_cs_request *ibs_request;
        uint64_t bo1_mc, bo2_mc;
        volatile unsigned char *bo1_cpu, *bo2_cpu;
        int i, j, r, loop1, loop2;
        uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
        amdgpu_va_handle bo1_va_handle, bo2_va_handle;
        struct amdgpu_heap_info vram_info, gtt_info;

        pm4 = calloc(pm4_dw, sizeof(*pm4));
        CU_ASSERT_NOT_EQUAL(pm4, NULL);

        ib_info = calloc(1, sizeof(*ib_info));
        CU_ASSERT_NOT_EQUAL(ib_info, NULL);

        ibs_request = calloc(1, sizeof(*ibs_request));
        CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        /* prepare resource */
        resources = calloc(4, sizeof(amdgpu_bo_handle));
        CU_ASSERT_NOT_EQUAL(resources, NULL);

        r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
                                   0, &vram_info);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
                                 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
                                 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
                                   0, &gtt_info);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
                                 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
                                 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
        CU_ASSERT_EQUAL(r, 0);

        loop1 = loop2 = 0;
        /* run 4 rounds to test all mapping combinations */
        while (loop1 < 2) {
                while (loop2 < 2) {
                        /* allocate UC bo1 for sDMA use */
                        r = amdgpu_bo_alloc_and_map(device_handle,
                                                    sdma_write_length, 4096,
                                                    AMDGPU_GEM_DOMAIN_GTT,
                                                    gtt_flags[loop1], &bo1,
                                                    (void**)&bo1_cpu, &bo1_mc,
                                                    &bo1_va_handle);
                        CU_ASSERT_EQUAL(r, 0);

                        /* set bo1 */
                        memset((void*)bo1_cpu, 0xaa, sdma_write_length);

                        /* allocate UC bo2 for sDMA use */
                        r = amdgpu_bo_alloc_and_map(device_handle,
                                                    sdma_write_length, 4096,
                                                    AMDGPU_GEM_DOMAIN_GTT,
                                                    gtt_flags[loop2], &bo2,
                                                    (void**)&bo2_cpu, &bo2_mc,
                                                    &bo2_va_handle);
                        CU_ASSERT_EQUAL(r, 0);

                        /* clear bo2 */
                        memset((void*)bo2_cpu, 0, sdma_write_length);

                        resources[0] = bo1;
                        resources[1] = bo2;
                        resources[2] = vram_max[loop2];
                        resources[3] = gtt_max[loop2];

                        /* fulfill PM4: test DMA copy linear */
                        i = j = 0;
                        if (family_id == AMDGPU_FAMILY_SI) {
                                pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
                                                          sdma_write_length);
                                pm4[i++] = 0xffffffff & bo2_mc;
                                pm4[i++] = 0xffffffff & bo1_mc;
                                pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
                                pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
                        } else {
                                pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
                                if (family_id >= AMDGPU_FAMILY_AI)
                                        pm4[i++] = sdma_write_length - 1;
                                else
                                        pm4[i++] = sdma_write_length;
                                pm4[i++] = 0;
                                pm4[i++] = 0xffffffff & bo1_mc;
                                pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
                                pm4[i++] = 0xffffffff & bo2_mc;
                                pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
                        }

                        amdgpu_test_exec_cs_helper(context_handle,
                                                   AMDGPU_HW_IP_DMA, 0,
                                                   i, pm4,
                                                   4, resources,
                                                   ib_info, ibs_request);

                        /* verify that the SDMA copy produced the expected result */
                        i = 0;
                        while (i < sdma_write_length) {
                                CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
                        }
                        r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
                                                     sdma_write_length);
                        CU_ASSERT_EQUAL(r, 0);
                        r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
                                                     sdma_write_length);
                        CU_ASSERT_EQUAL(r, 0);
                        loop2++;
                }
                loop2 = 0;
                loop1++;
        }
        amdgpu_bo_free(vram_max[0]);
        amdgpu_bo_free(vram_max[1]);
        amdgpu_bo_free(gtt_max[0]);
        amdgpu_bo_free(gtt_max[1]);
        /* clean resources */
        free(resources);
        free(ibs_request);
        free(ib_info);
        free(pm4);

        /* end of test */
        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
        /* write data using the CP */
        amdgpu_command_submission_gfx_cp_write_data();
        /* const fill using the CP */
        amdgpu_command_submission_gfx_cp_const_fill();
        /* copy data using the CP */
        amdgpu_command_submission_gfx_cp_copy_data();
        /* separate IB buffers for multi-IB submission */
        amdgpu_command_submission_gfx_separate_ibs();
        /* shared IB buffer for multi-IB submission */
        amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
        amdgpu_context_handle context_handle[2];
        amdgpu_semaphore_handle sem;
        amdgpu_bo_handle ib_result_handle[2];
        void *ib_result_cpu[2];
        uint64_t ib_result_mc_address[2];
        struct amdgpu_cs_request ibs_request[2] = {0};
        struct amdgpu_cs_ib_info ib_info[2] = {0};
        struct amdgpu_cs_fence fence_status = {0};
        uint32_t *ptr;
        uint32_t expired;
        uint32_t sdma_nop, gfx_nop;
        amdgpu_bo_list_handle bo_list[2];
        amdgpu_va_handle va_handle[2];
        int r, i;

        if (family_id == AMDGPU_FAMILY_SI) {
                sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
                gfx_nop = GFX_COMPUTE_NOP_SI;
        } else {
                sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
                gfx_nop = GFX_COMPUTE_NOP;
        }

        r = amdgpu_cs_create_semaphore(&sem);
        CU_ASSERT_EQUAL(r, 0);
        for (i = 0; i < 2; i++) {
                r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
                CU_ASSERT_EQUAL(r, 0);

                r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
                                            AMDGPU_GEM_DOMAIN_GTT, 0,
                                            &ib_result_handle[i], &ib_result_cpu[i],
                                            &ib_result_mc_address[i], &va_handle[i]);
                CU_ASSERT_EQUAL(r, 0);

                r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
                                       NULL, &bo_list[i]);
                CU_ASSERT_EQUAL(r, 0);
        }

        /* 1. same context different engine */
        ptr = ib_result_cpu[0];
        ptr[0] = sdma_nop;
        ib_info[0].ib_mc_address = ib_result_mc_address[0];
        ib_info[0].size = 1;

        ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
        ibs_request[0].number_of_ibs = 1;
        ibs_request[0].ibs = &ib_info[0];
        ibs_request[0].resources = bo_list[0];
        ibs_request[0].fence_info.handle = NULL;
        r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
        CU_ASSERT_EQUAL(r, 0);
        ptr = ib_result_cpu[1];
        ptr[0] = gfx_nop;
        ib_info[1].ib_mc_address = ib_result_mc_address[1];
        ib_info[1].size = 1;

        ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
        ibs_request[1].number_of_ibs = 1;
        ibs_request[1].ibs = &ib_info[1];
        ibs_request[1].resources = bo_list[1];
        ibs_request[1].fence_info.handle = NULL;

        r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.context = context_handle[0];
        fence_status.ip_type = AMDGPU_HW_IP_GFX;
        fence_status.ip_instance = 0;
        fence_status.fence = ibs_request[1].seq_no;
        r = amdgpu_cs_query_fence_status(&fence_status,
                                         500000000, 0, &expired);
        CU_ASSERT_EQUAL(r, 0);
        CU_ASSERT_EQUAL(expired, true);

        /* 2. same engine different context */
        ptr = ib_result_cpu[0];
        ptr[0] = gfx_nop;
        ib_info[0].ib_mc_address = ib_result_mc_address[0];
        ib_info[0].size = 1;

        ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
        ibs_request[0].number_of_ibs = 1;
        ibs_request[0].ibs = &ib_info[0];
        ibs_request[0].resources = bo_list[0];
        ibs_request[0].fence_info.handle = NULL;
        r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
        CU_ASSERT_EQUAL(r, 0);
        ptr = ib_result_cpu[1];
        ptr[0] = gfx_nop;
        ib_info[1].ib_mc_address = ib_result_mc_address[1];
        ib_info[1].size = 1;

        ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
        ibs_request[1].number_of_ibs = 1;
        ibs_request[1].ibs = &ib_info[1];
        ibs_request[1].resources = bo_list[1];
        ibs_request[1].fence_info.handle = NULL;
        r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.context = context_handle[1];
        fence_status.ip_type = AMDGPU_HW_IP_GFX;
        fence_status.ip_instance = 0;
        fence_status.fence = ibs_request[1].seq_no;
        r = amdgpu_cs_query_fence_status(&fence_status,
                                         500000000, 0, &expired);
        CU_ASSERT_EQUAL(r, 0);
        CU_ASSERT_EQUAL(expired, true);

        for (i = 0; i < 2; i++) {
                r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
                                             ib_result_mc_address[i], 4096);
                CU_ASSERT_EQUAL(r, 0);

                r = amdgpu_bo_list_destroy(bo_list[i]);
                CU_ASSERT_EQUAL(r, 0);

                r = amdgpu_cs_ctx_free(context_handle[i]);
                CU_ASSERT_EQUAL(r, 0);
        }

        r = amdgpu_cs_destroy_semaphore(sem);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle ib_result_handle;
        void *ib_result_cpu;
        uint64_t ib_result_mc_address;
        struct amdgpu_cs_request ibs_request;
        struct amdgpu_cs_ib_info ib_info;
        struct amdgpu_cs_fence fence_status;
        uint32_t *ptr;
        uint32_t expired;
        int r, instance;
        amdgpu_bo_list_handle bo_list;
        amdgpu_va_handle va_handle;
        struct drm_amdgpu_info_hw_ip info;

        r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        for (instance = 0; (1 << instance) & info.available_rings; instance++) {
                r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
                                            AMDGPU_GEM_DOMAIN_GTT, 0,
                                            &ib_result_handle, &ib_result_cpu,
                                            &ib_result_mc_address, &va_handle);
                CU_ASSERT_EQUAL(r, 0);

                r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
                                       &bo_list);
                CU_ASSERT_EQUAL(r, 0);

                ptr = ib_result_cpu;
                memset(ptr, 0, 16);
                ptr[0] = PACKET3(PACKET3_NOP, 14);

                memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
                ib_info.ib_mc_address = ib_result_mc_address;
                ib_info.size = 16;

                memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
                ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
                ibs_request.ring = instance;
                ibs_request.number_of_ibs = 1;
                ibs_request.ibs = &ib_info;
                ibs_request.resources = bo_list;
                ibs_request.fence_info.handle = NULL;

                memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
                r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
                CU_ASSERT_EQUAL(r, 0);

                fence_status.context = context_handle;
                fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
                fence_status.ip_instance = 0;
                fence_status.ring = instance;
                fence_status.fence = ibs_request.seq_no;

                r = amdgpu_cs_query_fence_status(&fence_status,
                                                 AMDGPU_TIMEOUT_INFINITE,
                                                 0, &expired);
                CU_ASSERT_EQUAL(r, 0);

                r = amdgpu_bo_list_destroy(bo_list);
                CU_ASSERT_EQUAL(r, 0);

                r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
                                             ib_result_mc_address, 4096);
                CU_ASSERT_EQUAL(r, 0);
        }

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
        amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
        amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
        amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
        /* write data using the CP */
        amdgpu_command_submission_compute_cp_write_data();
        /* const fill using the CP */
        amdgpu_command_submission_compute_cp_const_fill();
        /* copy data using the CP */
        amdgpu_command_submission_compute_cp_copy_data();
        /* nop test */
        amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release:
 * pm4_src, resources, ib_info, and ibs_request.
 * Submits the command stream described in ibs_request and waits for the
 * IB to complete.
 */
static void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
                               amdgpu_context_handle context_handle,
                               unsigned ip_type, int instance, int pm4_dw,
                               uint32_t *pm4_src, int res_cnt,
                               amdgpu_bo_handle *resources,
                               struct amdgpu_cs_ib_info *ib_info,
                               struct amdgpu_cs_request *ibs_request,
                               bool secure)
{
        int r;
        uint32_t expired;
        uint32_t *ring_ptr;
        amdgpu_bo_handle ib_result_handle;
        void *ib_result_cpu;
        uint64_t ib_result_mc_address;
        struct amdgpu_cs_fence fence_status = {0};
        amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
        amdgpu_va_handle va_handle;

        /* prepare CS */
        CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
        CU_ASSERT_NOT_EQUAL(resources, NULL);
        CU_ASSERT_NOT_EQUAL(ib_info, NULL);
        CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
        CU_ASSERT_TRUE(pm4_dw <= 1024);
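        /* the IB allocated below is a single 4 KiB page, i.e. 1024 dwords */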

        /* allocate IB */
        r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &ib_result_handle, &ib_result_cpu,
                                    &ib_result_mc_address, &va_handle);
        CU_ASSERT_EQUAL(r, 0);

        /* copy PM4 packet to ring from caller */
        ring_ptr = ib_result_cpu;
        memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

        ib_info->ib_mc_address = ib_result_mc_address;
        ib_info->size = pm4_dw;
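        /* AMDGPU_IB_FLAGS_SECURE asks the engine to execute this IB in
         * secure (TMZ) mode so that it may access encrypted buffers */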
        if (secure)
                ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

        ibs_request->ip_type = ip_type;
        ibs_request->ring = instance;
        ibs_request->number_of_ibs = 1;
        ibs_request->ibs = ib_info;
        ibs_request->fence_info.handle = NULL;

        memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
        all_res[res_cnt] = ib_result_handle;

        r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
                                  NULL, &ibs_request->resources);
        CU_ASSERT_EQUAL(r, 0);

        CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

        /* submit CS */
        r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_list_destroy(ibs_request->resources);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.ip_type = ip_type;
        fence_status.ip_instance = 0;
        fence_status.ring = ibs_request->ring;
        fence_status.context = context_handle;
        fence_status.fence = ibs_request->seq_no;

        /* wait for IB accomplished */
        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE,
                                         0, &expired);
        CU_ASSERT_EQUAL(r, 0);
        CU_ASSERT_EQUAL(expired, true);

        r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
                                     ib_result_mc_address, 4096);
        CU_ASSERT_EQUAL(r, 0);
}

static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
                           unsigned ip_type, int instance, int pm4_dw,
                           uint32_t *pm4_src, int res_cnt,
                           amdgpu_bo_handle *resources,
                           struct amdgpu_cs_ib_info *ib_info,
                           struct amdgpu_cs_request *ibs_request)
{
        amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
                                       ip_type, instance, pm4_dw, pm4_src,
                                       res_cnt, resources, ib_info,
                                       ibs_request, false);
}

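/* Non-static: this helper is shared with the secure-buffer (TMZ) tests
 * outside this file. */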
void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle device,
                                                          unsigned ip_type,
                                                          bool secure)
{
        const int sdma_write_length = 128;
        const int pm4_dw = 256;
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo;
        amdgpu_bo_handle *resources;
        uint32_t *pm4;
        struct amdgpu_cs_ib_info *ib_info;
        struct amdgpu_cs_request *ibs_request;
        uint64_t bo_mc;
        volatile uint32_t *bo_cpu;
        uint32_t bo_cpu_origin;
        int i, j, r, loop, ring_id;
        uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
        amdgpu_va_handle va_handle;
        struct drm_amdgpu_info_hw_ip hw_ip_info;

        pm4 = calloc(pm4_dw, sizeof(*pm4));
        CU_ASSERT_NOT_EQUAL(pm4, NULL);

        ib_info = calloc(1, sizeof(*ib_info));
        CU_ASSERT_NOT_EQUAL(ib_info, NULL);

        ibs_request = calloc(1, sizeof(*ibs_request));
        CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

        r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
        CU_ASSERT_EQUAL(r, 0);

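        /* in the secure case both GTT variants are allocated as encrypted
         * (TMZ) BOs; CPU reads of such a BO only ever observe ciphertext */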
        for (i = 0; secure && (i < 2); i++)
                gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;

        r = amdgpu_cs_ctx_create(device, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        /* prepare resource */
        resources = calloc(1, sizeof(amdgpu_bo_handle));
        CU_ASSERT_NOT_EQUAL(resources, NULL);

        for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
                loop = 0;
                while(loop < 2) {
                        /* allocate UC bo for sDMA use */
                        r = amdgpu_bo_alloc_and_map(device,
                                                    sdma_write_length * sizeof(uint32_t),
                                                    4096, AMDGPU_GEM_DOMAIN_GTT,
                                                    gtt_flags[loop], &bo, (void**)&bo_cpu,
                                                    &bo_mc, &va_handle);
                        CU_ASSERT_EQUAL(r, 0);

                        /* clear bo */
                        memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

                        resources[0] = bo;

                        /* fulfill PM4: test DMA write-linear */
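                        /* write-linear packet layout (non-SI): header, dst
                         * address lo/hi, dword count (count - 1 on AI and
                         * newer), then the payload dwords; SI packs the
                         * count into the header instead */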
                        i = j = 0;
                        if (ip_type == AMDGPU_HW_IP_DMA) {
                                if (family_id == AMDGPU_FAMILY_SI)
                                        pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
                                                                  sdma_write_length);
                                else
                                        pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
                                                               SDMA_WRITE_SUB_OPCODE_LINEAR,
                                                               secure ? SDMA_ATOMIC_TMZ(1) : 0);
                                pm4[i++] = 0xfffffffc & bo_mc;
                                pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
                                if (family_id >= AMDGPU_FAMILY_AI)
                                        pm4[i++] = sdma_write_length - 1;
                                else if (family_id != AMDGPU_FAMILY_SI)
                                        pm4[i++] = sdma_write_length;
                                while(j++ < sdma_write_length)
                                        pm4[i++] = 0xdeadbeaf;
                        } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
                                    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
                                pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
                                pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
                                pm4[i++] = 0xfffffffc & bo_mc;
                                pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
                                while(j++ < sdma_write_length)
                                        pm4[i++] = 0xdeadbeaf;
                        }

                        amdgpu_test_exec_cs_helper_raw(device, context_handle,
                                                       ip_type, ring_id, i, pm4,
                                                       1, resources, ib_info,
                                                       ibs_request, secure);

                        /* verify the write-linear result matches the expected pattern */
                        i = 0;
                        if (!secure) {
                                while(i < sdma_write_length) {
                                        CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
                                }
                        } else if (ip_type == AMDGPU_HW_IP_GFX) {
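                                /* the CPU cannot read plaintext back from an
                                 * encrypted BO, so verify on the GPU: issue a
                                 * TMZ atomic compare-swap that only completes
                                 * once the destination matches the pattern
                                 * written above */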
                                memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
                                pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
                                /* atomic opcode: 32-bit ATOMIC_CMPSWAP_RTN
                                 * command: 1 - loop until compare satisfied
                                 * cache policy: 0 - LRU
                                 * engine_sel: 0 - micro engine
                                 */
                                pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
                                                        ATOMIC_MEM_COMMAND(1) |
                                                        ATOMIC_MEM_CACHEPOLICAY(0) |
                                                        ATOMIC_MEM_ENGINESEL(0));
                                pm4[i++] = 0xfffffffc & bo_mc;
                                pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
                                pm4[i++] = 0x12345678;
                                pm4[i++] = 0x0;
                                pm4[i++] = 0xdeadbeaf;
                                pm4[i++] = 0x0;
                                pm4[i++] = 0x100;
                                amdgpu_test_exec_cs_helper_raw(device, context_handle,
                                                        ip_type, ring_id, i, pm4,
                                                        1, resources, ib_info,
                                                        ibs_request, true);
                        } else if (ip_type == AMDGPU_HW_IP_DMA) {
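                                /* same idea for SDMA: a TMZ atomic
                                 * compare-swap against 0xdeadbeaf (what the
                                 * secure write stored) swaps in 0x12345678,
                                 * so the encrypted CPU view of the first
                                 * dword must change */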
                                /* restore the bo_cpu to compare */
                                bo_cpu_origin = bo_cpu[0];
                                memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
                                /* atomic opcode: 32-bit ATOMIC_CMPSWAP_RTN
                                 * loop: 1 - loop until compare satisfied
                                 * tmz: 1 - secure access
                                 */
                                pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
                                                               0,
                                                               SDMA_ATOMIC_LOOP(1) |
                                                               SDMA_ATOMIC_TMZ(1) |
                                                               SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
                                pm4[i++] = 0xfffffffc & bo_mc;
                                pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
                                pm4[i++] = 0x12345678;
                                pm4[i++] = 0x0;
                                pm4[i++] = 0xdeadbeaf;
                                pm4[i++] = 0x0;
                                pm4[i++] = 0x100;
                                amdgpu_test_exec_cs_helper_raw(device, context_handle,
                                                        ip_type, ring_id, i, pm4,
                                                        1, resources, ib_info,
                                                        ibs_request, true);
                                /* DMA's atomic behavior differs from GFX's.
                                 * If the compare data does not equal the destination data,
                                 * GFX loops again until the engine times out (system hang),
                                 * while DMA loops until a timer expires and then raises an
                                 * interrupt, so the testcase cannot rely on the interrupt
                                 * mechanism. Verify through memory instead: when the compare
                                 * data matches the destination data, the source data is
                                 * swapped into the destination buffer; otherwise the
                                 * destination is left unchanged. So if the bo_cpu data was
                                 * overwritten, the test passed.
                                 */
                                CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);

                                /* compare again for the case of dest_data != cmp_data */
                                i = 0;
                                /* restore again; dest_data now holds 0x12345678 from the swap above */
                                bo_cpu_origin = bo_cpu[0];
                                memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
                                pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
                                                               0,
                                                               SDMA_ATOMIC_LOOP(1) |
                                                               SDMA_ATOMIC_TMZ(1) |
                                                               SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
                                pm4[i++] = 0xfffffffc & bo_mc;
                                pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
                                pm4[i++] = 0x87654321;
                                pm4[i++] = 0x0;
                                pm4[i++] = 0xdeadbeaf;
                                pm4[i++] = 0x0;
                                pm4[i++] = 0x100;
                                amdgpu_test_exec_cs_helper_raw(device, context_handle,
                                                        ip_type, ring_id, i, pm4,
                                                        1, resources, ib_info,
                                                        ibs_request, true);
                                /* bo_cpu[0] should be unchanged (still 0x12345678), otherwise the test failed */
                                CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
                        }

                        r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
                                                     sdma_write_length * sizeof(uint32_t));
                        CU_ASSERT_EQUAL(r, 0);
                        loop++;
                }
        }
        /* clean resources */
        free(resources);
        free(ibs_request);
        free(ib_info);
        free(pm4);

        /* end of test */
        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
        amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
                                                                  ip_type,
                                                                  false);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
        amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
        const int sdma_write_length = 1024 * 1024;
        const int pm4_dw = 256;
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo;
        amdgpu_bo_handle *resources;
        uint32_t *pm4;
        struct amdgpu_cs_ib_info *ib_info;
        struct amdgpu_cs_request *ibs_request;
        uint64_t bo_mc;
        volatile uint32_t *bo_cpu;
        int i, j, r, loop, ring_id;
        uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
        amdgpu_va_handle va_handle;
        struct drm_amdgpu_info_hw_ip hw_ip_info;

        pm4 = calloc(pm4_dw, sizeof(*pm4));
        CU_ASSERT_NOT_EQUAL(pm4, NULL);

        ib_info = calloc(1, sizeof(*ib_info));
        CU_ASSERT_NOT_EQUAL(ib_info, NULL);

        ibs_request = calloc(1, sizeof(*ibs_request));
        CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

        r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        /* prepare resource */
        resources = calloc(1, sizeof(amdgpu_bo_handle));
        CU_ASSERT_NOT_EQUAL(resources, NULL);

        for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
                loop = 0;
                while(loop < 2) {
                        /* allocate UC bo for sDMA use */
                        r = amdgpu_bo_alloc_and_map(device_handle,
                                                    sdma_write_length, 4096,
                                                    AMDGPU_GEM_DOMAIN_GTT,
                                                    gtt_flags[loop], &bo, (void**)&bo_cpu,
                                                    &bo_mc, &va_handle);
                        CU_ASSERT_EQUAL(r, 0);

                        /* clear bo */
                        memset((void*)bo_cpu, 0, sdma_write_length);

                        resources[0] = bo;

                        /* fulfill PM4: test DMA const fill */
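                        /* constant-fill packet layout (non-SI): header, dst
                         * address lo/hi, 32-bit fill pattern, then byte
                         * count (count - 1 on AI and newer); SI packs the
                         * dword count into the header instead */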
                        i = j = 0;
                        if (ip_type == AMDGPU_HW_IP_DMA) {
                                if (family_id == AMDGPU_FAMILY_SI) {
                                        pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
                                                                  0, 0, 0,
                                                                  sdma_write_length / 4);
                                        pm4[i++] = 0xfffffffc & bo_mc;
                                        pm4[i++] = 0xdeadbeaf;
                                        pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
                                } else {
                                        pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
                                                               SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
                                        pm4[i++] = 0xffffffff & bo_mc;
                                        pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
                                        pm4[i++] = 0xdeadbeaf;
                                        if (family_id >= AMDGPU_FAMILY_AI)
                                                pm4[i++] = sdma_write_length - 1;
                                        else
                                                pm4[i++] = sdma_write_length;
                                }
                        } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
                                   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
                                if (family_id == AMDGPU_FAMILY_SI) {
                                        pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
                                        pm4[i++] = 0xdeadbeaf;
                                        pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
                                                   PACKET3_DMA_DATA_SI_DST_SEL(0) |
                                                   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
                                                   PACKET3_DMA_DATA_SI_CP_SYNC;
                                        pm4[i++] = 0xffffffff & bo_mc;
                                        pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
                                        pm4[i++] = sdma_write_length;
                                } else {
                                        pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
                                        pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
                                                   PACKET3_DMA_DATA_DST_SEL(0) |
                                                   PACKET3_DMA_DATA_SRC_SEL(2) |
                                                   PACKET3_DMA_DATA_CP_SYNC;
                                        pm4[i++] = 0xdeadbeaf;
                                        pm4[i++] = 0;
                                        pm4[i++] = 0xfffffffc & bo_mc;
                                        pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
                                        pm4[i++] = sdma_write_length;
                                }
                        }

                        amdgpu_test_exec_cs_helper(context_handle,
                                                   ip_type, ring_id,
                                                   i, pm4,
                                                   1, resources,
                                                   ib_info, ibs_request);

                        /* verify the const-fill result matches the expected pattern */
                        i = 0;
                        while(i < (sdma_write_length / 4)) {
                                CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
                        }

                        r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
                                                     sdma_write_length);
                        CU_ASSERT_EQUAL(r, 0);
                        loop++;
                }
        }
        /* clean resources */
        free(resources);
        free(ibs_request);
        free(ib_info);
        free(pm4);

        /* end of test */
        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
        amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
        const int sdma_write_length = 1024;
        const int pm4_dw = 256;
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo1, bo2;
        amdgpu_bo_handle *resources;
        uint32_t *pm4;
        struct amdgpu_cs_ib_info *ib_info;
        struct amdgpu_cs_request *ibs_request;
        uint64_t bo1_mc, bo2_mc;
        volatile unsigned char *bo1_cpu, *bo2_cpu;
        int i, j, r, loop1, loop2, ring_id;
        uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
        amdgpu_va_handle bo1_va_handle, bo2_va_handle;
        struct drm_amdgpu_info_hw_ip hw_ip_info;

        pm4 = calloc(pm4_dw, sizeof(*pm4));
        CU_ASSERT_NOT_EQUAL(pm4, NULL);

        ib_info = calloc(1, sizeof(*ib_info));
        CU_ASSERT_NOT_EQUAL(ib_info, NULL);

        ibs_request = calloc(1, sizeof(*ibs_request));
        CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

        r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        /* prepare resource */
        resources = calloc(2, sizeof(amdgpu_bo_handle));
        CU_ASSERT_NOT_EQUAL(resources, NULL);

        for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
                loop1 = 0;
                /* run all four gtt_flags combinations for source and destination */
                while(loop1 < 2) {
                        loop2 = 0;
                        while(loop2 < 2) {
                                /* allocate UC bo1 for sDMA use */
                                r = amdgpu_bo_alloc_and_map(device_handle,
                                                            sdma_write_length, 4096,
                                                            AMDGPU_GEM_DOMAIN_GTT,
                                                            gtt_flags[loop1], &bo1,
                                                            (void**)&bo1_cpu, &bo1_mc,
                                                            &bo1_va_handle);
                                CU_ASSERT_EQUAL(r, 0);

                                /* set bo1 */
                                memset((void*)bo1_cpu, 0xaa, sdma_write_length);

                                /* allocate UC bo2 for sDMA use */
                                r = amdgpu_bo_alloc_and_map(device_handle,
                                                            sdma_write_length, 4096,
                                                            AMDGPU_GEM_DOMAIN_GTT,
                                                            gtt_flags[loop2], &bo2,
                                                            (void**)&bo2_cpu, &bo2_mc,
                                                            &bo2_va_handle);
                                CU_ASSERT_EQUAL(r, 0);

                                /* clear bo2 */
                                memset((void*)bo2_cpu, 0, sdma_write_length);

                                resources[0] = bo1;
                                resources[1] = bo2;

                                /* fulfill PM4: test DMA copy linear */
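                                /* copy-linear packet layout (non-SI): header,
                                 * byte count (count - 1 on AI and newer), a
                                 * reserved dword, then src and dst address
                                 * lo/hi; SI carries the count in the header */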
                                i = j = 0;
                                if (ip_type == AMDGPU_HW_IP_DMA) {
                                        if (family_id == AMDGPU_FAMILY_SI) {
                                                pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
                                                                          0, 0, 0,
                                                                          sdma_write_length);
                                                pm4[i++] = 0xffffffff & bo2_mc;
                                                pm4[i++] = 0xffffffff & bo1_mc;
                                                pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
                                                pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
                                        } else {
                                                pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
                                                                       SDMA_COPY_SUB_OPCODE_LINEAR,
                                                                       0);
                                                if (family_id >= AMDGPU_FAMILY_AI)
                                                        pm4[i++] = sdma_write_length - 1;
                                                else
                                                        pm4[i++] = sdma_write_length;
                                                pm4[i++] = 0;
                                                pm4[i++] = 0xffffffff & bo1_mc;
                                                pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
                                                pm4[i++] = 0xffffffff & bo2_mc;
                                                pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
                                        }
                                } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
                                           (ip_type == AMDGPU_HW_IP_COMPUTE)) {
                                        if (family_id == AMDGPU_FAMILY_SI) {
                                                pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
                                                pm4[i++] = 0xfffffffc & bo1_mc;
                                                pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
                                                           PACKET3_DMA_DATA_SI_DST_SEL(0) |
                                                           PACKET3_DMA_DATA_SI_SRC_SEL(0) |
                                                           PACKET3_DMA_DATA_SI_CP_SYNC |
                                                           (0xffff00000000 & bo1_mc) >> 32;
                                                pm4[i++] = 0xfffffffc & bo2_mc;
                                                pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
                                                pm4[i++] = sdma_write_length;
                                        } else {
                                                pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
                                                pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
                                                           PACKET3_DMA_DATA_DST_SEL(0) |
                                                           PACKET3_DMA_DATA_SRC_SEL(0) |
                                                           PACKET3_DMA_DATA_CP_SYNC;
                                                pm4[i++] = 0xfffffffc & bo1_mc;
                                                pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
                                                pm4[i++] = 0xfffffffc & bo2_mc;
                                                pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
                                                pm4[i++] = sdma_write_length;
                                        }
                                }

                                amdgpu_test_exec_cs_helper(context_handle,
                                                           ip_type, ring_id,
                                                           i, pm4,
                                                           2, resources,
                                                           ib_info, ibs_request);

                                /* verify the copy-linear result matches the source pattern */
                                i = 0;
                                while(i < sdma_write_length) {
                                        CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
                                }
                                r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
                                                             sdma_write_length);
                                CU_ASSERT_EQUAL(r, 0);
                                r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
                                                             sdma_write_length);
                                CU_ASSERT_EQUAL(r, 0);
                                loop2++;
                        }
                        loop1++;
                }
        }
        /* clean resources */
        free(resources);
        free(ibs_request);
        free(ib_info);
        free(pm4);

        /* end of test */
        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_copy_linear(void)
{
        amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
        amdgpu_command_submission_sdma_write_linear();
        amdgpu_command_submission_sdma_const_fill();
        amdgpu_command_submission_sdma_copy_linear();
}

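/* Submit two gfx command streams, each pairing a constant-engine IB with a
 * DE IB that waits on the CE counter, then wait on both fences with wait_all
 * toggled by the caller. */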
static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
        void *ib_result_cpu, *ib_result_ce_cpu;
        uint64_t ib_result_mc_address, ib_result_ce_mc_address;
        struct amdgpu_cs_request ibs_request[2] = {0};
        struct amdgpu_cs_ib_info ib_info[2];
        struct amdgpu_cs_fence fence_status[2] = {0};
        uint32_t *ptr;
        uint32_t expired;
        amdgpu_bo_list_handle bo_list;
        amdgpu_va_handle va_handle, va_handle_ce;
        int r;
        int i = 0, ib_cs_num = 2;

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &ib_result_handle, &ib_result_cpu,
                                    &ib_result_mc_address, &va_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &ib_result_ce_handle, &ib_result_ce_cpu,
                                    &ib_result_ce_mc_address, &va_handle_ce);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_get_bo_list(device_handle, ib_result_handle,
                               ib_result_ce_handle, &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

        /* IT_SET_CE_DE_COUNTERS */
        ptr = ib_result_ce_cpu;
        if (family_id != AMDGPU_FAMILY_SI) {
                ptr[i++] = 0xc0008900;
                ptr[i++] = 0;
        }
        ptr[i++] = 0xc0008400;
        ptr[i++] = 1;
        ib_info[0].ib_mc_address = ib_result_ce_mc_address;
        ib_info[0].size = i;
        ib_info[0].flags = AMDGPU_IB_FLAG_CE;

        /* IT_WAIT_ON_CE_COUNTER */
        ptr = ib_result_cpu;
        ptr[0] = 0xc0008600;
        ptr[1] = 0x00000001;
        ib_info[1].ib_mc_address = ib_result_mc_address;
        ib_info[1].size = 2;

        for (i = 0; i < ib_cs_num; i++) {
                ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
                ibs_request[i].number_of_ibs = 2;
                ibs_request[i].ibs = ib_info;
                ibs_request[i].resources = bo_list;
                ibs_request[i].fence_info.handle = NULL;
        }

        r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
        CU_ASSERT_EQUAL(r, 0);

        for (i = 0; i < ib_cs_num; i++) {
                fence_status[i].context = context_handle;
                fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
                fence_status[i].fence = ibs_request[i].seq_no;
        }

        r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
                                  AMDGPU_TIMEOUT_INFINITE,
                                  &expired, NULL);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
                                     ib_result_mc_address, 4096);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
                                     ib_result_ce_mc_address, 4096);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_multi_fence(void)
{
        amdgpu_command_submission_multi_fence_wait_all(true);
        amdgpu_command_submission_multi_fence_wait_all(false);
}

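/* Map anonymous CPU pages into the GPU address space (userptr), run an SDMA
 * write-linear through the mapping and verify the result via the CPU
 * pointer. */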
static void amdgpu_userptr_test(void)
{
        int i, r, j;
        uint32_t *pm4 = NULL;
        uint64_t bo_mc;
        void *ptr = NULL;
        int pm4_dw = 256;
        int sdma_write_length = 4;
        amdgpu_bo_handle handle;
        amdgpu_context_handle context_handle;
        struct amdgpu_cs_ib_info *ib_info;
        struct amdgpu_cs_request *ibs_request;
        amdgpu_bo_handle buf_handle;
        amdgpu_va_handle va_handle;

        pm4 = calloc(pm4_dw, sizeof(*pm4));
        CU_ASSERT_NOT_EQUAL(pm4, NULL);

        ib_info = calloc(1, sizeof(*ib_info));
        CU_ASSERT_NOT_EQUAL(ib_info, NULL);

        ibs_request = calloc(1, sizeof(*ibs_request));
        CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
        CU_ASSERT_EQUAL(r, 0);
        CU_ASSERT_NOT_EQUAL(ptr, NULL);
        memset(ptr, 0, BUFFER_SIZE);

        r = amdgpu_create_bo_from_user_mem(device_handle,
                                           ptr, BUFFER_SIZE, &buf_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_va_range_alloc(device_handle,
                                  amdgpu_gpu_va_range_general,
                                  BUFFER_SIZE, 1, 0, &bo_mc,
                                  &va_handle, 0);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
        CU_ASSERT_EQUAL(r, 0);

        handle = buf_handle;

        j = i = 0;

        if (family_id == AMDGPU_FAMILY_SI)
                pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
                                sdma_write_length);
        else
                pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
                                SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
        pm4[i++] = 0xffffffff & bo_mc;
        pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
        if (family_id >= AMDGPU_FAMILY_AI)
                pm4[i++] = sdma_write_length - 1;
        else if (family_id != AMDGPU_FAMILY_SI)
                pm4[i++] = sdma_write_length;

        while (j++ < sdma_write_length)
                pm4[i++] = 0xdeadbeaf;

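        /* exercise copy-on-write: the child scribbles over its copy of the
         * PM4 buffer and exits; the parent's buffer and the userptr pages
         * must be unaffected by the fork */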
        if (!fork()) {
                pm4[0] = 0x0;
                exit(0);
        }

        amdgpu_test_exec_cs_helper(context_handle,
                                   AMDGPU_HW_IP_DMA, 0,
                                   i, pm4,
                                   1, &handle,
                                   ib_info, ibs_request);
        i = 0;
        while (i < sdma_write_length) {
                CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
        }
        free(ibs_request);
        free(ib_info);
        free(pm4);

        r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_va_range_free(va_handle);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_bo_free(buf_handle);
        CU_ASSERT_EQUAL(r, 0);
        free(ptr);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);

        wait(NULL);
}

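/* Submit a compute shader on one context, then a WRITE_DATA on another
 * context with a fence dependency on the first submission; the write may
 * only land after the shader has completed. */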
static void amdgpu_sync_dependency_test(void)
{
        amdgpu_context_handle context_handle[2];
        amdgpu_bo_handle ib_result_handle;
        void *ib_result_cpu;
        uint64_t ib_result_mc_address;
        struct amdgpu_cs_request ibs_request;
        struct amdgpu_cs_ib_info ib_info;
        struct amdgpu_cs_fence fence_status;
        uint32_t expired;
        int i, j, r;
        amdgpu_bo_list_handle bo_list;
        amdgpu_va_handle va_handle;
        static uint32_t *ptr;
        uint64_t seq_no;

        r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &ib_result_handle, &ib_result_cpu,
                                    &ib_result_mc_address, &va_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
                               &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        ptr = ib_result_cpu;
        i = 0;

        memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));

        /* Dispatch minimal init config and verify it's executed */
        ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
        ptr[i++] = 0x80000000;
        ptr[i++] = 0x80000000;

        ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
        ptr[i++] = 0x80000000;

        /* Program compute regs */
        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
        ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
        ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;

        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
        ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
        /*
         * 002c0040  COMPUTE_PGM_RSRC1 <- VGPRS = 0
         *                               SGPRS = 1
         *                               PRIORITY = 0
         *                               FLOAT_MODE = 192 (0xc0)
         *                               PRIV = 0
         *                               DX10_CLAMP = 1
         *                               DEBUG_MODE = 0
         *                               IEEE_MODE = 0
         *                               BULKY = 0
         *                               CDBG_USER = 0
         */
        ptr[i++] = 0x002c0040;

        /*
         * 00000010  COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
         *                               USER_SGPR = 8
         *                               TRAP_PRESENT = 0
         *                               TGID_X_EN = 0
         *                               TGID_Y_EN = 0
         *                               TGID_Z_EN = 0
         *                               TG_SIZE_EN = 0
         *                               TIDIG_COMP_CNT = 0
         *                               EXCP_EN_MSB = 0
         *                               LDS_SIZE = 0
         *                               EXCP_EN = 0
         */
        ptr[i++] = 0x00000010;

        /*
         * 00000100  COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
         *                                  WAVESIZE = 0
         */
        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
        ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
        ptr[i++] = 0x00000100;

        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
        ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
        ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
        ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;

        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
        ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
        ptr[i++] = 0;

        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
        ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
        ptr[i++] = 1;
        ptr[i++] = 1;
        ptr[i++] = 1;

        /* Dispatch */
        ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ptr[i++] = 1;
        ptr[i++] = 1;
        ptr[i++] = 1;
        ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */

        while (i & 7)
                ptr[i++] = 0xffff1000; /* type3 nop packet */

        memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
        ib_info.ib_mc_address = ib_result_mc_address;
        ib_info.size = i;

        memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
        ibs_request.ip_type = AMDGPU_HW_IP_GFX;
        ibs_request.ring = 0;
        ibs_request.number_of_ibs = 1;
        ibs_request.ibs = &ib_info;
        ibs_request.resources = bo_list;
        ibs_request.fence_info.handle = NULL;

        r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);
        seq_no = ibs_request.seq_no;

        /* Prepare second command with dependency on the first */
        j = i;
        ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
        ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
        ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
        ptr[i++] = 99;

        while (i & 7)
                ptr[i++] = 0xffff1000; /* type3 nop packet */

        memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
        ib_info.ib_mc_address = ib_result_mc_address + j * 4;
        ib_info.size = i - j;

        memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
        ibs_request.ip_type = AMDGPU_HW_IP_GFX;
        ibs_request.ring = 0;
        ibs_request.number_of_ibs = 1;
        ibs_request.ibs = &ib_info;
        ibs_request.resources = bo_list;
        ibs_request.fence_info.handle = NULL;

        ibs_request.number_of_dependencies = 1;

        ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
        ibs_request.dependencies[0].context = context_handle[1];
        ibs_request.dependencies[0].ip_instance = 0;
        ibs_request.dependencies[0].ring = 0;
        ibs_request.dependencies[0].fence = seq_no;

        r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
        fence_status.context = context_handle[0];
        fence_status.ip_type = AMDGPU_HW_IP_GFX;
        fence_status.ip_instance = 0;
        fence_status.ring = 0;
        fence_status.fence = ibs_request.seq_no;

        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE, 0, &expired);
        CU_ASSERT_EQUAL(r, 0);

        /* Expect the second command to wait for shader to complete */
        CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
                                     ib_result_mc_address, 4096);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle[0]);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_cs_ctx_free(context_handle[1]);
        CU_ASSERT_EQUAL(r, 0);

        free(ibs_request.dependencies);
}

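/* Build an intentionally long-running compute shader by repeating the body
 * section of the template 0x10000 times between its header and footer. */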
static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
{
        struct amdgpu_test_shader *shader;
        int i, loop = 0x10000;

        switch (family) {
                case AMDGPU_FAMILY_AI:
                        shader = &memcpy_cs_hang_slow_ai;
                        break;
                case AMDGPU_FAMILY_RV:
                        shader = &memcpy_cs_hang_slow_rv;
                        break;
                default:
                        return -1;
        }

        memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));

        for (i = 0; i < loop; i++)
                memcpy(ptr + shader->header_length + shader->body_length * i,
                        shader->shader + shader->header_length,
                        shader->body_length * sizeof(uint32_t));

        memcpy(ptr + shader->header_length + shader->body_length * loop,
                shader->shader + shader->header_length + shader->body_length,
                shader->foot_length * sizeof(uint32_t));

        return 0;
}

static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
                                           int cs_type)
{
        uint32_t shader_size;
        const uint32_t *shader;

        switch (cs_type) {
                case CS_BUFFERCLEAR:
                        shader = bufferclear_cs_shader_gfx9;
                        shader_size = sizeof(bufferclear_cs_shader_gfx9);
                        break;
                case CS_BUFFERCOPY:
                        shader = buffercopy_cs_shader_gfx9;
                        shader_size = sizeof(buffercopy_cs_shader_gfx9);
                        break;
                case CS_HANG:
                        shader = memcpy_ps_hang;
                        shader_size = sizeof(memcpy_ps_hang);
                        break;
                default:
                        return -1;
        }

        memcpy(ptr, shader, shader_size);
        return 0;
}

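/* Emit the common compute-state preamble for a dispatch IB; returns the
 * number of dwords written. */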
static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
{
        int i = 0;

        /* Write context control and load shadowing register if necessary */
        if (ip_type == AMDGPU_HW_IP_GFX) {
                ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
                ptr[i++] = 0x80000000;
                ptr[i++] = 0x80000000;
        }

        /* Issue commands to set default compute state. */
        /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
        ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
        ptr[i++] = 0x204;
        i += 3;

        /* clear mmCOMPUTE_TMPRING_SIZE */
        ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
        ptr[i++] = 0x218;
        ptr[i++] = 0;

        return i;
}

static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
{
        int i = 0;

        /* Issue commands to set the cu mask used in the current dispatch */
        /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
        ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
        ptr[i++] = 0x216;
        ptr[i++] = 0xffffffff;
        ptr[i++] = 0xffffffff;
        /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
        ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
        ptr[i++] = 0x219;
        ptr[i++] = 0xffffffff;
        ptr[i++] = 0xffffffff;

        return i;
}

static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
{
        int i, j;

        i = 0;

        /* Writes shader state to HW */
        /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
        ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
        ptr[i++] = 0x20c;
        ptr[i++] = (shader_addr >> 8);
        ptr[i++] = (shader_addr >> 40);
        /* write sh regs */
        for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
                ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
                /* - Gfx9ShRegBase */
                ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
                ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
        }

        return i;
}

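/* Dispatch the bufferclear compute shader to fill a VRAM buffer with the
 * 0x22 byte pattern (taken from the 0x22222222 constants below), then verify
 * the result through the CPU mapping. */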
static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
                                         uint32_t ip_type,
                                         uint32_t ring)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
        volatile unsigned char *ptr_dst;
        void *ptr_shader;
        uint32_t *ptr_cmd;
        uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
        amdgpu_va_handle va_dst, va_shader, va_cmd;
        int i, r;
        int bo_dst_size = 16384;
        int bo_shader_size = 4096;
        int bo_cmd_size = 4096;
        struct amdgpu_cs_request ibs_request = {0};
        struct amdgpu_cs_ib_info ib_info = {0};
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_fence fence_status = {0};
        uint32_t expired;

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
                                        AMDGPU_GEM_DOMAIN_GTT, 0,
                                        &bo_cmd, (void **)&ptr_cmd,
                                        &mc_address_cmd, &va_cmd);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_cmd, 0, bo_cmd_size);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_shader, &ptr_shader,
                                        &mc_address_shader, &va_shader);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_shader, 0, bo_shader_size);

        r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_dst, (void **)&ptr_dst,
                                        &mc_address_dst, &va_dst);
        CU_ASSERT_EQUAL(r, 0);

        i = 0;
        i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

        /* Issue commands to set the CU mask used by this dispatch */
        i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

        /* Writes shader state to HW */
        i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

        /* Write constant data */
        /* Writes the UAV constant data to the SGPRs. */
        ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
        ptr_cmd[i++] = 0x240;
        ptr_cmd[i++] = mc_address_dst;
        ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
        ptr_cmd[i++] = 0x400;
        ptr_cmd[i++] = 0x74fac;

        /* Sets a range of shader constants (the 0x22222222 clear pattern) */
        ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
        ptr_cmd[i++] = 0x244;
        ptr_cmd[i++] = 0x22222222;
        ptr_cmd[i++] = 0x22222222;
        ptr_cmd[i++] = 0x22222222;
        ptr_cmd[i++] = 0x22222222;

        /* clear mmCOMPUTE_RESOURCE_LIMITS */
        ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
        ptr_cmd[i++] = 0x215;
        ptr_cmd[i++] = 0;

        /* dispatch direct command */
        ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
        ptr_cmd[i++] = 0x10;
        ptr_cmd[i++] = 1;
        ptr_cmd[i++] = 1;
        ptr_cmd[i++] = 1;

        while (i & 7)
                ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

        resources[0] = bo_dst;
        resources[1] = bo_shader;
        resources[2] = bo_cmd;
        r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        ib_info.ib_mc_address = mc_address_cmd;
        ib_info.size = i;
        ibs_request.ip_type = ip_type;
        ibs_request.ring = ring;
        ibs_request.resources = bo_list;
        ibs_request.number_of_ibs = 1;
        ibs_request.ibs = &ib_info;
        ibs_request.fence_info.handle = NULL;

        /* submit CS */
        r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.ip_type = ip_type;
        fence_status.ip_instance = 0;
        fence_status.ring = ring;
        fence_status.context = context_handle;
        fence_status.fence = ibs_request.seq_no;

        /* wait for the IB to complete */
        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE,
                                         0, &expired);
        CU_ASSERT_EQUAL(r, 0);
        CU_ASSERT_EQUAL(expired, true);

        /* verify the memset result matches the expected 0x22 pattern */
        i = 0;
        while (i < bo_dst_size) {
                CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
        }

        r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

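/*
 * Dispatch-based memcpy: the CS_BUFFERCOPY shader reads the 0x55-filled
 * source buffer through the descriptor at SGPR 0x240 and writes it
 * through the UAV descriptor at 0x244.  With hang set, the CS_HANG shader
 * is loaded instead and a GPU reset is expected rather than a data match.
 */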
static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
                                        uint32_t ip_type,
                                        uint32_t ring,
                                        int hang)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
        volatile unsigned char *ptr_dst;
        void *ptr_shader;
        unsigned char *ptr_src;
        uint32_t *ptr_cmd;
        uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
        amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
        int i, r;
        int bo_dst_size = 16384;
        int bo_shader_size = 4096;
        int bo_cmd_size = 4096;
        struct amdgpu_cs_request ibs_request = {0};
        struct amdgpu_cs_ib_info ib_info = {0};
        uint32_t expired, hang_state, hangs;
        enum cs_type cs_type;
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_fence fence_status = {0};

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &bo_cmd, (void **)&ptr_cmd,
                                    &mc_address_cmd, &va_cmd);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_cmd, 0, bo_cmd_size);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_shader, &ptr_shader,
                                        &mc_address_shader, &va_shader);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_shader, 0, bo_shader_size);

        cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
        r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_src, (void **)&ptr_src,
                                        &mc_address_src, &va_src);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_dst, (void **)&ptr_dst,
                                        &mc_address_dst, &va_dst);
        CU_ASSERT_EQUAL(r, 0);

        memset(ptr_src, 0x55, bo_dst_size);

        i = 0;
        i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

        /* Issue commands to set the CU mask used by this dispatch */
        i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

        /* Writes shader state to HW */
        i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

        /* Write constant data */
        /* Writes the texture resource constant data to the SGPRs */
        ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
        ptr_cmd[i++] = 0x240;
        ptr_cmd[i++] = mc_address_src;
        ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
        ptr_cmd[i++] = 0x400;
        ptr_cmd[i++] = 0x74fac;

        /* Writes the UAV constant data to the SGPRs. */
        ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
        ptr_cmd[i++] = 0x244;
        ptr_cmd[i++] = mc_address_dst;
        ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
        ptr_cmd[i++] = 0x400;
        ptr_cmd[i++] = 0x74fac;

        /* clear mmCOMPUTE_RESOURCE_LIMITS */
        ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
        ptr_cmd[i++] = 0x215;
        ptr_cmd[i++] = 0;

        /* dispatch direct command */
        ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
        ptr_cmd[i++] = 0x10;
        ptr_cmd[i++] = 1;
        ptr_cmd[i++] = 1;
        ptr_cmd[i++] = 1;

        while (i & 7)
                ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

        resources[0] = bo_shader;
        resources[1] = bo_src;
        resources[2] = bo_dst;
        resources[3] = bo_cmd;
        r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        ib_info.ib_mc_address = mc_address_cmd;
        ib_info.size = i;
        ibs_request.ip_type = ip_type;
        ibs_request.ring = ring;
        ibs_request.resources = bo_list;
        ibs_request.number_of_ibs = 1;
        ibs_request.ibs = &ib_info;
        ibs_request.fence_info.handle = NULL;
        r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.ip_type = ip_type;
        fence_status.ip_instance = 0;
        fence_status.ring = ring;
        fence_status.context = context_handle;
        fence_status.fence = ibs_request.seq_no;

        /* wait for the IB to complete */
        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE,
                                         0, &expired);

        if (!hang) {
                CU_ASSERT_EQUAL(r, 0);
                CU_ASSERT_EQUAL(expired, true);

                /* verify the memcpy result matches the source pattern */
                i = 0;
                while (i < bo_dst_size) {
                        CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
                        i++;
                }
        } else {
                r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
                CU_ASSERT_EQUAL(r, 0);
                CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
        }

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

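/*
 * The helpers below run the memset/memcpy dispatch tests on every
 * available ring.  available_rings is a bitmask, so the ring loop walks
 * consecutive set bits from ring 0 and never executes when no ring is
 * present; the SKIP message is informational only.
 */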
static void amdgpu_compute_dispatch_test(void)
{
        int r;
        struct drm_amdgpu_info_hw_ip info;
        uint32_t ring_id;

        r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
        CU_ASSERT_EQUAL(r, 0);
        if (!info.available_rings)
                printf("SKIP ... as there's no compute ring\n");

        for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
                amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
                amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
        }
}

static void amdgpu_gfx_dispatch_test(void)
{
        int r;
        struct drm_amdgpu_info_hw_ip info;
        uint32_t ring_id;

        r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
        CU_ASSERT_EQUAL(r, 0);
        if (!info.available_rings)
                printf("SKIP ... as there's no graphics ring\n");

        for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
                amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
                amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
        }
}

void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
        int r;
        struct drm_amdgpu_info_hw_ip info;
        uint32_t ring_id;

        r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
        CU_ASSERT_EQUAL(r, 0);
        if (!info.available_rings)
                printf("SKIP ... as there's no ring for ip %d\n", ip_type);

        for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
                amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
                amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
                amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
        }
}

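/*
 * Slow-hang variant of the memcpy dispatch test: a deliberately oversized
 * copy shader (amdgpu_dispatch_load_cs_shader_hang_slow) is dispatched as
 * 0x10000 thread groups over a 64 MiB buffer, so instead of verifying the
 * copy the test expects the context to report a GPU reset.
 */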
static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
                                                  uint32_t ip_type, uint32_t ring)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
        volatile unsigned char *ptr_dst;
        void *ptr_shader;
        unsigned char *ptr_src;
        uint32_t *ptr_cmd;
        uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
        amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
        int i, r;
        int bo_dst_size = 0x4000000;
        int bo_shader_size = 0x400000;
        int bo_cmd_size = 4096;
        struct amdgpu_cs_request ibs_request = {0};
        struct amdgpu_cs_ib_info ib_info = {0};
        uint32_t hang_state, hangs, expired;
        struct amdgpu_gpu_info gpu_info = {0};
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_fence fence_status = {0};

        r = amdgpu_query_gpu_info(device_handle, &gpu_info);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &bo_cmd, (void **)&ptr_cmd,
                                    &mc_address_cmd, &va_cmd);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_cmd, 0, bo_cmd_size);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_shader, &ptr_shader,
                                        &mc_address_shader, &va_shader);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_shader, 0, bo_shader_size);

        r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_src, (void **)&ptr_src,
                                        &mc_address_src, &va_src);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_dst, (void **)&ptr_dst,
                                        &mc_address_dst, &va_dst);
        CU_ASSERT_EQUAL(r, 0);

        memset(ptr_src, 0x55, bo_dst_size);

        i = 0;
        i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

        /* Issue commands to set the CU mask used by this dispatch */
        i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

        /* Writes shader state to HW */
        i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

        /* Write constant data */
        /* Writes the texture resource constant data to the SGPRs */
        ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
        ptr_cmd[i++] = 0x240;
        ptr_cmd[i++] = mc_address_src;
        ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
        ptr_cmd[i++] = 0x400000;
        ptr_cmd[i++] = 0x74fac;

        /* Writes the UAV constant data to the SGPRs. */
        ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
        ptr_cmd[i++] = 0x244;
        ptr_cmd[i++] = mc_address_dst;
        ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
        ptr_cmd[i++] = 0x400000;
        ptr_cmd[i++] = 0x74fac;

        /* clear mmCOMPUTE_RESOURCE_LIMITS */
        ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
        ptr_cmd[i++] = 0x215;
        ptr_cmd[i++] = 0;

        /* dispatch direct command */
        ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
        ptr_cmd[i++] = 0x10000;
        ptr_cmd[i++] = 1;
        ptr_cmd[i++] = 1;
        ptr_cmd[i++] = 1;

        while (i & 7)
                ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

        resources[0] = bo_shader;
        resources[1] = bo_src;
        resources[2] = bo_dst;
        resources[3] = bo_cmd;
        r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        ib_info.ib_mc_address = mc_address_cmd;
        ib_info.size = i;
        ibs_request.ip_type = ip_type;
        ibs_request.ring = ring;
        ibs_request.resources = bo_list;
        ibs_request.number_of_ibs = 1;
        ibs_request.ibs = &ib_info;
        ibs_request.fence_info.handle = NULL;
        r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.ip_type = ip_type;
        fence_status.ip_instance = 0;
        fence_status.ring = ring;
        fence_status.context = context_handle;
        fence_status.fence = ibs_request.seq_no;

        /* wait for the IB to complete; the query result itself is not
         * checked because the dispatch is expected to hang */
        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE,
                                         0, &expired);

        r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
        CU_ASSERT_EQUAL(r, 0);
        CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
        int r;
        struct drm_amdgpu_info_hw_ip info;
        uint32_t ring_id;

        r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
        CU_ASSERT_EQUAL(r, 0);
        if (!info.available_rings)
                printf("SKIP ... as there's no ring for ip %d\n", ip_type);

        for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
                amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
                amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
                amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
        }
}

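/*
 * Build an oversized pixel shader for the slow-hang draw test by repeating
 * the shader body 0x40000 times between its header and footer.  Only the
 * gfx9 families (AI/RV) have a prebuilt hang-slow shader here; anything
 * else returns -1 and the test cannot run.
 */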
static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
{
        struct amdgpu_test_shader *shader;
        int i, loop = 0x40000;

        switch (family) {
                case AMDGPU_FAMILY_AI:
                case AMDGPU_FAMILY_RV:
                        shader = &memcpy_ps_hang_slow_ai;
                        break;
                default:
                        return -1;
        }

        memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));

        for (i = 0; i < loop; i++)
                memcpy(ptr + shader->header_length + shader->body_length * i,
                        shader->shader + shader->header_length,
                        shader->body_length * sizeof(uint32_t));

        memcpy(ptr + shader->header_length + shader->body_length * loop,
                shader->shader + shader->header_length + shader->body_length,
                shader->foot_length * sizeof(uint32_t));

        return 0;
}

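/*
 * Copy the selected pixel shader into the shader BO: ten copies are laid
 * out 256 bytes apart, and each copy's patch point is then overwritten
 * with the matching entry from the patchinfo table (per-copy variants of
 * the shader epilogue, presumably for different export formats).
 */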
static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
{
        int i;
        uint32_t shader_offset = 256;
        uint32_t mem_offset, patch_code_offset;
        uint32_t shader_size, patchinfo_code_size;
        const uint32_t *shader;
        const uint32_t *patchinfo_code;
        const uint32_t *patchcode_offset;

        switch (ps_type) {
                case PS_CONST:
                        shader = ps_const_shader_gfx9;
                        shader_size = sizeof(ps_const_shader_gfx9);
                        patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
                        patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
                        patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
                        break;
                case PS_TEX:
                        shader = ps_tex_shader_gfx9;
                        shader_size = sizeof(ps_tex_shader_gfx9);
                        patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
                        patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
                        patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
                        break;
                case PS_HANG:
                        shader = memcpy_ps_hang;
                        shader_size = sizeof(memcpy_ps_hang);

                        memcpy(ptr, shader, shader_size);
                        return 0;
                default:
                        return -1;
        }

        /* write main shader program */
        for (i = 0; i < 10; i++) {
                mem_offset = i * shader_offset;
                memcpy(ptr + mem_offset, shader, shader_size);
        }

        /* overwrite patch codes */
        for (i = 0; i < 10; i++) {
                mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
                patch_code_offset = i * patchinfo_code_size;
                memcpy(ptr + mem_offset,
                        patchinfo_code + patch_code_offset,
                        patchinfo_code_size * sizeof(uint32_t));
        }

        return 0;
}

/* load RectPosTexFast_VS */
static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
{
        const uint32_t *shader;
        uint32_t shader_size;

        shader = vs_RectPosTexFast_shader_gfx9;
        shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);

        memcpy(ptr, shader, shader_size);

        return 0;
}

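/*
 * Emit the draw preamble: a CONTEXT_CONTROL packet enabling register
 * loads and shadowing, followed by the canned gfx9 preamble state
 * (preamblecache_gfx9).
 */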
static int amdgpu_draw_init(uint32_t *ptr)
{
        int i = 0;
        const uint32_t *preamblecache_ptr;
        uint32_t preamblecache_size;

        /* Write context control and load shadowing register if necessary */
        ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
        ptr[i++] = 0x80000000;
        ptr[i++] = 0x80000000;

        preamblecache_ptr = preamblecache_gfx9;
        preamblecache_size = sizeof(preamblecache_gfx9);

        memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
        return i + preamblecache_size / sizeof(uint32_t);
}

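/*
 * Program the gfx9 color-buffer surface state for the draw tests: the
 * CB_COLOR0 block listed below, the MRT0 pitch, and a minimal depth-buffer
 * setup.  In hang_slow mode the surface dimensions are enlarged so the
 * draw runs long enough to trigger the GPU timeout.
 */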
static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
                                                         uint64_t dst_addr,
                                                         int hang_slow)
{
        int i = 0;

        /* setup color buffer */
        /* offset   reg
           0xA318   CB_COLOR0_BASE
           0xA319   CB_COLOR0_BASE_EXT
           0xA31A   CB_COLOR0_ATTRIB2
           0xA31B   CB_COLOR0_VIEW
           0xA31C   CB_COLOR0_INFO
           0xA31D   CB_COLOR0_ATTRIB
           0xA31E   CB_COLOR0_DCC_CONTROL
           0xA31F   CB_COLOR0_CMASK
           0xA320   CB_COLOR0_CMASK_BASE_EXT
           0xA321   CB_COLOR0_FMASK
           0xA322   CB_COLOR0_FMASK_BASE_EXT
           0xA323   CB_COLOR0_CLEAR_WORD0
           0xA324   CB_COLOR0_CLEAR_WORD1
           0xA325   CB_COLOR0_DCC_BASE
           0xA326   CB_COLOR0_DCC_BASE_EXT */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
        ptr[i++] = 0x318;
        ptr[i++] = dst_addr >> 8;
        ptr[i++] = dst_addr >> 40;
        ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
        ptr[i++] = 0;
        ptr[i++] = 0x50438;
        ptr[i++] = 0x10140000;
        i += 9;

        /* mmCB_MRT0_EPITCH */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr[i++] = 0x1e8;
        ptr[i++] = hang_slow ? 0x7ff : 0x1f;

        /* 0xA32B   CB_COLOR1_INFO (zeroed to disable MRT1) */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr[i++] = 0x32b;
        ptr[i++] = 0;

        /* 0xA33A   CB_COLOR2_INFO (zeroed to disable MRT2) */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr[i++] = 0x33a;
        ptr[i++] = 0;

        /* SPI_SHADER_COL_FORMAT */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr[i++] = 0x1c5;
        ptr[i++] = 9;

        /* Setup depth buffer */
        /* mmDB_Z_INFO */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
        ptr[i++] = 0xe;
        i += 2;

        return i;
}

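/*
 * Emit the scan-converter and antialiasing state, then the canned gfx9
 * draw state (cached_cmd_gfx9).  For hang_slow, dword 12 of the cached
 * state is patched to 0x8000800, presumably enlarging a screen-extent
 * value to match the oversized render target.
 */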
static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
{
        int i = 0;
        const uint32_t *cached_cmd_ptr;
        uint32_t cached_cmd_size;

        /* mmPA_SC_TILE_STEERING_OVERRIDE */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr[i++] = 0xd7;
        ptr[i++] = 0;

        ptr[i++] = 0xffff1000;
        ptr[i++] = 0xc0021000;

        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr[i++] = 0xd7;
        ptr[i++] = 1;

        /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
        ptr[i++] = 0x2fe;
        i += 16;

        /* mmPA_SC_CENTROID_PRIORITY_0 */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
        ptr[i++] = 0x2f5;
        i += 2;

        cached_cmd_ptr = cached_cmd_gfx9;
        cached_cmd_size = sizeof(cached_cmd_gfx9);

        memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
        if (hang_slow)
                *(ptr + i + 12) = 0x8000800;
        i += cached_cmd_size / sizeof(uint32_t);

        return i;
}

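/*
 * Program the RectPosTexFast vertex shader: its GPU address and RSRC
 * words, plus rectangle corner coordinates passed via SH registers 0x4c
 * onward (0x42000000 = 32.0f, or 0x45000000 = 2048.0f for the hang_slow
 * surface); PS_TEX also supplies 1.0f texture coordinates there.
 */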
static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
                                                  int ps_type,
                                                  uint64_t shader_addr,
                                                  int hang_slow)
{
        int i = 0;

        /* mmPA_CL_VS_OUT_CNTL */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr[i++] = 0x207;
        ptr[i++] = 0;

        /* mmSPI_SHADER_PGM_RSRC3_VS */
        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
        ptr[i++] = 0x46;
        ptr[i++] = 0xffff;

        /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
        ptr[i++] = 0x48;
        ptr[i++] = shader_addr >> 8;
        ptr[i++] = shader_addr >> 40;

        /* mmSPI_SHADER_PGM_RSRC1_VS */
        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
        ptr[i++] = 0x4a;
        ptr[i++] = 0xc0081;
        /* mmSPI_SHADER_PGM_RSRC2_VS */
        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
        ptr[i++] = 0x4b;
        ptr[i++] = 0x18;

        /* mmSPI_VS_OUT_CONFIG */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr[i++] = 0x1b1;
        ptr[i++] = 2;

        /* mmSPI_SHADER_POS_FORMAT */
        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr[i++] = 0x1c3;
        ptr[i++] = 4;

        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
        ptr[i++] = 0x4c;
        i += 2;
        ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
        ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;

        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
        ptr[i++] = 0x50;
        i += 2;
        if (ps_type == PS_CONST) {
                i += 2;
        } else if (ps_type == PS_TEX) {
                ptr[i++] = 0x3f800000;
                ptr[i++] = 0x3f800000;
        }

        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
        ptr[i++] = 0x54;
        i += 4;

        return i;
}

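/*
 * Program the pixel shader.  The PS binary used is the last of the ten
 * copies laid out by amdgpu_draw_load_ps_shader (offset 9 * 256), and the
 * per-type SH/context register lists are replayed, skipping 0xA1C5
 * (SPI_SHADER_COL_FORMAT, already set) and writing 0x1b3 (presumably
 * mmSPI_PS_INPUT_ENA) alongside 0xA1B4.
 */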
static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
                                   int ps_type,
                                   uint64_t shader_addr)
{
        int i, j;
        const uint32_t *sh_registers;
        const uint32_t *context_registers;
        uint32_t num_sh_reg, num_context_reg;

        if (ps_type == PS_CONST) {
                sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
                context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
                num_sh_reg = ps_num_sh_registers_gfx9;
                num_context_reg = ps_num_context_registers_gfx9;
        } else if (ps_type == PS_TEX) {
                sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
                context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
                num_sh_reg = ps_num_sh_registers_gfx9;
                num_context_reg = ps_num_context_registers_gfx9;
        }

        i = 0;

        /* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
           0x2c08   SPI_SHADER_PGM_LO_PS
           0x2c09   SPI_SHADER_PGM_HI_PS */
        shader_addr += 256 * 9;
        ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
        ptr[i++] = 0x7;
        ptr[i++] = 0xffff;
        ptr[i++] = shader_addr >> 8;
        ptr[i++] = shader_addr >> 40;

        for (j = 0; j < num_sh_reg; j++) {
                ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
                ptr[i++] = sh_registers[j * 2] - 0x2c00;
                ptr[i++] = sh_registers[j * 2 + 1];
        }

        for (j = 0; j < num_context_reg; j++) {
                if (context_registers[j * 2] != 0xA1C5) {
                        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
                        ptr[i++] = context_registers[j * 2] - 0xa000;
                        ptr[i++] = context_registers[j * 2 + 1];
                }

                if (context_registers[j * 2] == 0xA1B4) {
                        ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
                        ptr[i++] = 0x1b3;
                        ptr[i++] = 2;
                }
        }

        return i;
}

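/*
 * Issue the draw itself: IA_MULTI_VGT_PARAM, VGT_PRIMITIVE_TYPE (0x11 is
 * assumed to be the gfx9 rect-list primitive) and a three-vertex
 * DRAW_INDEX_AUTO.
 */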
static int amdgpu_draw_draw(uint32_t *ptr)
{
        int i = 0;

        /* mmIA_MULTI_VGT_PARAM */
        ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ptr[i++] = 0x40000258;
        ptr[i++] = 0xd00ff;

        /* mmVGT_PRIMITIVE_TYPE */
        ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ptr[i++] = 0x10000242;
        ptr[i++] = 0x11;

        ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
        ptr[i++] = 3;
        ptr[i++] = 2;

        return i;
}

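/*
 * Draw-based counterpart of the memset dispatch test: rasterize a
 * full-surface rectangle with the PS_CONST pixel shader so the 16 KiB
 * render target is filled with the 0x33333333 constant loaded into the
 * PS SGPRs, then verify the result from the CPU.
 */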
void amdgpu_memset_draw(amdgpu_device_handle device_handle,
                        amdgpu_bo_handle bo_shader_ps,
                        amdgpu_bo_handle bo_shader_vs,
                        uint64_t mc_address_shader_ps,
                        uint64_t mc_address_shader_vs,
                        uint32_t ring_id)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
        volatile unsigned char *ptr_dst;
        uint32_t *ptr_cmd;
        uint64_t mc_address_dst, mc_address_cmd;
        amdgpu_va_handle va_dst, va_cmd;
        int i, r;
        int bo_dst_size = 16384;
        int bo_cmd_size = 4096;
        struct amdgpu_cs_request ibs_request = {0};
        struct amdgpu_cs_ib_info ib_info = {0};
        struct amdgpu_cs_fence fence_status = {0};
        uint32_t expired;
        amdgpu_bo_list_handle bo_list;

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
                                        AMDGPU_GEM_DOMAIN_GTT, 0,
                                        &bo_cmd, (void **)&ptr_cmd,
                                        &mc_address_cmd, &va_cmd);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_cmd, 0, bo_cmd_size);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_dst, (void **)&ptr_dst,
                                        &mc_address_dst, &va_dst);
        CU_ASSERT_EQUAL(r, 0);

        i = 0;
        i += amdgpu_draw_init(ptr_cmd + i);

        i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

        i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

        i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);

        i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);

        ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
        ptr_cmd[i++] = 0xc;
        ptr_cmd[i++] = 0x33333333;
        ptr_cmd[i++] = 0x33333333;
        ptr_cmd[i++] = 0x33333333;
        ptr_cmd[i++] = 0x33333333;

        i += amdgpu_draw_draw(ptr_cmd + i);

        while (i & 7)
                ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

        resources[0] = bo_dst;
        resources[1] = bo_shader_ps;
        resources[2] = bo_shader_vs;
        resources[3] = bo_cmd;
        r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        ib_info.ib_mc_address = mc_address_cmd;
        ib_info.size = i;
        ibs_request.ip_type = AMDGPU_HW_IP_GFX;
        ibs_request.ring = ring_id;
        ibs_request.resources = bo_list;
        ibs_request.number_of_ibs = 1;
        ibs_request.ibs = &ib_info;
        ibs_request.fence_info.handle = NULL;

        /* submit CS */
        r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.ip_type = AMDGPU_HW_IP_GFX;
        fence_status.ip_instance = 0;
        fence_status.ring = ring_id;
        fence_status.context = context_handle;
        fence_status.fence = ibs_request.seq_no;

        /* wait for the IB to complete */
        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE,
                                         0, &expired);
        CU_ASSERT_EQUAL(r, 0);
        CU_ASSERT_EQUAL(expired, true);

        /* verify the memset result matches the expected 0x33 pattern */
        i = 0;
        while (i < bo_dst_size) {
                CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
        }

        r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
                                    uint32_t ring)
{
        amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
        void *ptr_shader_ps;
        void *ptr_shader_vs;
        uint64_t mc_address_shader_ps, mc_address_shader_vs;
        amdgpu_va_handle va_shader_ps, va_shader_vs;
        int r;
        int bo_shader_size = 4096;

        r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_shader_ps, &ptr_shader_ps,
                                        &mc_address_shader_ps, &va_shader_ps);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_shader_ps, 0, bo_shader_size);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_shader_vs, &ptr_shader_vs,
                                        &mc_address_shader_vs, &va_shader_vs);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_shader_vs, 0, bo_shader_size);

        r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
        CU_ASSERT_EQUAL(r, 0);

        amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
                        mc_address_shader_ps, mc_address_shader_vs, ring);

        r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
        CU_ASSERT_EQUAL(r, 0);
}

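/*
 * Draw-based memcpy: the PS_TEX pixel shader samples the 0x55-filled
 * source buffer and writes it to the destination render target.  The
 * SET_SH_REG writes at 0xc and 0x14 below are assumed to be the source
 * texture descriptor and sampler.  With hang set, the caller loads the
 * PS_HANG shader instead and a GPU reset is expected.
 */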
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
                               amdgpu_bo_handle bo_shader_ps,
                               amdgpu_bo_handle bo_shader_vs,
                               uint64_t mc_address_shader_ps,
                               uint64_t mc_address_shader_vs,
                               uint32_t ring, int hang)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
        volatile unsigned char *ptr_dst;
        unsigned char *ptr_src;
        uint32_t *ptr_cmd;
        uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
        amdgpu_va_handle va_dst, va_src, va_cmd;
        int i, r;
        int bo_size = 16384;
        int bo_cmd_size = 4096;
        struct amdgpu_cs_request ibs_request = {0};
        struct amdgpu_cs_ib_info ib_info = {0};
        uint32_t hang_state, hangs;
        uint32_t expired;
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_fence fence_status = {0};

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &bo_cmd, (void **)&ptr_cmd,
                                    &mc_address_cmd, &va_cmd);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_cmd, 0, bo_cmd_size);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_src, (void **)&ptr_src,
                                        &mc_address_src, &va_src);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_dst, (void **)&ptr_dst,
                                        &mc_address_dst, &va_dst);
        CU_ASSERT_EQUAL(r, 0);

        memset(ptr_src, 0x55, bo_size);

        i = 0;
        i += amdgpu_draw_init(ptr_cmd + i);

        i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

        i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

        i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);

        i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

        ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
        ptr_cmd[i++] = 0xc;
        ptr_cmd[i++] = mc_address_src >> 8;
        ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
        ptr_cmd[i++] = 0x7c01f;
        ptr_cmd[i++] = 0x90500fac;
        ptr_cmd[i++] = 0x3e000;
        i += 3;

        ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
        ptr_cmd[i++] = 0x14;
        ptr_cmd[i++] = 0x92;
        i += 3;

        ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr_cmd[i++] = 0x191;
        ptr_cmd[i++] = 0;

        i += amdgpu_draw_draw(ptr_cmd + i);

        while (i & 7)
                ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

        resources[0] = bo_dst;
        resources[1] = bo_src;
        resources[2] = bo_shader_ps;
        resources[3] = bo_shader_vs;
        resources[4] = bo_cmd;
        r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        ib_info.ib_mc_address = mc_address_cmd;
        ib_info.size = i;
        ibs_request.ip_type = AMDGPU_HW_IP_GFX;
        ibs_request.ring = ring;
        ibs_request.resources = bo_list;
        ibs_request.number_of_ibs = 1;
        ibs_request.ibs = &ib_info;
        ibs_request.fence_info.handle = NULL;
        r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.ip_type = AMDGPU_HW_IP_GFX;
        fence_status.ip_instance = 0;
        fence_status.ring = ring;
        fence_status.context = context_handle;
        fence_status.fence = ibs_request.seq_no;

        /* wait for the IB to complete */
        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE,
                                         0, &expired);
        if (!hang) {
                CU_ASSERT_EQUAL(r, 0);
                CU_ASSERT_EQUAL(expired, true);

                /* verify the memcpy result matches the source pattern */
                i = 0;
                while (i < bo_size) {
                        CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
                        i++;
                }
        } else {
                r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
                CU_ASSERT_EQUAL(r, 0);
                CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
        }

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
                             int hang)
{
        amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
        void *ptr_shader_ps;
        void *ptr_shader_vs;
        uint64_t mc_address_shader_ps, mc_address_shader_vs;
        amdgpu_va_handle va_shader_ps, va_shader_vs;
        int bo_shader_size = 4096;
        enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
        int r;

        r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_shader_ps, &ptr_shader_ps,
                                        &mc_address_shader_ps, &va_shader_ps);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_shader_ps, 0, bo_shader_size);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_shader_vs, &ptr_shader_vs,
                                        &mc_address_shader_vs, &va_shader_vs);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_shader_vs, 0, bo_shader_size);

        r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
        CU_ASSERT_EQUAL(r, 0);

        amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
                        mc_address_shader_ps, mc_address_shader_vs, ring, hang);

        r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
        CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_draw_test(void)
{
        int r;
        struct drm_amdgpu_info_hw_ip info;
        uint32_t ring_id;

        r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
        CU_ASSERT_EQUAL(r, 0);
        if (!info.available_rings)
                printf("SKIP ... as there's no graphics ring\n");

        for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
                amdgpu_memset_draw_test(device_handle, ring_id);
                amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
        }
}

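/*
 * Slow-hang variant of the draw memcpy test: an oversized pixel shader
 * (amdgpu_draw_load_ps_shader_hang_slow) plus a 64 MiB surface keep the
 * draw running long enough to trip the GPU timeout, after which the
 * context is expected to report AMDGPU_CTX_UNKNOWN_RESET.
 */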
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
        amdgpu_context_handle context_handle;
        amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
        amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
        void *ptr_shader_ps;
        void *ptr_shader_vs;
        volatile unsigned char *ptr_dst;
        unsigned char *ptr_src;
        uint32_t *ptr_cmd;
        uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
        uint64_t mc_address_shader_ps, mc_address_shader_vs;
        amdgpu_va_handle va_shader_ps, va_shader_vs;
        amdgpu_va_handle va_dst, va_src, va_cmd;
        struct amdgpu_gpu_info gpu_info = {0};
        int i, r;
        int bo_size = 0x4000000;
        int bo_shader_ps_size = 0x400000;
        int bo_shader_vs_size = 4096;
        int bo_cmd_size = 4096;
        struct amdgpu_cs_request ibs_request = {0};
        struct amdgpu_cs_ib_info ib_info = {0};
        uint32_t hang_state, hangs, expired;
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_fence fence_status = {0};

        r = amdgpu_query_gpu_info(device_handle, &gpu_info);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    &bo_cmd, (void **)&ptr_cmd,
                                    &mc_address_cmd, &va_cmd);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_cmd, 0, bo_cmd_size);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_shader_ps, &ptr_shader_ps,
                                        &mc_address_shader_ps, &va_shader_ps);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_shader_ps, 0, bo_shader_ps_size);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_shader_vs, &ptr_shader_vs,
                                        &mc_address_shader_vs, &va_shader_vs);
        CU_ASSERT_EQUAL(r, 0);
        memset(ptr_shader_vs, 0, bo_shader_vs_size);

        r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_src, (void **)&ptr_src,
                                        &mc_address_src, &va_src);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
                                        AMDGPU_GEM_DOMAIN_VRAM, 0,
                                        &bo_dst, (void **)&ptr_dst,
                                        &mc_address_dst, &va_dst);
        CU_ASSERT_EQUAL(r, 0);

        memset(ptr_src, 0x55, bo_size);

        i = 0;
        i += amdgpu_draw_init(ptr_cmd + i);

        i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

        i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

        i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
                                                        mc_address_shader_vs, 1);

        i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

        ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
        ptr_cmd[i++] = 0xc;
        ptr_cmd[i++] = mc_address_src >> 8;
        ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
        ptr_cmd[i++] = 0x1ffc7ff;
        ptr_cmd[i++] = 0x90500fac;
        ptr_cmd[i++] = 0xffe000;
        i += 3;

        ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
        ptr_cmd[i++] = 0x14;
        ptr_cmd[i++] = 0x92;
        i += 3;

        ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
        ptr_cmd[i++] = 0x191;
        ptr_cmd[i++] = 0;

        i += amdgpu_draw_draw(ptr_cmd + i);

        while (i & 7)
                ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

        resources[0] = bo_dst;
        resources[1] = bo_src;
        resources[2] = bo_shader_ps;
        resources[3] = bo_shader_vs;
        resources[4] = bo_cmd;
        r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
        CU_ASSERT_EQUAL(r, 0);

        ib_info.ib_mc_address = mc_address_cmd;
        ib_info.size = i;
        ibs_request.ip_type = AMDGPU_HW_IP_GFX;
        ibs_request.ring = ring;
        ibs_request.resources = bo_list;
        ibs_request.number_of_ibs = 1;
        ibs_request.ibs = &ib_info;
        ibs_request.fence_info.handle = NULL;
        r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
        CU_ASSERT_EQUAL(r, 0);

        fence_status.ip_type = AMDGPU_HW_IP_GFX;
        fence_status.ip_instance = 0;
        fence_status.ring = ring;
        fence_status.context = context_handle;
        fence_status.fence = ibs_request.seq_no;

        /* wait for the IB to complete; the query result itself is not
         * checked because the draw is expected to hang */
        r = amdgpu_cs_query_fence_status(&fence_status,
                                         AMDGPU_TIMEOUT_INFINITE,
                                         0, &expired);

        r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
        CU_ASSERT_EQUAL(r, 0);
        CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

        r = amdgpu_bo_list_destroy(bo_list);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
        CU_ASSERT_EQUAL(r, 0);
        r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
        CU_ASSERT_EQUAL(r, 0);

        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);
}

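/*
 * Force a GPU reset by reading the amdgpu_gpu_recover debugfs entry of the
 * device under test, check that an existing context observes the reset,
 * then re-run the dispatch tests to confirm the GPU came back healthy.
 */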
static void amdgpu_gpu_reset_test(void)
{
        int r;
        char debugfs_path[256], tmp[10];
        int fd;
        struct stat sbuf;
        amdgpu_context_handle context_handle;
        uint32_t hang_state, hangs;

        r = amdgpu_cs_ctx_create(device_handle, &context_handle);
        CU_ASSERT_EQUAL(r, 0);

        r = fstat(drm_amdgpu[0], &sbuf);
        CU_ASSERT_EQUAL(r, 0);

        sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
        fd = open(debugfs_path, O_RDONLY);
        CU_ASSERT(fd >= 0);

        r = read(fd, tmp, sizeof(tmp));
        CU_ASSERT(r > 0);

        r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
        CU_ASSERT_EQUAL(r, 0);
        CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

        close(fd);
        r = amdgpu_cs_ctx_free(context_handle);
        CU_ASSERT_EQUAL(r, 0);

        amdgpu_compute_dispatch_test();
        amdgpu_gfx_dispatch_test();
}