tests/amdgpu: use appropriate ring for different asics
[platform/upstream/libdrm.git] tests/amdgpu/cp_dma_tests.c
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>	/* memset() and memcmp() used below */
#include <inttypes.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

#define IB_SIZE 4096
#define MAX_RESOURCES 8

#define DMA_SIZE 4097
#define DMA_DATA_BYTE 0xea

static bool do_p2p;

static amdgpu_device_handle executing_device_handle;
static uint32_t executing_device_major_version;
static uint32_t executing_device_minor_version;

static amdgpu_device_handle peer_exporting_device_handle;
static uint32_t peer_exporting_device_major_version;
static uint32_t peer_exporting_device_minor_version;

static amdgpu_context_handle context_handle;
static amdgpu_bo_handle ib_handle;
static uint32_t *ib_cpu;
static uint64_t ib_mc_address;
static amdgpu_va_handle ib_va_handle;
static uint32_t num_dword;

static amdgpu_bo_handle resources[MAX_RESOURCES];
static unsigned num_resources;

static uint8_t *reference_data;

static void amdgpu_cp_dma_host_to_vram(void);
static void amdgpu_cp_dma_vram_to_host(void);
static void amdgpu_cp_dma_p2p_vram_to_vram(void);
static void amdgpu_cp_dma_p2p_host_to_vram(void);
static void amdgpu_cp_dma_p2p_vram_to_host(void);

/**
 * Tests in cp dma test suite
 */
CU_TestInfo cp_dma_tests[] = {
	{ "CP DMA write Host to VRAM",  amdgpu_cp_dma_host_to_vram },
	{ "CP DMA write VRAM to Host",  amdgpu_cp_dma_vram_to_host },

	{ "Peer to Peer CP DMA write VRAM to VRAM",  amdgpu_cp_dma_p2p_vram_to_vram },
	{ "Peer to Peer CP DMA write Host to VRAM",  amdgpu_cp_dma_p2p_host_to_vram },
	{ "Peer to Peer CP DMA write VRAM to Host",  amdgpu_cp_dma_p2p_vram_to_host },
	CU_TEST_INFO_NULL,
};

struct amdgpu_cp_dma_bo {
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;
	uint64_t gpu_va;
	uint64_t size;
};

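/**
 * Allocate a buffer object of @size bytes in @heap on @dev and map it into
 * the GPU virtual address space; the handles and GPU VA are returned in @bo.
 */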
static int allocate_bo_and_va(amdgpu_device_handle dev,
		uint64_t size, uint64_t alignment,
		uint32_t heap, uint64_t alloc_flags,
		struct amdgpu_cp_dma_bo *bo) {
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		goto error_bo_alloc;

	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
			size, alignment, 0,
			&vmc_addr, &va_handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr,
			AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE,
			AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->buf_handle = buf_handle;
	bo->va_handle = va_handle;
	bo->gpu_va = vmc_addr;
	bo->size = size;

	return 0;

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0,
			size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_va_range_free(va_handle);

error_bo_alloc:
	amdgpu_bo_free(buf_handle);

	return r;
}

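/**
 * Import a dma-buf fd exported by another device into @dev and map the
 * resulting buffer object into @dev's GPU virtual address space.
 */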
static int import_dma_buf_to_bo(amdgpu_device_handle dev,
		int dmabuf_fd, struct amdgpu_cp_dma_bo *bo) {
	amdgpu_va_handle va_handle;
	uint64_t vmc_addr;
	int r;
	struct amdgpu_bo_import_result bo_import_result = {};

	r = amdgpu_bo_import(dev, amdgpu_bo_handle_type_dma_buf_fd,
			dmabuf_fd, &bo_import_result);
	if (r)
		goto error_bo_import;

	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
				bo_import_result.alloc_size, 0, 0,
				&vmc_addr, &va_handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
			bo_import_result.alloc_size, vmc_addr,
			AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE,
			AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->buf_handle = bo_import_result.buf_handle;
	bo->va_handle = va_handle;
	bo->gpu_va = vmc_addr;
	bo->size = bo_import_result.alloc_size;

	return 0;

error_va_map:
	amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
			bo_import_result.alloc_size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_va_range_free(va_handle);

error_bo_import:
	amdgpu_bo_free(bo_import_result.buf_handle);

	return r;
}

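/**
 * Unmap, release the VA range and free a buffer object created by
 * allocate_bo_and_va() or import_dma_buf_to_bo().
 */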
static int free_bo(struct amdgpu_cp_dma_bo bo) {
	int r;
	r = amdgpu_bo_va_op(bo.buf_handle, 0,
			bo.size, bo.gpu_va, 0, AMDGPU_VA_OP_UNMAP);
	if (r)
		return r;

	r = amdgpu_va_range_free(bo.va_handle);
	if (r)
		return r;

	r = amdgpu_bo_free(bo.buf_handle);
	if (r)
		return r;

	return 0;
}

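/**
 * Submit the recorded IB on the GFX ring, or on the COMPUTE ring for ASICs
 * whose graphics pipe has been removed, then wait for the fence to signal.
 */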
static int submit_and_sync() {
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	uint32_t family_id, chip_id, chip_rev;
	unsigned gc_ip_type;
	int r;

	r = amdgpu_bo_list_create(executing_device_handle,
			num_resources, resources,
			NULL, &ibs_request.resources);
	if (r)
		return r;

	family_id = executing_device_handle->info.family_id;
	chip_id = executing_device_handle->info.chip_external_rev;
	chip_rev = executing_device_handle->info.chip_rev;

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
		AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	ib_info.ib_mc_address = ib_mc_address;
	ib_info.size = num_dword;

	ibs_request.ip_type = gc_ip_type;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	if (r)
		return r;

	r = amdgpu_bo_list_destroy(ibs_request.resources);
	if (r)
		return r;

	fence_status.context = context_handle;
	fence_status.ip_type = gc_ip_type;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
			AMDGPU_TIMEOUT_INFINITE,
			0, &expired);
	if (r)
		return r;

	return 0;
}

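/**
 * Build the CP DMA command in the IB. The eight dwords form a type-3
 * DMA_DATA packet (header 0xc0055000: PKT3, opcode 0x50, six-dword body):
 * dword 1 sets CP_SYNC (bit 31) with the source and destination selected as
 * plain GPU addresses, dwords 2-5 carry the 64-bit source and destination
 * VAs, dword 6 holds the byte count, and the final 0xffff1000 dword is a
 * type-3 NOP used as padding. The IB and both buffers are added to the
 * resource list for the submission.
 */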
static void cp_dma_cmd(struct amdgpu_cp_dma_bo src_bo,
		struct amdgpu_cp_dma_bo dst_bo) {
	_Static_assert(DMA_SIZE < (1 << 26), "DMA size exceeds CP DMA maximum!");

	ib_cpu[0] = 0xc0055000;
	ib_cpu[1] = 0x80000000;
	ib_cpu[2] = src_bo.gpu_va & 0x00000000ffffffff;
	ib_cpu[3] = (src_bo.gpu_va & 0xffffffff00000000) >> 32;
	ib_cpu[4] = dst_bo.gpu_va & 0x00000000ffffffff;
	ib_cpu[5] = (dst_bo.gpu_va & 0xffffffff00000000) >> 32;
	// the byte count is read from the lower 26 bits
	ib_cpu[6] = ((1 << 26) - 1) & DMA_SIZE;
	ib_cpu[7] = 0xffff1000;

	num_dword = 8;

	resources[0] = src_bo.buf_handle;
	resources[1] = dst_bo.buf_handle;
	resources[2] = ib_handle;
	num_resources = 3;
}

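/**
 * Common body for the single-GPU tests: fill a source bo in src_heap with
 * DMA_DATA_BYTE, clear a destination bo in dst_heap, run the CP DMA copy
 * and verify that the destination now matches reference_data.
 */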
static void amdgpu_cp_dma(uint32_t src_heap, uint32_t dst_heap) {
	int r;
	struct amdgpu_cp_dma_bo src_bo = {0};
	struct amdgpu_cp_dma_bo dst_bo = {0};
	void *src_bo_cpu;
	void *dst_bo_cpu;

	/* allocate the src bo, set its data to DMA_DATA_BYTE */
	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);
	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);

	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* allocate the dst bo and clear its content to all 0 */
	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &dst_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(dst_bo.buf_handle, (void **)&dst_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);

	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
	memset(dst_bo_cpu, 0, DMA_SIZE);

	/* record CP DMA command and dispatch the command */
	cp_dma_cmd(src_bo, dst_bo);

	r = submit_and_sync();
	CU_ASSERT_EQUAL(r, 0);

	/* verify the dst bo is filled with DMA_DATA_BYTE */
	CU_ASSERT_EQUAL(memcmp(dst_bo_cpu, reference_data, DMA_SIZE) == 0, true);

	r = amdgpu_bo_cpu_unmap(dst_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(src_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(dst_bo);
	CU_ASSERT_EQUAL(r, 0);
}

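/**
 * Common body for the peer-to-peer tests: a bo allocated on the peer device
 * is exported through dma-buf, imported on the executing device and used as
 * the DMA destination; after the copy the peer's CPU mapping is checked
 * against reference_data.
 */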
static void amdgpu_cp_dma_p2p(uint32_t src_heap, uint32_t dst_heap) {
	int r;
	struct amdgpu_cp_dma_bo exported_bo = {0};
	int dma_buf_fd;
	int dma_buf_fd_dup;
	struct amdgpu_cp_dma_bo src_bo = {0};
	struct amdgpu_cp_dma_bo imported_dst_bo = {0};
	void *exported_bo_cpu;
	void *src_bo_cpu;

	/* allocate a bo on the peer device and export it to dma-buf */
	r = allocate_bo_and_va(peer_exporting_device_handle, DMA_SIZE, 4096,
			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &exported_bo);
	CU_ASSERT_EQUAL(r, 0);

	/* map the exported bo and clear its content to 0 */
	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
	r = amdgpu_bo_cpu_map(exported_bo.buf_handle, (void **)&exported_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);
	memset(exported_bo_cpu, 0, DMA_SIZE);

	r = amdgpu_bo_export(exported_bo.buf_handle,
			amdgpu_bo_handle_type_dma_buf_fd, (uint32_t*)&dma_buf_fd);
	CU_ASSERT_EQUAL(r, 0);

	// According to amdgpu_drm:
	// "Buffer must be "imported" only using new "fd"
	// (different from one used by "exporter")"
	dma_buf_fd_dup = dup(dma_buf_fd);
	r = close(dma_buf_fd);
	CU_ASSERT_EQUAL(r, 0);

	/* import the dma-buf to the executing device, imported bo is the DMA destination */
	r = import_dma_buf_to_bo(
			executing_device_handle, dma_buf_fd_dup, &imported_dst_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = close(dma_buf_fd_dup);
	CU_ASSERT_EQUAL(r, 0);

	/* allocate the src bo and set its content to DMA_DATA_BYTE */
	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);

	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);

	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* record CP DMA command and dispatch the command */
	cp_dma_cmd(src_bo, imported_dst_bo);

	r = submit_and_sync();
	CU_ASSERT_EQUAL(r, 0);

	/* verify the bo from the peer device is filled with DMA_DATA_BYTE */
	CU_ASSERT_EQUAL(memcmp(exported_bo_cpu, reference_data, DMA_SIZE) == 0, true);

	r = amdgpu_bo_cpu_unmap(exported_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(exported_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(imported_dst_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(src_bo);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_cp_dma_host_to_vram(void) {
	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
}

static void amdgpu_cp_dma_vram_to_host(void) {
	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
}

static void amdgpu_cp_dma_p2p_vram_to_vram(void) {
	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM);
}

static void amdgpu_cp_dma_p2p_host_to_vram(void) {
	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
}

static void amdgpu_cp_dma_p2p_vram_to_host(void) {
	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
}

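/**
 * Suite setup: open the executing device, create a GPU context, allocate and
 * map the IB, open the peer device when P2P is enabled, and build the
 * reference pattern used to validate the copies.
 */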
int suite_cp_dma_tests_init() {
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0],
			&executing_device_major_version,
			&executing_device_minor_version,
			&executing_device_handle);
	if (r)
		return CUE_SINIT_FAILED;

	r = amdgpu_cs_ctx_create(executing_device_handle, &context_handle);
	if (r)
		return CUE_SINIT_FAILED;

	r = amdgpu_bo_alloc_and_map(executing_device_handle, IB_SIZE, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&ib_handle, (void**)&ib_cpu,
					&ib_mc_address, &ib_va_handle);
	if (r)
		return CUE_SINIT_FAILED;

	if (do_p2p) {
		r = amdgpu_device_initialize(drm_amdgpu[1],
				&peer_exporting_device_major_version,
				&peer_exporting_device_minor_version,
				&peer_exporting_device_handle);

		if (r)
			return CUE_SINIT_FAILED;
	}

	reference_data = (uint8_t*)malloc(DMA_SIZE);
	if (!reference_data)
		return CUE_SINIT_FAILED;
	memset(reference_data, DMA_DATA_BYTE, DMA_SIZE);

	return CUE_SUCCESS;
}

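/**
 * Suite teardown: release the reference buffer, IB, context and devices
 * opened in suite_cp_dma_tests_init().
 */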
int suite_cp_dma_tests_clean() {
	int r;

	free(reference_data);

	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
				 ib_mc_address, IB_SIZE);
	if (r)
		return CUE_SCLEAN_FAILED;

	r = amdgpu_cs_ctx_free(context_handle);
	if (r)
		return CUE_SCLEAN_FAILED;

	r = amdgpu_device_deinitialize(executing_device_handle);
	if (r)
		return CUE_SCLEAN_FAILED;

	if (do_p2p) {
		r = amdgpu_device_deinitialize(peer_exporting_device_handle);
		if (r)
			return CUE_SCLEAN_FAILED;
	}

	return CUE_SUCCESS;
}

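/**
 * Decide whether the suite runs at all: the executing device must report a
 * family between AMDGPU_FAMILY_AI and AMDGPU_FAMILY_NV; the Peer to Peer
 * tests additionally require a second supported device and are disabled
 * otherwise.
 */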
CU_BOOL suite_cp_dma_tests_enable(void) {
	int r = 0;

	if (amdgpu_device_initialize(drm_amdgpu[0],
			&executing_device_major_version,
			&executing_device_minor_version,
			&executing_device_handle))
		return CU_FALSE;

	if (!(executing_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
			executing_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
		printf("Testing device has an ASIC that is not supported by the CP-DMA test suite!\n");
		return CU_FALSE;
	}

	if (amdgpu_device_deinitialize(executing_device_handle))
		return CU_FALSE;

	if (drm_amdgpu[1] >= 0) {
		r = amdgpu_device_initialize(drm_amdgpu[1],
				&peer_exporting_device_major_version,
				&peer_exporting_device_minor_version,
				&peer_exporting_device_handle);

		if (r == 0 && (peer_exporting_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
						peer_exporting_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
			do_p2p = true;
		}

		if (r == 0 && amdgpu_device_deinitialize(peer_exporting_device_handle) != 0) {
			printf("Deinitialize peer_exporting_device_handle failed!\n");
			return CU_FALSE;
		}
	}

	if (!do_p2p) {
		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to VRAM", CU_FALSE);
		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write Host to VRAM", CU_FALSE);
		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to Host", CU_FALSE);
		printf("Peer device is not opened or its ASIC is not supported by the suite; skipping all Peer to Peer tests.\n");
	}

	return CU_TRUE;
}