/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#define IB_SIZE		4096
#define MAX_RESOURCES	8

/* Copy size: one byte past a page so the transfer is not page-aligned.
 * The exact value is restored here by inference; any size below the 26-bit
 * CP DMA limit checked in cp_dma_cmd() works. */
#define DMA_SIZE	4097
#define DMA_DATA_BYTE	0xea

/* set when a usable peer device is found, see suite_cp_dma_tests_enable() */
static bool do_p2p;
static amdgpu_device_handle executing_device_handle;
static uint32_t executing_device_major_version;
static uint32_t executing_device_minor_version;

static amdgpu_device_handle peer_exporting_device_handle;
static uint32_t peer_exporting_device_major_version;
static uint32_t peer_exporting_device_minor_version;
static amdgpu_context_handle context_handle;
static amdgpu_bo_handle ib_handle;
static uint32_t *ib_cpu;
static uint64_t ib_mc_address;
static amdgpu_va_handle ib_va_handle;
static uint32_t num_dword;

static amdgpu_bo_handle resources[MAX_RESOURCES];
static unsigned num_resources;
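/* CPU-side pattern of DMA_DATA_BYTE that DMA results are memcmp'ed against */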
static uint8_t* reference_data;
static void amdgpu_cp_dma_host_to_vram(void);
static void amdgpu_cp_dma_vram_to_host(void);
static void amdgpu_cp_dma_p2p_vram_to_vram(void);
static void amdgpu_cp_dma_p2p_host_to_vram(void);
static void amdgpu_cp_dma_p2p_vram_to_host(void);
/**
 * Tests in cp dma test suite
 */
CU_TestInfo cp_dma_tests[] = {
	{ "CP DMA write Host to VRAM", amdgpu_cp_dma_host_to_vram },
	{ "CP DMA write VRAM to Host", amdgpu_cp_dma_vram_to_host },

	{ "Peer to Peer CP DMA write VRAM to VRAM", amdgpu_cp_dma_p2p_vram_to_vram },
	{ "Peer to Peer CP DMA write Host to VRAM", amdgpu_cp_dma_p2p_host_to_vram },
	{ "Peer to Peer CP DMA write VRAM to Host", amdgpu_cp_dma_p2p_vram_to_host },
	CU_TEST_INFO_NULL,
};
struct amdgpu_cp_dma_bo {
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;
	uint64_t gpu_va;
	uint64_t size;
};
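/* Allocate a bo on @dev in @heap, reserve a GPU VA range and map the bo
 * there; on success the handle, VA handle and GPU VA are stored in @bo. */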
static int allocate_bo_and_va(amdgpu_device_handle dev,
		uint64_t size, uint64_t alignment,
		uint32_t heap, uint64_t alloc_flags,
		struct amdgpu_cp_dma_bo *bo) {
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;
	uint64_t vmc_addr;
	int r;
	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		goto error_bo_alloc;
	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
			size, alignment, 0,
			&vmc_addr, &va_handle, 0);
	if (r)
		goto error_va_alloc;
	r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr,
			AMDGPU_VM_PAGE_READABLE |
			AMDGPU_VM_PAGE_WRITEABLE |
			AMDGPU_VM_PAGE_EXECUTABLE,
			AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;
122 bo->buf_handle = buf_handle;
123 bo->va_handle = va_handle;
124 bo->gpu_va = vmc_addr;
error_va_map:
	amdgpu_bo_va_op(buf_handle, 0,
			size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_va_range_free(va_handle);

error_bo_alloc:
	amdgpu_bo_free(buf_handle);

	return r;
}
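/* Import @dmabuf_fd into @dev and map the resulting bo into the importing
 * device's GPU VM so the executing device can target it with CP DMA. */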
static int import_dma_buf_to_bo(amdgpu_device_handle dev,
		int dmabuf_fd, struct amdgpu_cp_dma_bo *bo) {
	amdgpu_va_handle va_handle;
	uint64_t vmc_addr;
	int r;
	struct amdgpu_bo_import_result bo_import_result = {};
	r = amdgpu_bo_import(dev, amdgpu_bo_handle_type_dma_buf_fd,
			dmabuf_fd, &bo_import_result);
	if (r)
		goto error_bo_import;
	r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
			bo_import_result.alloc_size, 0, 0,
			&vmc_addr, &va_handle, 0);
	if (r)
		goto error_va_alloc;
	r = amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
			bo_import_result.alloc_size, vmc_addr,
			AMDGPU_VM_PAGE_READABLE |
			AMDGPU_VM_PAGE_WRITEABLE |
			AMDGPU_VM_PAGE_EXECUTABLE,
			AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;
169 bo->buf_handle = bo_import_result.buf_handle;
170 bo->va_handle = va_handle;
171 bo->gpu_va = vmc_addr;
172 bo->size = bo_import_result.alloc_size;
error_va_map:
	amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
			bo_import_result.alloc_size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_va_range_free(va_handle);

error_bo_import:
	amdgpu_bo_free(bo_import_result.buf_handle);

	return r;
}
static int free_bo(struct amdgpu_cp_dma_bo bo) {
	int r;

	r = amdgpu_bo_va_op(bo.buf_handle, 0,
			bo.size, bo.gpu_va, 0, AMDGPU_VA_OP_UNMAP);
	if (r)
		return r;

	r = amdgpu_va_range_free(bo.va_handle);
	if (r)
		return r;

	r = amdgpu_bo_free(bo.buf_handle);
	if (r)
		return r;

	return 0;
}
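/* Wrap the recorded IB in a bo list, submit it on the graphics ring (or the
 * compute ring on ASICs whose gfx pipe is removed) and block on its fence. */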
static int submit_and_sync() {
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	uint32_t family_id, chip_id, chip_rev;
	uint32_t gc_ip_type;
	int r;
	r = amdgpu_bo_list_create(executing_device_handle,
			num_resources, resources,
			NULL, &ibs_request.resources);
	if (r)
		return r;
	family_id = executing_device_handle->info.family_id;
	chip_id = executing_device_handle->info.chip_external_rev;
	chip_rev = executing_device_handle->info.chip_rev;

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
	ib_info.ib_mc_address = ib_mc_address;
	ib_info.size = num_dword;

	ibs_request.ip_type = gc_ip_type;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	if (r)
		return r;
	r = amdgpu_bo_list_destroy(ibs_request.resources);
	if (r)
		return r;
	fence_status.context = context_handle;
	fence_status.ip_type = gc_ip_type;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
			AMDGPU_TIMEOUT_INFINITE,
			0, &expired);
	if (r)
		return r;

	return 0;
}
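/* Record a single PACKET3 DMA_DATA copy of DMA_SIZE bytes from src_bo to
 * dst_bo into the IB, and list both bos plus the IB itself as resources. */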
static void cp_dma_cmd(struct amdgpu_cp_dma_bo src_bo,
		struct amdgpu_cp_dma_bo dst_bo) {
	_Static_assert(DMA_SIZE < (1 << 26), "DMA size exceeds CP DMA maximum!");
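	/* PACKET3 header: type 3, opcode 0x50 (DMA_DATA), count 5, i.e. six
	 * payload dwords follow. In the control word, bit 31 is (by the usual
	 * DMA_DATA encoding) CP_SYNC, so the CP stalls until the copy is done;
	 * src and dst are plain memory addresses. */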
	ib_cpu[0] = 0xc0055000;
	ib_cpu[1] = 0x80000000;
	ib_cpu[2] = src_bo.gpu_va & 0x00000000ffffffff;
	ib_cpu[3] = (src_bo.gpu_va & 0xffffffff00000000) >> 32;
	ib_cpu[4] = dst_bo.gpu_va & 0x00000000ffffffff;
	ib_cpu[5] = (dst_bo.gpu_va & 0xffffffff00000000) >> 32;
	// size is read from the lower 26 bits.
	ib_cpu[6] = ((1 << 26) - 1) & DMA_SIZE;
	// trailing single-dword type-3 NOP filler
	ib_cpu[7] = 0xffff1000;

	num_dword = 8;
	resources[0] = src_bo.buf_handle;
	resources[1] = dst_bo.buf_handle;
	resources[2] = ib_handle;
	num_resources = 3;
}
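/* Single-device copy: fill a src bo with DMA_DATA_BYTE, CP DMA it into a
 * zeroed dst bo and verify the result against reference_data. */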
static void amdgpu_cp_dma(uint32_t src_heap, uint32_t dst_heap) {
	int r;
	struct amdgpu_cp_dma_bo src_bo = {0};
	struct amdgpu_cp_dma_bo dst_bo = {0};
	void *src_bo_cpu;
	void *dst_bo_cpu;
	/* allocate the src bo, set its data to DMA_DATA_BYTE */
	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);
	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);

	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	/* allocate the dst bo and clear its content to all 0 */
	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &dst_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(dst_bo.buf_handle, (void **)&dst_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);

	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
	memset(dst_bo_cpu, 0, DMA_SIZE);
	/* record CP DMA command and dispatch the command */
	cp_dma_cmd(src_bo, dst_bo);

	r = submit_and_sync();
	CU_ASSERT_EQUAL(r, 0);

	/* verify the dst bo is filled with DMA_DATA_BYTE */
	CU_ASSERT_EQUAL(memcmp(dst_bo_cpu, reference_data, DMA_SIZE) == 0, true);

	r = amdgpu_bo_cpu_unmap(dst_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(src_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(dst_bo);
	CU_ASSERT_EQUAL(r, 0);
}
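/* Peer-to-peer copy: a bo allocated on the peer device is exported through
 * dma-buf, imported on the executing device and used as the DMA destination;
 * the result is verified through the peer device's CPU mapping. */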
static void amdgpu_cp_dma_p2p(uint32_t src_heap, uint32_t dst_heap) {
	int r;
	struct amdgpu_cp_dma_bo exported_bo = {0};
	int dma_buf_fd;
	int dma_buf_fd_dup;
	struct amdgpu_cp_dma_bo src_bo = {0};
	struct amdgpu_cp_dma_bo imported_dst_bo = {0};
	void *exported_bo_cpu;
	void *src_bo_cpu;
	/* allocate a bo on the peer device and export it to dma-buf */
	r = allocate_bo_and_va(peer_exporting_device_handle, DMA_SIZE, 4096,
			src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &exported_bo);
	CU_ASSERT_EQUAL(r, 0);

	/* map the exported bo and clear its content to 0 */
	_Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
	r = amdgpu_bo_cpu_map(exported_bo.buf_handle, (void **)&exported_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);
	memset(exported_bo_cpu, 0, DMA_SIZE);
	r = amdgpu_bo_export(exported_bo.buf_handle,
			amdgpu_bo_handle_type_dma_buf_fd, (uint32_t*)&dma_buf_fd);
	CU_ASSERT_EQUAL(r, 0);

	// According to amdgpu_drm:
	// "Buffer must be "imported" only using new "fd"
	// (different from one used by "exporter")"
	dma_buf_fd_dup = dup(dma_buf_fd);
	r = close(dma_buf_fd);
	CU_ASSERT_EQUAL(r, 0);
	/* import the dma-buf to the executing device, imported bo is the DMA destination */
	r = import_dma_buf_to_bo(
			executing_device_handle, dma_buf_fd_dup, &imported_dst_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = close(dma_buf_fd_dup);
	CU_ASSERT_EQUAL(r, 0);
	/* allocate the src bo and set its content to DMA_DATA_BYTE */
	r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
			dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
	CU_ASSERT_EQUAL(r, 0);

	memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);

	r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	/* record CP DMA command and dispatch the command */
	cp_dma_cmd(src_bo, imported_dst_bo);

	r = submit_and_sync();
	CU_ASSERT_EQUAL(r, 0);

	/* verify the bo from the peer device is filled with DMA_DATA_BYTE */
	CU_ASSERT_EQUAL(memcmp(exported_bo_cpu, reference_data, DMA_SIZE) == 0, true);

	r = amdgpu_bo_cpu_unmap(exported_bo.buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(exported_bo);
	CU_ASSERT_EQUAL(r, 0);

	r = free_bo(imported_dst_bo);
	CU_ASSERT_EQUAL(r, 0);
	r = free_bo(src_bo);
	CU_ASSERT_EQUAL(r, 0);
}
static void amdgpu_cp_dma_host_to_vram(void) {
	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
}

static void amdgpu_cp_dma_vram_to_host(void) {
	amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
}

static void amdgpu_cp_dma_p2p_vram_to_vram(void) {
	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM);
}

static void amdgpu_cp_dma_p2p_host_to_vram(void) {
	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
}

static void amdgpu_cp_dma_p2p_vram_to_host(void) {
	amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
}
int suite_cp_dma_tests_init() {
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0],
			&executing_device_major_version,
			&executing_device_minor_version,
			&executing_device_handle);
	if (r)
		return CUE_SINIT_FAILED;
	r = amdgpu_cs_ctx_create(executing_device_handle, &context_handle);
	if (r)
		return CUE_SINIT_FAILED;
	r = amdgpu_bo_alloc_and_map(executing_device_handle, IB_SIZE, 4096,
			AMDGPU_GEM_DOMAIN_GTT, 0,
			&ib_handle, (void**)&ib_cpu,
			&ib_mc_address, &ib_va_handle);
	if (r)
		return CUE_SINIT_FAILED;
	/* only bring up the peer device when the enable() hook found one */
	if (do_p2p) {
		r = amdgpu_device_initialize(drm_amdgpu[1],
				&peer_exporting_device_major_version,
				&peer_exporting_device_minor_version,
				&peer_exporting_device_handle);
		if (r)
			return CUE_SINIT_FAILED;
	}
	reference_data = (uint8_t*)malloc(DMA_SIZE);
	if (!reference_data)
		return CUE_SINIT_FAILED;
	memset(reference_data, DMA_DATA_BYTE, DMA_SIZE);

	return CUE_SUCCESS;
}
int suite_cp_dma_tests_clean() {
	int r;

	free(reference_data);
	r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
			ib_mc_address, IB_SIZE);
	if (r)
		return CUE_SCLEAN_FAILED;
	r = amdgpu_cs_ctx_free(context_handle);
	if (r)
		return CUE_SCLEAN_FAILED;
	r = amdgpu_device_deinitialize(executing_device_handle);
	if (r)
		return CUE_SCLEAN_FAILED;
	if (do_p2p) {
		r = amdgpu_device_deinitialize(peer_exporting_device_handle);
		if (r)
			return CUE_SCLEAN_FAILED;
	}

	return CUE_SUCCESS;
}
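/* Suite enable hook: probe the primary device (and an optional second device
 * for the peer-to-peer tests) and deactivate the tests that cannot run. */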
CU_BOOL suite_cp_dma_tests_enable(void) {
	int r;

	if (amdgpu_device_initialize(drm_amdgpu[0],
			&executing_device_major_version,
			&executing_device_minor_version,
			&executing_device_handle))
		return CU_FALSE;
	if (!(executing_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
			executing_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
		printf("Testing device has ASIC that is not supported by CP-DMA test suite!\n");
		return CU_FALSE;
	}
	if (amdgpu_device_deinitialize(executing_device_handle))
		return CU_FALSE;
	if (drm_amdgpu[1] >= 0) {
		r = amdgpu_device_initialize(drm_amdgpu[1],
				&peer_exporting_device_major_version,
				&peer_exporting_device_minor_version,
				&peer_exporting_device_handle);

		if (r == 0 && (peer_exporting_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
				peer_exporting_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
			do_p2p = true;
		}

		if (r == 0 && amdgpu_device_deinitialize(peer_exporting_device_handle) != 0) {
			printf("Deinitialize peer_exporting_device_handle failed!\n");
			return CU_FALSE;
		}
	}
	if (!do_p2p) {
		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to VRAM", CU_FALSE);
		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write Host to VRAM", CU_FALSE);
		amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to Host", CU_FALSE);
		printf("Peer device is not opened or has ASIC not supported by the suite, skip all Peer to Peer tests.\n");
	}

	return CU_TRUE;
}