tests/amdgpu/cp_dma_tests.c

   1 /*
   2  * Copyright 2022 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22 */
  23
  24 #include <unistd.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27 #include <inttypes.h>
  28
  29 #include "CUnit/Basic.h"
  30
  31 #include "amdgpu_test.h"
  32 #include "amdgpu_drm.h"
  33 #include "amdgpu_internal.h"
  34
  35 #define IB_SIZE 4096
  36 #define MAX_RESOURCES 8
  37
  38 #define DMA_SIZE 4097
  39 #define DMA_DATA_BYTE 0xea
  40
  41 static bool do_p2p;
  42
  43 static amdgpu_device_handle executing_device_handle;
  44 static uint32_t executing_device_major_version;
  45 static uint32_t executing_device_minor_version;
  46
  47 static amdgpu_device_handle peer_exporting_device_handle;
  48 static uint32_t peer_exporting_device_major_version;
  49 static uint32_t peer_exporting_device_minor_version;
  50
  51 static amdgpu_context_handle context_handle;
  52 static amdgpu_bo_handle ib_handle;
  53 static uint32_t *ib_cpu;
  54 static uint64_t ib_mc_address;
  55 static amdgpu_va_handle ib_va_handle;
  56 static uint32_t num_dword;
  57
  58 static amdgpu_bo_handle resources[MAX_RESOURCES];
  59 static unsigned num_resources;
  60
  61 static uint8_t* reference_data;
  62
  63 static void amdgpu_cp_dma_host_to_vram(void);
  64 static void amdgpu_cp_dma_vram_to_host(void);
  65 static void amdgpu_cp_dma_p2p_vram_to_vram(void);
  66 static void amdgpu_cp_dma_p2p_host_to_vram(void);
  67 static void amdgpu_cp_dma_p2p_vram_to_host(void);
  68
  69 /**
  70  * Tests in cp dma test suite
  71  */
  72 CU_TestInfo cp_dma_tests[] = {
  73         { "CP DMA write Host to VRAM",  amdgpu_cp_dma_host_to_vram },
  74         { "CP DMA write VRAM to Host",  amdgpu_cp_dma_vram_to_host },
  75
  76         { "Peer to Peer CP DMA write VRAM to VRAM",  amdgpu_cp_dma_p2p_vram_to_vram },
  77         { "Peer to Peer CP DMA write Host to VRAM",  amdgpu_cp_dma_p2p_host_to_vram },
  78         { "Peer to Peer CP DMA write VRAM to Host",  amdgpu_cp_dma_p2p_vram_to_host },
  79         CU_TEST_INFO_NULL,
  80 };
  81
  82 struct amdgpu_cp_dma_bo{
  83         amdgpu_bo_handle buf_handle;
  84         amdgpu_va_handle va_handle;
  85         uint64_t gpu_va;
  86         uint64_t size;
  87 };
  88
  89 static int allocate_bo_and_va(amdgpu_device_handle dev,
  90                 uint64_t size, uint64_t alignment,
  91                 uint32_t heap, uint64_t alloc_flags,
  92                 struct amdgpu_cp_dma_bo *bo) {
  93         struct amdgpu_bo_alloc_request request = {};
  94         amdgpu_bo_handle buf_handle;
  95         amdgpu_va_handle va_handle;
  96         uint64_t vmc_addr;
  97         int r;
  98
  99         request.alloc_size = size;
 100         request.phys_alignment = alignment;
 101         request.preferred_heap = heap;
 102         request.flags = alloc_flags;
 103
 104         r = amdgpu_bo_alloc(dev, &request, &buf_handle);
 105         if (r)
 106                 goto error_bo_alloc;
 107
 108         r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
 109                         size, alignment, 0,
 110                         &vmc_addr, &va_handle, 0);
 111         if (r)
 112                 goto error_va_alloc;
 113
 114         r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr,
 115                                                 AMDGPU_VM_PAGE_READABLE |
 116                                                         AMDGPU_VM_PAGE_WRITEABLE |
 117                                                         AMDGPU_VM_PAGE_EXECUTABLE,
 118                                                 AMDGPU_VA_OP_MAP);
 119         if (r)
 120                 goto error_va_map;
 121
 122         bo->buf_handle = buf_handle;
 123         bo->va_handle = va_handle;
 124         bo->gpu_va = vmc_addr;
 125         bo->size = size;
 126
 127         return 0;
 128
 129 error_va_map:
 130         amdgpu_bo_va_op(buf_handle, 0,
 131                         size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
 132
 133 error_va_alloc:
 134         amdgpu_va_range_free(va_handle);
 135
 136 error_bo_alloc:
 137         amdgpu_bo_free(buf_handle);
 138
 139         return r;
 140 }
 141
 142 static int import_dma_buf_to_bo(amdgpu_device_handle dev,
 143                 int dmabuf_fd, struct amdgpu_cp_dma_bo *bo) {
 144         amdgpu_va_handle va_handle;
 145         uint64_t vmc_addr;
 146         int r;
 147         struct amdgpu_bo_import_result bo_import_result = {};
 148
 149         r = amdgpu_bo_import(dev, amdgpu_bo_handle_type_dma_buf_fd,
 150                         dmabuf_fd, &bo_import_result);
 151         if (r)
 152                 goto error_bo_import;
 153
 154         r = amdgpu_va_range_alloc(dev, amdgpu_gpu_va_range_general,
 155                                 bo_import_result.alloc_size, 0, 0,
 156                                 &vmc_addr, &va_handle, 0);
 157         if (r)
 158                 goto error_va_alloc;
 159
 160         r = amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
 161                         bo_import_result.alloc_size, vmc_addr,
 162                         AMDGPU_VM_PAGE_READABLE |
 163                                 AMDGPU_VM_PAGE_WRITEABLE |
 164                                 AMDGPU_VM_PAGE_EXECUTABLE,
 165                         AMDGPU_VA_OP_MAP);
 166         if (r)
 167                 goto error_va_map;
 168
 169         bo->buf_handle = bo_import_result.buf_handle;
 170         bo->va_handle = va_handle;
 171         bo->gpu_va = vmc_addr;
 172         bo->size = bo_import_result.alloc_size;
 173
 174         return 0;
 175
 176 error_va_map:
 177         amdgpu_bo_va_op(bo_import_result.buf_handle, 0,
 178                         bo_import_result.alloc_size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
 179
 180 error_va_alloc:
 181         amdgpu_va_range_free(va_handle);
 182
 183 error_bo_import:
 184         amdgpu_bo_free(bo_import_result.buf_handle);
 185
 186         return r;
 187 }
 188
 189 static int free_bo(struct amdgpu_cp_dma_bo bo) {
 190         int r;
 191         r = amdgpu_bo_va_op(bo.buf_handle, 0,
 192                         bo.size, bo.gpu_va, 0, AMDGPU_VA_OP_UNMAP);
 193         if(r)
 194                 return r;
 195
 196         r = amdgpu_va_range_free(bo.va_handle);
 197         if(r)
 198                 return r;
 199
 200         r = amdgpu_bo_free(bo.buf_handle);
 201         if(r)
 202                 return r;
 203
 204         return 0;
 205 }
 206
 207 static int submit_and_sync() {
 208         struct amdgpu_cs_request ibs_request = {0};
 209         struct amdgpu_cs_ib_info ib_info = {0};
 210         struct amdgpu_cs_fence fence_status = {0};
 211         uint32_t expired;
 212         uint32_t family_id, chip_id, chip_rev;
 213         unsigned gc_ip_type;
 214         int r;
 215
 216         r = amdgpu_bo_list_create(executing_device_handle,
 217                         num_resources, resources,
 218                         NULL, &ibs_request.resources);
 219         if (r)
 220                 return r;
 221
 222         family_id = executing_device_handle->info.family_id;
 223         chip_id = executing_device_handle->info.chip_external_rev;
 224         chip_rev = executing_device_handle->info.chip_rev;
 225
 226         gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
 227                 AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
 228
 229         ib_info.ib_mc_address = ib_mc_address;
 230         ib_info.size = num_dword;
 231
 232         ibs_request.ip_type = gc_ip_type;
 233         ibs_request.number_of_ibs = 1;
 234         ibs_request.ibs = &ib_info;
 235         ibs_request.fence_info.handle = NULL;
 236
 237         r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
 238         if (r)
 239                 return r;
 240
 241         r = amdgpu_bo_list_destroy(ibs_request.resources);
 242         if (r)
 243                 return r;
 244
 245         fence_status.context = context_handle;
 246         fence_status.ip_type = gc_ip_type;
 247         fence_status.fence = ibs_request.seq_no;
 248
 249         r = amdgpu_cs_query_fence_status(&fence_status,
 250                         AMDGPU_TIMEOUT_INFINITE,
 251                         0, &expired);
 252         if (r)
 253                 return r;
 254
 255         return 0;
 256 }
 257
 258 static void cp_dma_cmd(struct amdgpu_cp_dma_bo src_bo,
 259                 struct amdgpu_cp_dma_bo dst_bo) {
 260         _Static_assert(DMA_SIZE < (1 << 26), "DMA size exceeds CP DMA maximium!");
 261
 262         ib_cpu[0] = 0xc0055000;
 263         ib_cpu[1] = 0x80000000;
 264         ib_cpu[2] = src_bo.gpu_va & 0x00000000ffffffff;
 265         ib_cpu[3] = (src_bo.gpu_va & 0xffffffff00000000) >> 32;
 266         ib_cpu[4] = dst_bo.gpu_va & 0x00000000ffffffff;
 267         ib_cpu[5] = (dst_bo.gpu_va & 0xffffffff00000000) >> 32;
 268         // size is read from the lower 26bits.
 269         ib_cpu[6] = ((1 << 26) - 1) & DMA_SIZE;
 270         ib_cpu[7] = 0xffff1000;
 271
 272         num_dword = 8;
 273
 274         resources[0] = src_bo.buf_handle;
 275         resources[1] = dst_bo.buf_handle;
 276         resources[2] = ib_handle;
 277         num_resources = 3;
 278 }
 279
 280 static void amdgpu_cp_dma(uint32_t src_heap, uint32_t dst_heap) {
 281         int r;
 282         struct amdgpu_cp_dma_bo src_bo = {0};
 283         struct amdgpu_cp_dma_bo dst_bo = {0};
 284         void *src_bo_cpu;
 285         void *dst_bo_cpu;
 286
 287         /* allocate the src bo, set its data to DMA_DATA_BYTE */
 288         r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
 289                         src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
 290         CU_ASSERT_EQUAL(r, 0);
 291
 292         r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
 293         CU_ASSERT_EQUAL(r, 0);
 294         memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);
 295
 296         r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
 297         CU_ASSERT_EQUAL(r, 0);
 298
 299         /* allocate the dst bo and clear its content to all 0 */
 300         r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
 301                         dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &dst_bo);
 302         CU_ASSERT_EQUAL(r, 0);
 303
 304         r = amdgpu_bo_cpu_map(dst_bo.buf_handle, (void **)&dst_bo_cpu);
 305         CU_ASSERT_EQUAL(r, 0);
 306
 307         _Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
 308         memset(dst_bo_cpu, 0, DMA_SIZE);
 309
 310         /* record CP DMA command and dispatch the command */
 311         cp_dma_cmd(src_bo, dst_bo);
 312
 313         r = submit_and_sync();
 314         CU_ASSERT_EQUAL(r, 0);
 315
 316         /* verify the dst bo is filled with DMA_DATA_BYTE */
 317         CU_ASSERT_EQUAL(memcmp(dst_bo_cpu, reference_data, DMA_SIZE) == 0, true);
 318
 319         r = amdgpu_bo_cpu_unmap(dst_bo.buf_handle);
 320         CU_ASSERT_EQUAL(r, 0);
 321
 322         r = free_bo(src_bo);
 323         CU_ASSERT_EQUAL(r, 0);
 324
 325         r = free_bo(dst_bo);
 326         CU_ASSERT_EQUAL(r, 0);
 327 }
 328
 329 static void amdgpu_cp_dma_p2p(uint32_t src_heap, uint32_t dst_heap) {
 330         int r;
 331         struct amdgpu_cp_dma_bo exported_bo = {0};
 332         int dma_buf_fd;
 333         int dma_buf_fd_dup;
 334         struct amdgpu_cp_dma_bo src_bo = {0};
 335         struct amdgpu_cp_dma_bo imported_dst_bo = {0};
 336         void *exported_bo_cpu;
 337         void *src_bo_cpu;
 338
 339         /* allocate a bo on the peer device and export it to dma-buf */
 340         r = allocate_bo_and_va(peer_exporting_device_handle, DMA_SIZE, 4096,
 341                         src_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &exported_bo);
 342         CU_ASSERT_EQUAL(r, 0);
 343
 344         /* map the exported bo and clear its content to 0 */
 345         _Static_assert(DMA_DATA_BYTE != 0, "Initialization data should be different from DMA data!");
 346         r = amdgpu_bo_cpu_map(exported_bo.buf_handle, (void **)&exported_bo_cpu);
 347         CU_ASSERT_EQUAL(r, 0);
 348         memset(exported_bo_cpu, 0, DMA_SIZE);
 349
 350         r = amdgpu_bo_export(exported_bo.buf_handle,
 351                         amdgpu_bo_handle_type_dma_buf_fd, (uint32_t*)&dma_buf_fd);
 352         CU_ASSERT_EQUAL(r, 0);
 353
 354     // According to amdgpu_drm:
 355         // "Buffer must be "imported" only using new "fd"
 356         // (different from one used by "exporter")"
 357         dma_buf_fd_dup = dup(dma_buf_fd);
 358         r = close(dma_buf_fd);
 359         CU_ASSERT_EQUAL(r, 0);
 360
 361         /* import the dma-buf to the executing device, imported bo is the DMA destination */
 362         r = import_dma_buf_to_bo(
 363                         executing_device_handle, dma_buf_fd_dup, &imported_dst_bo);
 364         CU_ASSERT_EQUAL(r, 0);
 365
 366         r = close(dma_buf_fd_dup);
 367         CU_ASSERT_EQUAL(r, 0);
 368
 369         /* allocate the src bo and set its content to DMA_DATA_BYTE */
 370         r = allocate_bo_and_va(executing_device_handle, DMA_SIZE, 4096,
 371                         dst_heap, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &src_bo);
 372         CU_ASSERT_EQUAL(r, 0);
 373
 374         r = amdgpu_bo_cpu_map(src_bo.buf_handle, (void **)&src_bo_cpu);
 375         CU_ASSERT_EQUAL(r, 0);
 376
 377         memset(src_bo_cpu, DMA_DATA_BYTE, DMA_SIZE);
 378
 379         r = amdgpu_bo_cpu_unmap(src_bo.buf_handle);
 380         CU_ASSERT_EQUAL(r, 0);
 381
 382         /* record CP DMA command and dispatch the command */
 383         cp_dma_cmd(src_bo, imported_dst_bo);
 384
 385         r = submit_and_sync();
 386         CU_ASSERT_EQUAL(r, 0);
 387
 388         /* verify the bo from the peer device is filled with DMA_DATA_BYTE */
 389         CU_ASSERT_EQUAL(memcmp(exported_bo_cpu, reference_data, DMA_SIZE) == 0, true);
 390
 391         r = amdgpu_bo_cpu_unmap(exported_bo.buf_handle);
 392         CU_ASSERT_EQUAL(r, 0);
 393
 394         r = free_bo(exported_bo);
 395         CU_ASSERT_EQUAL(r, 0);
 396
 397         r = free_bo(imported_dst_bo);
 398         CU_ASSERT_EQUAL(r, 0);
 399
 400         r = free_bo(src_bo);
 401         CU_ASSERT_EQUAL(r, 0);
 402 }
 403
 404 static void amdgpu_cp_dma_host_to_vram(void) {
 405         amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
 406 }
 407
 408 static void amdgpu_cp_dma_vram_to_host(void) {
 409         amdgpu_cp_dma(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
 410 }
 411
 412 static void amdgpu_cp_dma_p2p_vram_to_vram(void) {
 413         amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM);
 414 }
 415
 416 static void amdgpu_cp_dma_p2p_host_to_vram(void) {
 417         amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_DOMAIN_VRAM);
 418 }
 419
 420 static void amdgpu_cp_dma_p2p_vram_to_host(void) {
 421         amdgpu_cp_dma_p2p(AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_GTT);
 422 }
 423
 424 int suite_cp_dma_tests_init() {
 425         int r;
 426
 427         r = amdgpu_device_initialize(drm_amdgpu[0],
 428                         &executing_device_major_version,
 429                         &executing_device_minor_version,
 430                         &executing_device_handle);
 431         if (r)
 432                 return CUE_SINIT_FAILED;
 433
 434         r = amdgpu_cs_ctx_create(executing_device_handle, &context_handle);
 435         if (r)
 436                 return CUE_SINIT_FAILED;
 437
 438         r = amdgpu_bo_alloc_and_map(executing_device_handle, IB_SIZE, 4096,
 439                                         AMDGPU_GEM_DOMAIN_GTT, 0,
 440                                         &ib_handle, (void**)&ib_cpu,
 441                                         &ib_mc_address, &ib_va_handle);
 442         if (r)
 443                 return CUE_SINIT_FAILED;
 444
 445         if (do_p2p) {
 446                 r = amdgpu_device_initialize(drm_amdgpu[1],
 447                                 &peer_exporting_device_major_version,
 448                                 &peer_exporting_device_minor_version,
 449                                 &peer_exporting_device_handle);
 450
 451                 if (r)
 452                         return CUE_SINIT_FAILED;
 453         }
 454
 455         reference_data = (uint8_t*)malloc(DMA_SIZE);
 456         if (!reference_data)
 457                 return CUE_SINIT_FAILED;
 458         memset(reference_data, DMA_DATA_BYTE, DMA_SIZE);
 459
 460         return CUE_SUCCESS;
 461 }
 462
 463 int suite_cp_dma_tests_clean() {
 464         int r;
 465
 466         free(reference_data);
 467
 468         r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle,
 469                                  ib_mc_address, IB_SIZE);
 470         if (r)
 471                 return CUE_SCLEAN_FAILED;
 472
 473         r = amdgpu_cs_ctx_free(context_handle);
 474         if (r)
 475                 return CUE_SCLEAN_FAILED;
 476
 477         r = amdgpu_device_deinitialize(executing_device_handle);
 478         if (r)
 479                 return CUE_SCLEAN_FAILED;
 480
 481         if (do_p2p) {
 482                 r = amdgpu_device_deinitialize(peer_exporting_device_handle);
 483                 if (r)
 484                         return CUE_SCLEAN_FAILED;
 485         }
 486
 487         return CUE_SUCCESS;
 488 }
 489
 490 CU_BOOL suite_cp_dma_tests_enable(void) {
 491         int r = 0;
 492
 493         if (amdgpu_device_initialize(drm_amdgpu[0],
 494                         &executing_device_major_version,
 495                         &executing_device_minor_version,
 496                         &executing_device_handle))
 497                 return CU_FALSE;
 498
 499         if (!(executing_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
 500                         executing_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
 501                 printf("Testing device has ASIC that is not supported by CP-DMA test suite!\n");
 502                 return CU_FALSE;
 503         }
 504
 505         if (amdgpu_device_deinitialize(executing_device_handle))
 506                 return CU_FALSE;
 507
 508         if (drm_amdgpu[1] >= 0) {
 509                 r = amdgpu_device_initialize(drm_amdgpu[1],
 510                                 &peer_exporting_device_major_version,
 511                                 &peer_exporting_device_minor_version,
 512                                 &peer_exporting_device_handle);
 513
 514                 if (r == 0 && (peer_exporting_device_handle->info.family_id >= AMDGPU_FAMILY_AI &&
 515                                                 peer_exporting_device_handle->info.family_id <= AMDGPU_FAMILY_NV)) {
 516                         do_p2p = true;
 517                 }
 518
 519                 if (r == 0 && amdgpu_device_deinitialize(peer_exporting_device_handle) != 0) {
 520                         printf("Deinitialize peer_exporting_device_handle failed!\n");
 521                         return CU_FALSE;
 522                 }
 523         }
 524
 525         if (!do_p2p) {
 526                 amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to VRAM", CU_FALSE);
 527                 amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write Host to VRAM", CU_FALSE);
 528                 amdgpu_set_test_active("CP DMA Tests", "Peer to Peer CP DMA write VRAM to Host", CU_FALSE);
 529                 printf("Peer device is not opened or has ASIC not supported by the suite, skip all Peer to Peer tests.\n");
 530         }
 531
 532         return CU_TRUE;
 533 }