amdgpu/amdgpu_cs.c

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22 */
  23 #include <stdlib.h>
  24 #include <stdio.h>
  25 #include <string.h>
  26 #include <errno.h>
  27 #include <pthread.h>
  28 #include <sched.h>
  29 #include <sys/ioctl.h>
  30
  31 #include "xf86drm.h"
  32 #include "amdgpu_drm.h"
  33 #include "amdgpu_internal.h"
  34
  35 /**
  36  * Create an IB buffer.
  37  *
  38  * \param   dev - \c [in] Device handle
  39  * \param   context - \c [in] GPU Context
  40  * \param   ib_size - \c [in] Size of allocation
  41  * \param   ib - \c [out] return the pointer to the created IB buffer
  42  *
  43  * \return  0 on success otherwise POSIX Error code
  44 */
  45 static int amdgpu_cs_create_ib(amdgpu_context_handle context,
  46                                enum amdgpu_cs_ib_size ib_size,
  47                                amdgpu_ib_handle *ib)
  48 {
  49         struct amdgpu_bo_alloc_request alloc_buffer;
  50         struct amdgpu_bo_alloc_result info;
  51         int r;
  52         void *cpu;
  53         struct amdgpu_ib *new_ib;
  54
  55         memset(&alloc_buffer, 0, sizeof(alloc_buffer));
  56
  57         switch (ib_size) {
  58         case amdgpu_cs_ib_size_4K:
  59                 alloc_buffer.alloc_size = 4 * 1024;
  60                 break;
  61         case amdgpu_cs_ib_size_16K:
  62                 alloc_buffer.alloc_size = 16 * 1024;
  63                 break;
  64         case amdgpu_cs_ib_size_32K:
  65                 alloc_buffer.alloc_size = 32 * 1024;
  66                 break;
  67         case amdgpu_cs_ib_size_64K:
  68                 alloc_buffer.alloc_size = 64 * 1024;
  69                 break;
  70         case amdgpu_cs_ib_size_128K:
  71                 alloc_buffer.alloc_size = 128 * 1024;
  72                 break;
  73         default:
  74                 return -EINVAL;
  75         }
  76
  77         alloc_buffer.phys_alignment = 4 * 1024;
  78
  79         alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
  80
  81         r = amdgpu_bo_alloc(context->dev,
  82                             &alloc_buffer,
  83                             &info);
  84         if (r)
  85                 return r;
  86
  87         r = amdgpu_bo_cpu_map(info.buf_handle, &cpu);
  88         if (r) {
  89                 amdgpu_bo_free(info.buf_handle);
  90                 return r;
  91         }
  92
  93         new_ib = malloc(sizeof(struct amdgpu_ib));
  94         if (NULL == new_ib) {
  95                 amdgpu_bo_cpu_unmap(info.buf_handle);
  96                 amdgpu_bo_free(info.buf_handle);
  97                 return -ENOMEM;
  98         }
  99
 100         new_ib->context = context;
 101         new_ib->buf_handle = info.buf_handle;
 102         new_ib->cpu = cpu;
 103         new_ib->virtual_mc_base_address = info.virtual_mc_base_address;
 104         new_ib->ib_size = ib_size;
 105         *ib = new_ib;
 106         return 0;
 107 }
 108
 109 /**
 110  * Destroy an IB buffer.
 111  *
 112  * \param   dev - \c [in]  Device handle
 113  * \param   ib - \c [in] the IB buffer
 114  *
 115  * \return  0 on success otherwise POSIX Error code
 116 */
 117 static int amdgpu_cs_destroy_ib(amdgpu_ib_handle ib)
 118 {
 119         int r;
 120
 121         r = amdgpu_bo_cpu_unmap(ib->buf_handle);
 122         if (r)
 123                 return r;
 124
 125         r = amdgpu_bo_free(ib->buf_handle);
 126         if (r)
 127                 return r;
 128
 129         free(ib);
 130         return 0;
 131 }
 132
 133 /**
 134  * Initialize IB pools to empty.
 135  *
 136  * \param   context - \c [in]  GPU Context
 137  *
 138  * \return  0 on success otherwise POSIX Error code
 139 */
 140 static int amdgpu_cs_init_ib_pool(amdgpu_context_handle context)
 141 {
 142         int i;
 143         int r;
 144
 145         r = pthread_mutex_init(&context->pool_mutex, NULL);
 146         if (r)
 147                 return r;
 148
 149         for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++)
 150                 LIST_INITHEAD(&context->ib_pools[i]);
 151
 152         return 0;
 153 }
 154
 155 /**
 156  * Allocate an IB buffer from IB pools.
 157  *
 158  * \param   dev - \c [in]  Device handle
 159  * \param   context - \c [in] GPU Context
 160  * \param   ib_size - \c [in]  Size of allocation
 161  * \param   ib - \c [out] return the pointer to the allocated IB buffer
 162  *
 163  * \return  0 on success otherwise POSIX Error code
 164 */
 165 static int amdgpu_cs_alloc_from_ib_pool(amdgpu_context_handle context,
 166                                         enum amdgpu_cs_ib_size ib_size,
 167                                         amdgpu_ib_handle *ib)
 168 {
 169         int r;
 170         struct list_head *head;
 171         head = &context->ib_pools[ib_size];
 172
 173         r = -ENOMEM;
 174         pthread_mutex_lock(&context->pool_mutex);
 175         if (!LIST_IS_EMPTY(head)) {
 176                 *ib = LIST_ENTRY(struct amdgpu_ib, head->next, list_node);
 177                 LIST_DEL(&(*ib)->list_node);
 178                 r = 0;
 179         }
 180         pthread_mutex_unlock(&context->pool_mutex);
 181
 182         return r;
 183 }
 184
 185 /**
 186  * Free an IB buffer to IB pools.
 187  *
 188  * \param   context - \c [in]  GPU Context
 189  * \param   ib - \c [in] the IB buffer
 190  *
 191  * \return  N/A
 192 */
 193 static void amdgpu_cs_free_to_ib_pool(amdgpu_context_handle context,
 194                                       amdgpu_ib_handle ib)
 195 {
 196         struct list_head *head;
 197         head = &context->ib_pools[ib->ib_size];
 198         pthread_mutex_lock(&context->pool_mutex);
 199         LIST_ADD(&ib->list_node, head);
 200         pthread_mutex_unlock(&context->pool_mutex);
 201         return;
 202 }
 203
 204 /**
 205  * Destroy all IB buffers in pools
 206  *
 207  * \param   dev - \c [in]  Device handle
 208  * \param   context - \c [in]  GPU Context
 209  *
 210  * \return  0 on success otherwise POSIX Error code
 211 */
 212 static int amdgpu_cs_destroy_ib_pool(amdgpu_context_handle context)
 213 {
 214         struct list_head *head;
 215         struct amdgpu_ib *next;
 216         struct amdgpu_ib *storage;
 217         int i, r;
 218
 219         r = 0;
 220         pthread_mutex_lock(&context->pool_mutex);
 221         for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++) {
 222                 head = &context->ib_pools[i];
 223                 LIST_FOR_EACH_ENTRY_SAFE(next, storage, head, list_node) {
 224                         r = amdgpu_cs_destroy_ib(next);
 225                         if (r)
 226                                 break;
 227                 }
 228         }
 229         pthread_mutex_unlock(&context->pool_mutex);
 230         pthread_mutex_destroy(&context->pool_mutex);
 231         return r;
 232 }
 233
 234 /**
 235  * Initialize pending IB lists
 236  *
 237  * \param   context - \c [in]  GPU Context
 238  *
 239  * \return  0 on success otherwise POSIX Error code
 240 */
 241 static int amdgpu_cs_init_pendings(amdgpu_context_handle context)
 242 {
 243         unsigned ip, inst;
 244         uint32_t ring;
 245         int r;
 246
 247         r = pthread_mutex_init(&context->pendings_mutex, NULL);
 248         if (r)
 249                 return r;
 250
 251         for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
 252                 for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
 253                         for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++)
 254                                 LIST_INITHEAD(&context->pendings[ip][inst][ring]);
 255
 256         LIST_INITHEAD(&context->freed);
 257         return 0;
 258 }
 259
 260 /**
 261  * Free pending IBs
 262  *
 263  * \param   dev - \c [in]  Device handle
 264  * \param   context - \c [in]  GPU Context
 265  *
 266  * \return  0 on success otherwise POSIX Error code
 267 */
 268 static int amdgpu_cs_destroy_pendings(amdgpu_context_handle context)
 269 {
 270         int ip, inst;
 271         uint32_t ring;
 272         int r;
 273         struct amdgpu_ib *next;
 274         struct amdgpu_ib *s;
 275         struct list_head *head;
 276
 277         r = 0;
 278         pthread_mutex_lock(&context->pendings_mutex);
 279         for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
 280                 for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
 281                         for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) {
 282                                 head = &context->pendings[ip][inst][ring];
 283                                 LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) {
 284                                         r = amdgpu_cs_destroy_ib(next);
 285                                         if (r)
 286                                                 break;
 287                                 }
 288                         }
 289
 290         head = &context->freed;
 291         LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) {
 292                 r = amdgpu_cs_destroy_ib(next);
 293                 if (r)
 294                         break;
 295         }
 296
 297         pthread_mutex_unlock(&context->pendings_mutex);
 298         pthread_mutex_destroy(&context->pendings_mutex);
 299         return r;
 300 }
 301
 302 /**
 303  * Add IB to pending IB lists without holding sequence_mutex.
 304  *
 305  * \param   context - \c [in]  GPU Context
 306  * \param   ib - \c [in]  ib to added to pending lists
 307  * \param   ip - \c [in]  hw ip block
 308  * \param   ip_instance - \c [in]  instance of the hw ip block
 309  * \param   ring - \c [in]  Ring of hw ip
 310  *
 311  * \return  N/A
 312 */
 313 static void amdgpu_cs_add_pending(amdgpu_context_handle context,
 314                                   amdgpu_ib_handle ib,
 315                                   unsigned ip, unsigned ip_instance,
 316                                   uint32_t ring)
 317 {
 318         struct list_head *head;
 319         pthread_mutex_lock(&context->pendings_mutex);
 320         head = &context->pendings[ip][ip_instance][ring];
 321         LIST_ADDTAIL(&ib->list_node, head);
 322         pthread_mutex_unlock(&context->pendings_mutex);
 323         return;
 324 }
 325
 326 /**
 327  * Garbage collector on a pending IB list without holding pendings_mutex.
 328  * This function by itself is not multithread safe.
 329  *
 330  * \param   context - \c [in]  GPU Context
 331  * \param   ip - \c [in]  hw ip block
 332  * \param   ip_instance - \c [in]  instance of the hw ip block
 333  * \param   ring - \c [in]  Ring of hw ip
 334  * \param   expired_fence - \c [in]  fence expired
 335  *
 336  * \return  N/A
 337  * \note Hold pendings_mutex before calling this function.
 338 */
 339 static void amdgpu_cs_pending_gc_not_safe(amdgpu_context_handle context,
 340                                           unsigned ip, unsigned ip_instance,
 341                                           uint32_t ring,
 342                                           uint64_t expired_fence)
 343 {
 344         struct list_head *head;
 345         struct amdgpu_ib *next;
 346         struct amdgpu_ib *s;
 347         int r;
 348
 349         head = &context->pendings[ip][ip_instance][ring];
 350         LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node)
 351                 if (next->cs_handle <= expired_fence) {
 352                         LIST_DEL(&next->list_node);
 353                         amdgpu_cs_free_to_ib_pool(context, next);
 354                 } else {
 355                         /* The pending list is a sorted list.
 356                            There is no need to continue. */
 357                         break;
 358                 }
 359
 360         /* walk the freed list as well */
 361         head = &context->freed;
 362         LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) {
 363                 bool busy;
 364
 365                 r = amdgpu_bo_wait_for_idle(next->buf_handle, 0, &busy);
 366                 if (r || busy)
 367                         break;
 368
 369                 LIST_DEL(&next->list_node);
 370                 amdgpu_cs_free_to_ib_pool(context, next);
 371         }
 372
 373         return;
 374 }
 375
 376 /**
 377  * Garbage collector on a pending IB list
 378  *
 379  * \param   context - \c [in]  GPU Context
 380  * \param   ip - \c [in]  hw ip block
 381  * \param   ip_instance - \c [in]  instance of the hw ip block
 382  * \param   ring - \c [in]  Ring of hw ip
 383  * \param   expired_fence - \c [in]  fence expired
 384  *
 385  * \return  N/A
 386 */
 387 static void amdgpu_cs_pending_gc(amdgpu_context_handle context,
 388                                  unsigned ip, unsigned ip_instance,
 389                                  uint32_t ring,
 390                                  uint64_t expired_fence)
 391 {
 392         pthread_mutex_lock(&context->pendings_mutex);
 393         amdgpu_cs_pending_gc_not_safe(context, ip, ip_instance, ring,
 394                                       expired_fence);
 395         pthread_mutex_unlock(&context->pendings_mutex);
 396         return;
 397 }
 398
 399 /**
 400  * Garbage collector on all pending IB lists
 401  *
 402  * \param   context - \c [in]  GPU Context
 403  *
 404  * \return  N/A
 405 */
 406 static void amdgpu_cs_all_pending_gc(amdgpu_context_handle context)
 407 {
 408         unsigned ip, inst;
 409         uint32_t ring;
 410         uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS];
 411
 412         pthread_mutex_lock(&context->sequence_mutex);
 413         for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
 414                 for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
 415                         for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++)
 416                                 expired_fences[ip][inst][ring] =
 417                                         context->expired_fences[ip][inst][ring];
 418         pthread_mutex_unlock(&context->sequence_mutex);
 419
 420         pthread_mutex_lock(&context->pendings_mutex);
 421         for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++)
 422                 for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++)
 423                         for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++)
 424                                 amdgpu_cs_pending_gc_not_safe(context, ip, inst, ring,
 425                                         expired_fences[ip][inst][ring]);
 426         pthread_mutex_unlock(&context->pendings_mutex);
 427 }
 428
 429 /**
 430  * Allocate an IB buffer
 431  * If there is no free IB buffer in pools, create one.
 432  *
 433  * \param   dev - \c [in] Device handle
 434  * \param   context - \c [in] GPU Context
 435  * \param   ib_size - \c [in] Size of allocation
 436  * \param   ib - \c [out] return the pointer to the allocated IB buffer
 437  *
 438  * \return  0 on success otherwise POSIX Error code
 439 */
 440 static int amdgpu_cs_alloc_ib_local(amdgpu_context_handle context,
 441                                     enum amdgpu_cs_ib_size ib_size,
 442                                     amdgpu_ib_handle *ib)
 443 {
 444         int r;
 445
 446         r = amdgpu_cs_alloc_from_ib_pool(context, ib_size, ib);
 447         if (!r)
 448                 return r;
 449
 450         amdgpu_cs_all_pending_gc(context);
 451
 452         /* Retry to allocate from free IB pools after garbage collector. */
 453         r = amdgpu_cs_alloc_from_ib_pool(context, ib_size, ib);
 454         if (!r)
 455                 return r;
 456
 457         /* There is no suitable IB in free pools. Create one. */
 458         r = amdgpu_cs_create_ib(context, ib_size, ib);
 459         return r;
 460 }
 461
 462 int amdgpu_cs_alloc_ib(amdgpu_context_handle context,
 463                        enum amdgpu_cs_ib_size ib_size,
 464                        struct amdgpu_cs_ib_alloc_result *output)
 465 {
 466         int r;
 467         amdgpu_ib_handle ib;
 468
 469         if (NULL == context)
 470                 return -EINVAL;
 471         if (NULL == output)
 472                 return -EINVAL;
 473         if (ib_size >= AMDGPU_CS_IB_SIZE_NUM)
 474                 return -EINVAL;
 475
 476         r = amdgpu_cs_alloc_ib_local(context, ib_size, &ib);
 477         if (!r) {
 478                 output->handle = ib;
 479                 output->cpu = ib->cpu;
 480                 output->mc_address = ib->virtual_mc_base_address;
 481         }
 482
 483         return r;
 484 }
 485
 486 int amdgpu_cs_free_ib(amdgpu_ib_handle handle)
 487 {
 488         amdgpu_context_handle context;
 489
 490         if (NULL == handle)
 491                 return -EINVAL;
 492
 493         context = handle->context;
 494         pthread_mutex_lock(&context->pendings_mutex);
 495         LIST_ADD(&handle->list_node, &context->freed);
 496         pthread_mutex_unlock(&context->pendings_mutex);
 497         return 0;
 498 }
 499
 500 /**
 501  * Create command submission context
 502  *
 503  * \param   dev - \c [in] amdgpu device handle
 504  * \param   context - \c [out] amdgpu context handle
 505  *
 506  * \return  0 on success otherwise POSIX Error code
 507 */
 508 int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
 509                          amdgpu_context_handle *context)
 510 {
 511         struct amdgpu_context *gpu_context;
 512         union drm_amdgpu_ctx args;
 513         int r;
 514
 515         if (NULL == dev)
 516                 return -EINVAL;
 517         if (NULL == context)
 518                 return -EINVAL;
 519
 520         gpu_context = calloc(1, sizeof(struct amdgpu_context));
 521         if (NULL == gpu_context)
 522                 return -ENOMEM;
 523
 524         gpu_context->dev = dev;
 525
 526         r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
 527         if (r)
 528                 goto error_mutex;
 529
 530         r = amdgpu_cs_init_ib_pool(gpu_context);
 531         if (r)
 532                 goto error_pool;
 533
 534         r = amdgpu_cs_init_pendings(gpu_context);
 535         if (r)
 536                 goto error_pendings;
 537
 538         r = amdgpu_cs_alloc_ib_local(gpu_context, amdgpu_cs_ib_size_4K,
 539                                      &gpu_context->fence_ib);
 540         if (r)
 541                 goto error_fence_ib;
 542
 543
 544         memset(&args, 0, sizeof(args));
 545         args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
 546         r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
 547         if (r)
 548                 goto error_kernel;
 549
 550         gpu_context->id = args.out.alloc.ctx_id;
 551         *context = (amdgpu_context_handle)gpu_context;
 552
 553         return 0;
 554
 555 error_kernel:
 556         amdgpu_cs_free_ib(gpu_context->fence_ib);
 557
 558 error_fence_ib:
 559         amdgpu_cs_destroy_pendings(gpu_context);
 560
 561 error_pendings:
 562         amdgpu_cs_destroy_ib_pool(gpu_context);
 563
 564 error_pool:
 565         pthread_mutex_destroy(&gpu_context->sequence_mutex);
 566
 567 error_mutex:
 568         free(gpu_context);
 569         return r;
 570 }
 571
 572 /**
 573  * Release command submission context
 574  *
 575  * \param   dev - \c [in] amdgpu device handle
 576  * \param   context - \c [in] amdgpu context handle
 577  *
 578  * \return  0 on success otherwise POSIX Error code
 579 */
 580 int amdgpu_cs_ctx_free(amdgpu_context_handle context)
 581 {
 582         union drm_amdgpu_ctx args;
 583         int r;
 584
 585         if (NULL == context)
 586                 return -EINVAL;
 587
 588         r = amdgpu_cs_free_ib(context->fence_ib);
 589         if (r)
 590                 return r;
 591
 592         r = amdgpu_cs_destroy_pendings(context);
 593         if (r)
 594                 return r;
 595
 596         r = amdgpu_cs_destroy_ib_pool(context);
 597         if (r)
 598                 return r;
 599
 600         pthread_mutex_destroy(&context->sequence_mutex);
 601
 602         /* now deal with kernel side */
 603         memset(&args, 0, sizeof(args));
 604         args.in.op = AMDGPU_CTX_OP_FREE_CTX;
 605         args.in.ctx_id = context->id;
 606         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
 607                                 &args, sizeof(args));
 608
 609         free(context);
 610
 611         return r;
 612 }
 613
 614 int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
 615                                 uint32_t *state, uint32_t *hangs)
 616 {
 617         union drm_amdgpu_ctx args;
 618         int r;
 619
 620         if (!context)
 621                 return -EINVAL;
 622
 623         memset(&args, 0, sizeof(args));
 624         args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
 625         args.in.ctx_id = context->id;
 626         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
 627                                 &args, sizeof(args));
 628         if (!r) {
 629                 *state = args.out.state.reset_status;
 630                 *hangs = args.out.state.hangs;
 631         }
 632         return r;
 633 }
 634
 635 static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring)
 636 {
 637         return ip * AMDGPU_CS_MAX_RINGS + ring;
 638 }
 639
 640 /**
 641  * Submit command to kernel DRM
 642  * \param   dev - \c [in]  Device handle
 643  * \param   context - \c [in]  GPU Context
 644  * \param   ibs_request - \c [in]  Pointer to submission requests
 645  * \param   fence - \c [out] return fence for this submission
 646  *
 647  * \return  0 on success otherwise POSIX Error code
 648  * \sa amdgpu_cs_submit()
 649 */
 650 static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 651                                 struct amdgpu_cs_request *ibs_request,
 652                                 uint64_t *fence)
 653 {
 654         int r;
 655         uint32_t i, size;
 656         union drm_amdgpu_cs cs;
 657         uint64_t *chunk_array;
 658         struct drm_amdgpu_cs_chunk *chunks;
 659         struct drm_amdgpu_cs_chunk_data *chunk_data;
 660
 661         if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
 662                 return -EINVAL;
 663         if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
 664                 return -EINVAL;
 665         if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
 666                 return -EINVAL;
 667
 668         size = (ibs_request->number_of_ibs + 1) * (
 669                 sizeof(uint64_t) +
 670                 sizeof(struct drm_amdgpu_cs_chunk) +
 671                 sizeof(struct drm_amdgpu_cs_chunk_data));
 672         chunk_array = malloc(size);
 673         if (NULL == chunk_array)
 674                 return -ENOMEM;
 675         memset(chunk_array, 0, size);
 676
 677         chunks = (struct drm_amdgpu_cs_chunk *)(chunk_array + ibs_request->number_of_ibs + 1);
 678         chunk_data = (struct drm_amdgpu_cs_chunk_data *)(chunks + ibs_request->number_of_ibs + 1);
 679
 680         memset(&cs, 0, sizeof(cs));
 681         cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
 682         cs.in.ctx_id = context->id;
 683         if (ibs_request->resources)
 684                 cs.in.bo_list_handle = ibs_request->resources->handle;
 685         cs.in.num_chunks = ibs_request->number_of_ibs;
 686         /* IB chunks */
 687         for (i = 0; i < ibs_request->number_of_ibs; i++) {
 688                 struct amdgpu_cs_ib_info *ib;
 689                 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
 690                 chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
 691                 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
 692                 chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
 693
 694                 ib = &ibs_request->ibs[i];
 695
 696                 chunk_data[i].ib_data.handle = ib->ib_handle->buf_handle->handle;
 697                 chunk_data[i].ib_data.va_start = ib->ib_handle->virtual_mc_base_address;
 698                 chunk_data[i].ib_data.ib_bytes = ib->size * 4;
 699                 chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
 700                 chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
 701                 chunk_data[i].ib_data.ring = ibs_request->ring;
 702                 chunk_data[i].ib_data.flags = ib->flags;
 703         }
 704
 705         pthread_mutex_lock(&context->sequence_mutex);
 706
 707         if (ibs_request->ip_type != AMDGPU_HW_IP_UVD &&
 708             ibs_request->ip_type != AMDGPU_HW_IP_VCE) {
 709                 i = cs.in.num_chunks++;
 710
 711                 /* fence chunk */
 712                 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
 713                 chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
 714                 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
 715                 chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
 716
 717                 /* fence bo handle */
 718                 chunk_data[i].fence_data.handle = context->fence_ib->buf_handle->handle;
 719                 /* offset */
 720                 chunk_data[i].fence_data.offset = amdgpu_cs_fence_index(
 721                         ibs_request->ip_type, ibs_request->ring);
 722                 chunk_data[i].fence_data.offset *= sizeof(uint64_t);
 723         }
 724
 725         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
 726                                 &cs, sizeof(cs));
 727         if (r)
 728                 goto error_unlock;
 729
 730
 731         /* Hold sequence_mutex while adding record to the pending list.
 732            So the pending list is a sorted list according to fence value. */
 733
 734         for (i = 0; i < ibs_request->number_of_ibs; i++) {
 735                 struct amdgpu_cs_ib_info *ib;
 736
 737                 ib = &ibs_request->ibs[i];
 738                 if (ib->flags & AMDGPU_CS_REUSE_IB)
 739                         continue;
 740
 741                 ib->ib_handle->cs_handle = cs.out.handle;
 742
 743                 amdgpu_cs_add_pending(context, ib->ib_handle, ibs_request->ip_type,
 744                                       ibs_request->ip_instance,
 745                                       ibs_request->ring);
 746         }
 747
 748         *fence = cs.out.handle;
 749
 750         pthread_mutex_unlock(&context->sequence_mutex);
 751
 752         free(chunk_array);
 753         return 0;
 754
 755 error_unlock:
 756         pthread_mutex_unlock(&context->sequence_mutex);
 757         free(chunk_array);
 758         return r;
 759 }
 760
 761 int amdgpu_cs_submit(amdgpu_context_handle context,
 762                      uint64_t flags,
 763                      struct amdgpu_cs_request *ibs_request,
 764                      uint32_t number_of_requests,
 765                      uint64_t *fences)
 766 {
 767         uint32_t i;
 768         int r;
 769
 770         if (NULL == context)
 771                 return -EINVAL;
 772         if (NULL == ibs_request)
 773                 return -EINVAL;
 774         if (NULL == fences)
 775                 return -EINVAL;
 776
 777         r = 0;
 778         for (i = 0; i < number_of_requests; i++) {
 779                 r = amdgpu_cs_submit_one(context, ibs_request, fences);
 780                 if (r)
 781                         break;
 782                 fences++;
 783                 ibs_request++;
 784         }
 785
 786         return r;
 787 }
 788
 789 /**
 790  * Calculate absolute timeout.
 791  *
 792  * \param   timeout - \c [in] timeout in nanoseconds.
 793  *
 794  * \return  absolute timeout in nanoseconds
 795 */
 796 uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
 797 {
 798         int r;
 799
 800         if (timeout != AMDGPU_TIMEOUT_INFINITE) {
 801                 struct timespec current;
 802                 r = clock_gettime(CLOCK_MONOTONIC, &current);
 803                 if (r)
 804                         return r;
 805
 806                 timeout += ((uint64_t)current.tv_sec) * 1000000000ull;
 807                 timeout += current.tv_nsec;
 808         }
 809         return timeout;
 810 }
 811
 812 static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
 813                                 unsigned ip,
 814                                 unsigned ip_instance,
 815                                 uint32_t ring,
 816                                 uint64_t handle,
 817                                 uint64_t timeout_ns,
 818                                 bool *busy)
 819 {
 820         amdgpu_device_handle dev = context->dev;
 821         union drm_amdgpu_wait_cs args;
 822         int r;
 823
 824         memset(&args, 0, sizeof(args));
 825         args.in.handle = handle;
 826         args.in.ip_type = ip;
 827         args.in.ip_instance = ip_instance;
 828         args.in.ring = ring;
 829         args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
 830         args.in.ctx_id = context->id;
 831
 832         /* Handle errors manually here because of timeout */
 833         r = ioctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
 834         if (r == -1 && (errno == EINTR || errno == EAGAIN)) {
 835                 *busy = true;
 836                 return 0;
 837         } else if (r)
 838                 return -errno;
 839
 840         *busy = args.out.status;
 841         return 0;
 842 }
 843
 844 int amdgpu_cs_query_fence_status(struct amdgpu_cs_query_fence *fence,
 845                                  uint32_t *expired)
 846 {
 847         amdgpu_context_handle context;
 848         uint64_t *signaled_fence;
 849         uint64_t *expired_fence;
 850         unsigned ip_type, ip_instance;
 851         uint32_t ring;
 852         bool busy = true;
 853         int r;
 854
 855         if (NULL == fence)
 856                 return -EINVAL;
 857         if (NULL == expired)
 858                 return -EINVAL;
 859         if (NULL == fence->context)
 860                 return -EINVAL;
 861         if (fence->ip_type >= AMDGPU_HW_IP_NUM)
 862                 return -EINVAL;
 863         if (fence->ring >= AMDGPU_CS_MAX_RINGS)
 864                 return -EINVAL;
 865
 866         context = fence->context;
 867         ip_type = fence->ip_type;
 868         ip_instance = fence->ip_instance;
 869         ring = fence->ring;
 870         signaled_fence = context->fence_ib->cpu;
 871         signaled_fence += amdgpu_cs_fence_index(ip_type, ring);
 872         expired_fence = &context->expired_fences[ip_type][ip_instance][ring];
 873         *expired = false;
 874
 875         pthread_mutex_lock(&context->sequence_mutex);
 876         if (fence->fence <= *expired_fence) {
 877                 /* This fence value is expired already. */
 878                 pthread_mutex_unlock(&context->sequence_mutex);
 879                 *expired = true;
 880                 return 0;
 881         }
 882
 883         if (fence->fence <= *signaled_fence) {
 884                 /* This fence value is signaled already. */
 885                 *expired_fence = *signaled_fence;
 886                 pthread_mutex_unlock(&context->sequence_mutex);
 887                 amdgpu_cs_pending_gc(context, ip_type, ip_instance, ring,
 888                                      fence->fence);
 889                 *expired = true;
 890                 return 0;
 891         }
 892
 893         pthread_mutex_unlock(&context->sequence_mutex);
 894
 895         r = amdgpu_ioctl_wait_cs(context, ip_type, ip_instance, ring,
 896                                  fence->fence, fence->timeout_ns, &busy);
 897         if (!r && !busy) {
 898                 *expired = true;
 899                 pthread_mutex_lock(&context->sequence_mutex);
 900                 /* The thread doesn't hold sequence_mutex. Other thread could
 901                    update *expired_fence already. Check whether there is a
 902                    newerly expired fence. */
 903                 if (fence->fence > *expired_fence) {
 904                         *expired_fence = fence->fence;
 905                         pthread_mutex_unlock(&context->sequence_mutex);
 906                         amdgpu_cs_pending_gc(context, ip_type, ip_instance,
 907                                              ring, fence->fence);
 908                 } else {
 909                         pthread_mutex_unlock(&context->sequence_mutex);
 910                 }
 911         }
 912
 913         return r;
 914 }
 915