/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <pthread.h>
#include <sys/ioctl.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "xf86drm.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);

/**
 * Create command submission context
 *
 * \param   dev      - \c [in] Device handle. See #amdgpu_device_initialize()
 * \param   priority - \c [in] Context creation flags. See AMDGPU_CTX_PRIORITY_*
 * \param   context  - \c [out] GPU Context handle
 *
 * \return  0 on success otherwise POSIX Error code
 */
int amdgpu_cs_ctx_create2(amdgpu_device_handle dev, uint32_t priority,
			  amdgpu_context_handle *context)
{
	struct amdgpu_context *gpu_context;
	union drm_amdgpu_ctx args;
	int i, j, k;
	int r;

	if (!dev || !context)
		return -EINVAL;

	gpu_context = calloc(1, sizeof(struct amdgpu_context));
	if (!gpu_context)
		return -ENOMEM;

	gpu_context->dev = dev;

	r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
	if (r)
		goto error;

	/* Create the context */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	args.in.priority = priority;

	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
	if (r)
		goto error;

	gpu_context->id = args.out.alloc.ctx_id;
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
				list_inithead(&gpu_context->sem_list[i][j][k]);
	*context = (amdgpu_context_handle)gpu_context;

	return 0;

error:
	pthread_mutex_destroy(&gpu_context->sequence_mutex);
	free(gpu_context);
	return r;
}

int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
			 amdgpu_context_handle *context)
{
	return amdgpu_cs_ctx_create2(dev, AMDGPU_CTX_PRIORITY_NORMAL, context);
}

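/*
 * Illustrative usage sketch (not part of the library): the typical context
 * lifetime, assuming "dev" was obtained from amdgpu_device_initialize():
 *
 *	amdgpu_context_handle ctx;
 *	int r = amdgpu_cs_ctx_create(dev, &ctx);
 *	if (r)
 *		return r;               // POSIX error code
 *	// ... build and submit command streams against ctx ...
 *	r = amdgpu_cs_ctx_free(ctx);    // releases the kernel context
 */
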
/**
 * Release command submission context
 *
 * \param   context - \c [in] amdgpu context handle
 *
 * \return  0 on success otherwise POSIX Error code
 */
int amdgpu_cs_ctx_free(amdgpu_context_handle context)
{
	union drm_amdgpu_ctx args;
	int i, j, k;
	int r;

	if (!context)
		return -EINVAL;

	pthread_mutex_destroy(&context->sequence_mutex);

	/* now deal with kernel side */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
				amdgpu_semaphore_handle sem;
				LIST_FOR_EACH_ENTRY(sem, &context->sem_list[i][j][k], list) {
					list_del(&sem->list);
					amdgpu_cs_reset_sem(sem);
					amdgpu_cs_unreference_sem(sem);
				}
			}
		}
	}
	free(context);
	return r;
}

int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
				uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int r;

	if (!context)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (!r) {
		*state = args.out.state.reset_status;
		*hangs = args.out.state.hangs;
	}
	return r;
}

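/*
 * Illustrative usage sketch: checking whether a GPU reset affected the
 * context after a failed or timed-out wait ("recreate_context" is a
 * hypothetical application recovery path):
 *
 *	uint32_t state, hangs;
 *	if (!amdgpu_cs_query_reset_state(ctx, &state, &hangs) &&
 *	    state != AMDGPU_CTX_NO_RESET)
 *		recreate_context();
 */
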
/**
 * Submit command to kernel DRM
 *
 * \param   context     - \c [in] GPU Context
 * \param   ibs_request - \c [in] Pointer to submission request
 *
 * \return  0 on success otherwise POSIX Error code
 * \sa amdgpu_cs_submit()
 */
static int amdgpu_cs_submit_one(amdgpu_context_handle context,
				struct amdgpu_cs_request *ibs_request)
{
	union drm_amdgpu_cs cs;
	uint64_t *chunk_array;
	struct drm_amdgpu_cs_chunk *chunks;
	struct drm_amdgpu_cs_chunk_data *chunk_data;
	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
	struct list_head *sem_list;
	amdgpu_semaphore_handle sem, tmp;
	uint32_t i, size, sem_count = 0;
	bool user_fence;
	int r = 0;

	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
		return -EINVAL;
	if (ibs_request->number_of_ibs == 0) {
		ibs_request->seq_no = AMDGPU_NULL_SUBMIT_SEQ;
		return 0;
	}
	user_fence = (ibs_request->fence_info.handle != NULL);

	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;

	chunk_array = alloca(sizeof(uint64_t) * size);
	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);

	size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);

	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);

	memset(&cs, 0, sizeof(cs));
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	if (ibs_request->resources)
		cs.in.bo_list_handle = ibs_request->resources->handle;
	cs.in.num_chunks = ibs_request->number_of_ibs;
	/* IB chunks */
	for (i = 0; i < ibs_request->number_of_ibs; i++) {
		struct amdgpu_cs_ib_info *ib;
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		ib = &ibs_request->ibs[i];

		chunk_data[i].ib_data._pad = 0;
		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
		chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
		chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
		chunk_data[i].ib_data.ring = ibs_request->ring;
		chunk_data[i].ib_data.flags = ib->flags;
	}

	pthread_mutex_lock(&context->sequence_mutex);

	if (user_fence) {
		i = cs.in.num_chunks++;

		/* fence chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		/* fence bo handle */
		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
		/* offset */
		chunk_data[i].fence_data.offset =
			ibs_request->fence_info.offset * sizeof(uint64_t);
	}

	if (ibs_request->number_of_dependencies) {
		dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
			ibs_request->number_of_dependencies);
		if (!dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}

		for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
			struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
			struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;
		}

		i = cs.in.num_chunks++;

		/* dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
			* ibs_request->number_of_dependencies;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
	}

	sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
	LIST_FOR_EACH_ENTRY(sem, sem_list, list)
		sem_count++;
	if (sem_count) {
		sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
		if (!sem_dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}
		sem_count = 0;
		LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, sem_list, list) {
			struct amdgpu_cs_fence *info = &sem->signal_fence;
			struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;

			list_del(&sem->list);
			amdgpu_cs_reset_sem(sem);
			amdgpu_cs_unreference_sem(sem);
		}
		i = cs.in.num_chunks++;

		/* semaphore dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
	}

	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (r)
		goto error_unlock;

	ibs_request->seq_no = cs.out.handle;
	context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
error_unlock:
	pthread_mutex_unlock(&context->sequence_mutex);
	free(sem_dependencies);
	free(dependencies);
	return r;
}

int amdgpu_cs_submit(amdgpu_context_handle context,
		     uint64_t flags,
		     struct amdgpu_cs_request *ibs_request,
		     uint32_t number_of_requests)
{
	uint32_t i;
	int r;

	if (!context || !ibs_request)
		return -EINVAL;

	r = 0;
	for (i = 0; i < number_of_requests; i++) {
		r = amdgpu_cs_submit_one(context, ibs_request);
		if (r)
			break;
		ibs_request++;
	}

	return r;
}

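/*
 * Illustrative usage sketch: submitting a single IB on the GFX ring.
 * "ib_mc_address" and "ib_size_dw" are assumed to describe an already
 * filled, GPU-mapped command buffer; "bo_list" is an optional
 * amdgpu_bo_list_handle covering the BOs the IB references:
 *
 *	struct amdgpu_cs_ib_info ib = {0};
 *	struct amdgpu_cs_request req = {0};
 *
 *	ib.ib_mc_address = ib_mc_address;
 *	ib.size = ib_size_dw;            // counted in dwords
 *
 *	req.ip_type = AMDGPU_HW_IP_GFX;
 *	req.ring = 0;
 *	req.number_of_ibs = 1;
 *	req.ibs = &ib;
 *	req.resources = bo_list;
 *
 *	r = amdgpu_cs_submit(ctx, 0, &req, 1);
 *	// on success req.seq_no identifies the submission for fence waits
 */
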
/**
 * Calculate absolute timeout.
 *
 * \param   timeout - \c [in] timeout in nanoseconds.
 *
 * \return  absolute timeout in nanoseconds
 */
drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
{
	int r;

	if (timeout != AMDGPU_TIMEOUT_INFINITE) {
		struct timespec current;
		uint64_t current_ns;
		r = clock_gettime(CLOCK_MONOTONIC, &current);
		if (r) {
			fprintf(stderr, "clock_gettime() returned error (%d)!", errno);
			return AMDGPU_TIMEOUT_INFINITE;
		}

		current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
		current_ns += current.tv_nsec;
		timeout += current_ns;
		/* overflow: clamp to an infinite wait */
		if (timeout < current_ns)
			timeout = AMDGPU_TIMEOUT_INFINITE;
	}
	return timeout;
}

static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
				unsigned ip,
				unsigned ip_instance,
				uint32_t ring,
				uint64_t handle,
				uint64_t timeout_ns,
				uint64_t flags,
				bool *busy)
{
	amdgpu_device_handle dev = context->dev;
	union drm_amdgpu_wait_cs args;
	int r;

	memset(&args, 0, sizeof(args));
	args.in.handle = handle;
	args.in.ip_type = ip;
	args.in.ip_instance = ip_instance;
	args.in.ring = ring;
	args.in.ctx_id = context->id;

	if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
		args.in.timeout = timeout_ns;
	else
		args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);

	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
	if (r)
		return -errno;

	*busy = args.out.status;
	return 0;
}

int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
				 uint64_t timeout_ns,
				 uint64_t flags,
				 uint32_t *expired)
{
	bool busy = true;
	int r;

	if (!fence || !expired || !fence->context)
		return -EINVAL;
	if (fence->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (fence->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (fence->fence == AMDGPU_NULL_SUBMIT_SEQ) {
		*expired = true;
		return 0;
	}

	*expired = false;

	r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
				 fence->ip_instance, fence->ring,
				 fence->fence, timeout_ns, flags, &busy);

	if (!r && !busy)
		*expired = true;

	return r;
}

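/*
 * Illustrative usage sketch: waiting for the submission made in the earlier
 * sketch, reusing the request's ip_type/ring and the returned seq_no:
 *
 *	struct amdgpu_cs_fence fence = {0};
 *	uint32_t expired = 0;
 *
 *	fence.context = ctx;
 *	fence.ip_type = req.ip_type;
 *	fence.ring = req.ring;
 *	fence.fence = req.seq_no;
 *	r = amdgpu_cs_query_fence_status(&fence, AMDGPU_TIMEOUT_INFINITE,
 *					 0, &expired);
 *	// r == 0 && expired != 0 means the GPU finished the submission
 */
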
static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
				    uint32_t fence_count,
				    bool wait_all,
				    uint64_t timeout_ns,
				    uint32_t *status,
				    uint32_t *first)
{
	struct drm_amdgpu_fence *drm_fences;
	amdgpu_device_handle dev = fences[0].context->dev;
	union drm_amdgpu_wait_fences args;
	uint32_t i;
	int r;

	drm_fences = alloca(sizeof(struct drm_amdgpu_fence) * fence_count);
	for (i = 0; i < fence_count; i++) {
		drm_fences[i].ctx_id = fences[i].context->id;
		drm_fences[i].ip_type = fences[i].ip_type;
		drm_fences[i].ip_instance = fences[i].ip_instance;
		drm_fences[i].ring = fences[i].ring;
		drm_fences[i].seq_no = fences[i].fence;
	}

	memset(&args, 0, sizeof(args));
	args.in.fences = (uint64_t)(uintptr_t)drm_fences;
	args.in.fence_count = fence_count;
	args.in.wait_all = wait_all;
	args.in.timeout_ns = amdgpu_cs_calculate_timeout(timeout_ns);

	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &args);
	if (r)
		return -errno;

	*status = args.out.status;

	if (first)
		*first = args.out.first_signaled;

	return 0;
}

int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
			  uint32_t fence_count,
			  bool wait_all,
			  uint64_t timeout_ns,
			  uint32_t *status,
			  uint32_t *first)
{
	uint32_t i;

	/* Sanity check */
	if (!fences || !status || !fence_count)
		return -EINVAL;

	for (i = 0; i < fence_count; i++) {
		if (NULL == fences[i].context)
			return -EINVAL;
		if (fences[i].ip_type >= AMDGPU_HW_IP_NUM)
			return -EINVAL;
		if (fences[i].ring >= AMDGPU_CS_MAX_RINGS)
			return -EINVAL;
	}

	*status = 0;

	return amdgpu_ioctl_wait_fences(fences, fence_count, wait_all,
					timeout_ns, status, first);
}

int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
{
	struct amdgpu_semaphore *gpu_semaphore;

	if (!sem)
		return -EINVAL;

	gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
	if (!gpu_semaphore)
		return -ENOMEM;

	atomic_set(&gpu_semaphore->refcount, 1);
	*sem = gpu_semaphore;

	return 0;
}

int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
			       uint32_t ip_type,
			       uint32_t ip_instance,
			       uint32_t ring,
			       amdgpu_semaphore_handle sem)
{
	if (!ctx || !sem)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	/* sem has been signaled */
	if (sem->signal_fence.context)
		return -EINVAL;
	pthread_mutex_lock(&ctx->sequence_mutex);
	sem->signal_fence.context = ctx;
	sem->signal_fence.ip_type = ip_type;
	sem->signal_fence.ip_instance = ip_instance;
	sem->signal_fence.ring = ring;
	sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
	update_references(NULL, &sem->refcount);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}

int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
			     uint32_t ip_type,
			     uint32_t ip_instance,
			     uint32_t ring,
			     amdgpu_semaphore_handle sem)
{
	if (!ctx || !sem)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	/* must signal first */
	if (!sem->signal_fence.context)
		return -EINVAL;

	pthread_mutex_lock(&ctx->sequence_mutex);
	list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}

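/*
 * Illustrative usage sketch: making the compute ring wait for work already
 * submitted on the GFX ring of the same context. The signal side latches
 * the last sequence number of (ip_type, ip_instance, ring); the wait side
 * queues the semaphore so the next submission on the waiting ring picks it
 * up as a dependency:
 *
 *	amdgpu_semaphore_handle sem;
 *	amdgpu_cs_create_semaphore(&sem);
 *	amdgpu_cs_signal_semaphore(ctx, AMDGPU_HW_IP_GFX, 0, 0, sem);
 *	amdgpu_cs_wait_semaphore(ctx, AMDGPU_HW_IP_COMPUTE, 0, 0, sem);
 *	// the next amdgpu_cs_submit() on the compute ring waits for the
 *	// signaled GFX work; then drop our reference:
 *	amdgpu_cs_destroy_semaphore(sem);
 */
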
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
{
	if (!sem || !sem->signal_fence.context)
		return -EINVAL;

	sem->signal_fence.context = NULL;
	sem->signal_fence.ip_type = 0;
	sem->signal_fence.ip_instance = 0;
	sem->signal_fence.ring = 0;
	sem->signal_fence.fence = 0;

	return 0;
}

static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
{
	if (!sem)
		return -EINVAL;

	if (update_references(&sem->refcount, NULL))
		free(sem);
	return 0;
}

int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
{
	return amdgpu_cs_unreference_sem(sem);
}

int amdgpu_cs_create_syncobj2(amdgpu_device_handle dev,
			      uint32_t  flags,
			      uint32_t *handle)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjCreate(dev->fd, flags, handle);
}

int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
			     uint32_t *handle)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjCreate(dev->fd, 0, handle);
}

int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
			      uint32_t handle)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjDestroy(dev->fd, handle);
}

int amdgpu_cs_syncobj_wait(amdgpu_device_handle dev,
			   uint32_t *handles, unsigned num_handles,
			   int64_t timeout_nsec, unsigned flags,
			   uint32_t *first_signaled)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjWait(dev->fd, handles, num_handles, timeout_nsec,
			      flags, first_signaled);
}

int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
			     uint32_t handle,
			     int *shared_fd)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjHandleToFD(dev->fd, handle, shared_fd);
}

int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
			     int shared_fd,
			     uint32_t *handle)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
}

int amdgpu_cs_syncobj_export_sync_file(amdgpu_device_handle dev,
				       uint32_t syncobj,
				       int *sync_file_fd)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjExportSyncFile(dev->fd, syncobj, sync_file_fd);
}

int amdgpu_cs_syncobj_import_sync_file(amdgpu_device_handle dev,
				       uint32_t syncobj,
				       int sync_file_fd)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjImportSyncFile(dev->fd, syncobj, sync_file_fd);
}

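/*
 * Illustrative usage sketch: sharing a syncobj with another process through
 * a sync_file fd:
 *
 *	uint32_t syncobj;
 *	int fd;
 *
 *	r = amdgpu_cs_create_syncobj(dev, &syncobj);
 *	if (!r)
 *		r = amdgpu_cs_syncobj_export_sync_file(dev, syncobj, &fd);
 *	// send fd over a socket; the receiver imports it into its own
 *	// syncobj with amdgpu_cs_syncobj_import_sync_file()
 */
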
int amdgpu_cs_submit_raw(amdgpu_device_handle dev,
			 amdgpu_context_handle context,
			 amdgpu_bo_list_handle bo_list_handle,
			 int num_chunks,
			 struct drm_amdgpu_cs_chunk *chunks,
			 uint64_t *seq_no)
{
	union drm_amdgpu_cs cs = {0};
	uint64_t *chunk_array;
	int i, r;

	if (num_chunks == 0)
		return -EINVAL;

	chunk_array = alloca(sizeof(uint64_t) * num_chunks);
	for (i = 0; i < num_chunks; i++)
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	cs.in.bo_list_handle = bo_list_handle ? bo_list_handle->handle : 0;
	cs.in.num_chunks = num_chunks;
	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (r)
		return r;

	if (seq_no)
		*seq_no = cs.out.handle;
	return 0;
}

void amdgpu_cs_chunk_fence_info_to_data(struct amdgpu_cs_fence_info *fence_info,
					struct drm_amdgpu_cs_chunk_data *data)
{
	data->fence_data.handle = fence_info->handle->handle;
	data->fence_data.offset = fence_info->offset * sizeof(uint64_t);
}

void amdgpu_cs_chunk_fence_to_dep(struct amdgpu_cs_fence *fence,
				  struct drm_amdgpu_cs_chunk_dep *dep)
{
	dep->ip_type = fence->ip_type;
	dep->ip_instance = fence->ip_instance;
	dep->ring = fence->ring;
	dep->ctx_id = fence->context->id;
	dep->handle = fence->fence;
}

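/*
 * Illustrative usage sketch: a minimal raw submission of one IB plus a user
 * fence, built with the chunk helpers above. "ib_info" (struct
 * amdgpu_cs_ib_info) and "fence_info" (struct amdgpu_cs_fence_info) are
 * assumed to be filled by the caller:
 *
 *	struct drm_amdgpu_cs_chunk chunks[2];
 *	struct drm_amdgpu_cs_chunk_data chunk_data[2];
 *	uint64_t seq_no;
 *
 *	chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
 *	chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
 *	chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data[0];
 *	// ... fill chunk_data[0].ib_data from ib_info, as done in
 *	// amdgpu_cs_submit_one() ...
 *
 *	chunks[1].chunk_id = AMDGPU_CHUNK_ID_FENCE;
 *	chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
 *	chunks[1].chunk_data = (uint64_t)(uintptr_t)&chunk_data[1];
 *	amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[1]);
 *
 *	r = amdgpu_cs_submit_raw(dev, ctx, bo_list, 2, chunks, &seq_no);
 */
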
int amdgpu_cs_fence_to_handle(amdgpu_device_handle dev,
			      struct amdgpu_cs_fence *fence,
			      uint32_t what,
			      uint32_t *out_handle)
{
	union drm_amdgpu_fence_to_handle fth = {0};
	int r;

	fth.in.fence.ctx_id = fence->context->id;
	fth.in.fence.ip_type = fence->ip_type;
	fth.in.fence.ip_instance = fence->ip_instance;
	fth.in.fence.ring = fence->ring;
	fth.in.fence.seq_no = fence->fence;
	fth.in.what = what;

	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_FENCE_TO_HANDLE,
				&fth, sizeof(fth));
	if (r == 0)
		*out_handle = fth.out.handle;
	return r;
}
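
/*
 * Illustrative usage sketch: turning a filled struct amdgpu_cs_fence (as in
 * the fence-wait sketch earlier) into a sync_file fd that can be handed to
 * other APIs or processes:
 *
 *	uint32_t fd_handle;
 *	r = amdgpu_cs_fence_to_handle(dev, &fence,
 *				      AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD,
 *				      &fd_handle);
 *	// on success fd_handle is a sync_file file descriptor
 */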