amdgpu/amdgpu_cs.c

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  */
  23
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif
  27
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <errno.h>
  32 #include <pthread.h>
  33 #include <sched.h>
  34 #include <sys/ioctl.h>
  35
  36 #include "xf86drm.h"
  37 #include "amdgpu_drm.h"
  38 #include "amdgpu_internal.h"
  39
  40 /**
  41  * Create command submission context
  42  *
  43  * \param   dev - \c [in] amdgpu device handle
  44  * \param   context - \c [out] amdgpu context handle
  45  *
  46  * \return  0 on success otherwise POSIX Error code
  47 */
  48 int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
  49                          amdgpu_context_handle *context)
  50 {
  51         struct amdgpu_context *gpu_context;
  52         union drm_amdgpu_ctx args;
  53         int r;
  54
  55         if (NULL == dev)
  56                 return -EINVAL;
  57         if (NULL == context)
  58                 return -EINVAL;
  59
  60         gpu_context = calloc(1, sizeof(struct amdgpu_context));
  61         if (NULL == gpu_context)
  62                 return -ENOMEM;
  63
  64         gpu_context->dev = dev;
  65
  66         r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
  67         if (r)
  68                 goto error;
  69
  70         /* Create the context */
  71         memset(&args, 0, sizeof(args));
  72         args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
  73         r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
  74         if (r)
  75                 goto error;
  76
  77         gpu_context->id = args.out.alloc.ctx_id;
  78         *context = (amdgpu_context_handle)gpu_context;
  79
  80         return 0;
  81
  82 error:
  83         pthread_mutex_destroy(&gpu_context->sequence_mutex);
  84         free(gpu_context);
  85         return r;
  86 }
  87
  88 /**
  89  * Release command submission context
  90  *
  91  * \param   dev - \c [in] amdgpu device handle
  92  * \param   context - \c [in] amdgpu context handle
  93  *
  94  * \return  0 on success otherwise POSIX Error code
  95 */
  96 int amdgpu_cs_ctx_free(amdgpu_context_handle context)
  97 {
  98         union drm_amdgpu_ctx args;
  99         int r;
 100
 101         if (NULL == context)
 102                 return -EINVAL;
 103
 104         pthread_mutex_destroy(&context->sequence_mutex);
 105
 106         /* now deal with kernel side */
 107         memset(&args, 0, sizeof(args));
 108         args.in.op = AMDGPU_CTX_OP_FREE_CTX;
 109         args.in.ctx_id = context->id;
 110         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
 111                                 &args, sizeof(args));
 112
 113         free(context);
 114
 115         return r;
 116 }
 117
 118 int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
 119                                 uint32_t *state, uint32_t *hangs)
 120 {
 121         union drm_amdgpu_ctx args;
 122         int r;
 123
 124         if (!context)
 125                 return -EINVAL;
 126
 127         memset(&args, 0, sizeof(args));
 128         args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
 129         args.in.ctx_id = context->id;
 130         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
 131                                 &args, sizeof(args));
 132         if (!r) {
 133                 *state = args.out.state.reset_status;
 134                 *hangs = args.out.state.hangs;
 135         }
 136         return r;
 137 }
 138
 139 /**
 140  * Submit command to kernel DRM
 141  * \param   dev - \c [in]  Device handle
 142  * \param   context - \c [in]  GPU Context
 143  * \param   ibs_request - \c [in]  Pointer to submission requests
 144  * \param   fence - \c [out] return fence for this submission
 145  *
 146  * \return  0 on success otherwise POSIX Error code
 147  * \sa amdgpu_cs_submit()
 148 */
 149 static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 150                                 struct amdgpu_cs_request *ibs_request)
 151 {
 152         union drm_amdgpu_cs cs;
 153         uint64_t *chunk_array;
 154         struct drm_amdgpu_cs_chunk *chunks;
 155         struct drm_amdgpu_cs_chunk_data *chunk_data;
 156         struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
 157         uint32_t i, size;
 158         bool user_fence;
 159         int r = 0;
 160
 161         if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
 162                 return -EINVAL;
 163         if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
 164                 return -EINVAL;
 165         if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
 166                 return -EINVAL;
 167         user_fence = (ibs_request->fence_info.handle != NULL);
 168
 169         size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);
 170
 171         chunk_array = alloca(sizeof(uint64_t) * size);
 172         chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
 173
 174         size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);
 175
 176         chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);
 177
 178         memset(&cs, 0, sizeof(cs));
 179         cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
 180         cs.in.ctx_id = context->id;
 181         if (ibs_request->resources)
 182                 cs.in.bo_list_handle = ibs_request->resources->handle;
 183         cs.in.num_chunks = ibs_request->number_of_ibs;
 184         /* IB chunks */
 185         for (i = 0; i < ibs_request->number_of_ibs; i++) {
 186                 struct amdgpu_cs_ib_info *ib;
 187                 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
 188                 chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
 189                 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
 190                 chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
 191
 192                 ib = &ibs_request->ibs[i];
 193
 194                 chunk_data[i].ib_data._pad = 0;
 195                 chunk_data[i].ib_data.va_start = ib->ib_mc_address;
 196                 chunk_data[i].ib_data.ib_bytes = ib->size * 4;
 197                 chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
 198                 chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
 199                 chunk_data[i].ib_data.ring = ibs_request->ring;
 200                 chunk_data[i].ib_data.flags = ib->flags;
 201         }
 202
 203         pthread_mutex_lock(&context->sequence_mutex);
 204
 205         if (user_fence) {
 206                 i = cs.in.num_chunks++;
 207
 208                 /* fence chunk */
 209                 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
 210                 chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
 211                 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
 212                 chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
 213
 214                 /* fence bo handle */
 215                 chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
 216                 /* offset */
 217                 chunk_data[i].fence_data.offset =
 218                         ibs_request->fence_info.offset * sizeof(uint64_t);
 219         }
 220
 221         if (ibs_request->number_of_dependencies) {
 222                 dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
 223                         ibs_request->number_of_dependencies);
 224                 if (!dependencies) {
 225                         r = -ENOMEM;
 226                         goto error_unlock;
 227                 }
 228
 229                 for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
 230                         struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
 231                         struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
 232                         dep->ip_type = info->ip_type;
 233                         dep->ip_instance = info->ip_instance;
 234                         dep->ring = info->ring;
 235                         dep->ctx_id = info->context->id;
 236                         dep->handle = info->fence;
 237                 }
 238
 239                 i = cs.in.num_chunks++;
 240
 241                 /* dependencies chunk */
 242                 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
 243                 chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
 244                 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
 245                         * ibs_request->number_of_dependencies;
 246                 chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
 247         }
 248
 249         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
 250                                 &cs, sizeof(cs));
 251         if (r)
 252                 goto error_unlock;
 253
 254         ibs_request->seq_no = cs.out.handle;
 255
 256 error_unlock:
 257         pthread_mutex_unlock(&context->sequence_mutex);
 258         free(dependencies);
 259         return r;
 260 }
 261
 262 int amdgpu_cs_submit(amdgpu_context_handle context,
 263                      uint64_t flags,
 264                      struct amdgpu_cs_request *ibs_request,
 265                      uint32_t number_of_requests)
 266 {
 267         uint32_t i;
 268         int r;
 269
 270         if (NULL == context)
 271                 return -EINVAL;
 272         if (NULL == ibs_request)
 273                 return -EINVAL;
 274
 275         r = 0;
 276         for (i = 0; i < number_of_requests; i++) {
 277                 r = amdgpu_cs_submit_one(context, ibs_request);
 278                 if (r)
 279                         break;
 280                 ibs_request++;
 281         }
 282
 283         return r;
 284 }
 285
 286 /**
 287  * Calculate absolute timeout.
 288  *
 289  * \param   timeout - \c [in] timeout in nanoseconds.
 290  *
 291  * \return  absolute timeout in nanoseconds
 292 */
 293 drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
 294 {
 295         int r;
 296
 297         if (timeout != AMDGPU_TIMEOUT_INFINITE) {
 298                 struct timespec current;
 299                 r = clock_gettime(CLOCK_MONOTONIC, &current);
 300                 if (r)
 301                         return r;
 302
 303                 timeout += ((uint64_t)current.tv_sec) * 1000000000ull;
 304                 timeout += current.tv_nsec;
 305         }
 306         return timeout;
 307 }
 308
 309 static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
 310                                 unsigned ip,
 311                                 unsigned ip_instance,
 312                                 uint32_t ring,
 313                                 uint64_t handle,
 314                                 uint64_t timeout_ns,
 315                                 uint64_t flags,
 316                                 bool *busy)
 317 {
 318         amdgpu_device_handle dev = context->dev;
 319         union drm_amdgpu_wait_cs args;
 320         int r;
 321
 322         memset(&args, 0, sizeof(args));
 323         args.in.handle = handle;
 324         args.in.ip_type = ip;
 325         args.in.ip_instance = ip_instance;
 326         args.in.ring = ring;
 327         args.in.ctx_id = context->id;
 328
 329         if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
 330                 args.in.timeout = timeout_ns;
 331         else
 332                 args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
 333
 334         r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
 335         if (r)
 336                 return -errno;
 337
 338         *busy = args.out.status;
 339         return 0;
 340 }
 341
 342 int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
 343                                  uint64_t timeout_ns,
 344                                  uint64_t flags,
 345                                  uint32_t *expired)
 346 {
 347         bool busy = true;
 348         int r;
 349
 350         if (NULL == fence)
 351                 return -EINVAL;
 352         if (NULL == expired)
 353                 return -EINVAL;
 354         if (NULL == fence->context)
 355                 return -EINVAL;
 356         if (fence->ip_type >= AMDGPU_HW_IP_NUM)
 357                 return -EINVAL;
 358         if (fence->ring >= AMDGPU_CS_MAX_RINGS)
 359                 return -EINVAL;
 360
 361         *expired = false;
 362
 363         r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
 364                                 fence->ip_instance, fence->ring,
 365                                 fence->fence, timeout_ns, flags, &busy);
 366
 367         if (!r && !busy)
 368                 *expired = true;
 369
 370         return r;
 371 }
 372