drivers/gpu/drm/nouveau/nouveau_exec.c

   1 // SPDX-License-Identifier: MIT
   2
   3 #include <drm/drm_exec.h>
   4
   5 #include "nouveau_drv.h"
   6 #include "nouveau_gem.h"
   7 #include "nouveau_mem.h"
   8 #include "nouveau_dma.h"
   9 #include "nouveau_exec.h"
  10 #include "nouveau_abi16.h"
  11 #include "nouveau_chan.h"
  12 #include "nouveau_sched.h"
  13 #include "nouveau_uvmm.h"
  14
  15 /**
  16  * DOC: Overview
  17  *
  18  * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
  19  * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
  20  *
  21  * In order to use the UAPI firstly a user client must initialize the VA space
  22  * using the DRM_NOUVEAU_VM_INIT ioctl specifying which region of the VA space
  23  * should be managed by the kernel and which by the UMD.
  24  *
  25  * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
  26  * userspace-manageable portion of the VA space. It provides operations to map
  27  * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
  28  * backed by a GEM object and the kernel will ignore GEM handles provided
  29  * alongside a sparse mapping.
  30  *
  31  * Userspace may request memory backed mappings either within or outside of the
  32  * bounds (but not crossing those bounds) of a previously mapped sparse
  33  * mapping. Subsequently requested memory backed mappings within a sparse
  34  * mapping will take precedence over the corresponding range of the sparse
  35  * mapping. If such memory backed mappings are unmapped the kernel will make
  36  * sure that the corresponding sparse mapping will take their place again.
  37  * Requests to unmap a sparse mapping that still contains memory backed mappings
  38  * will result in those memory backed mappings being unmapped first.
  39  *
  40  * Unmap requests are not bound to the range of existing mappings and can even
  41  * overlap the bounds of sparse mappings. For such a request the kernel will
  42  * make sure to unmap all memory backed mappings within the given range,
  43  * splitting up memory backed mappings which are only partially contained
  44  * within the given range. Unmap requests with the sparse flag set must match
  45  * the range of a previously mapped sparse mapping exactly though.
  46  *
  47  * While the kernel generally permits arbitrary sequences and ranges of memory
  48  * backed mappings being mapped and unmapped, either within a single or multiple
  49  * VM_BIND ioctl calls, there are some restrictions for sparse mappings.
  50  *
  51  * The kernel does not permit to:
  52  *   - unmap non-existent sparse mappings
  53  *   - unmap a sparse mapping and map a new sparse mapping overlapping the range
  54  *     of the previously unmapped sparse mapping within the same VM_BIND ioctl
  55  *   - unmap a sparse mapping and map new memory backed mappings overlapping the
  56  *     range of the previously unmapped sparse mapping within the same VM_BIND
  57  *     ioctl
  58  *
  59  * When using the VM_BIND ioctl to request the kernel to map memory to a given
  60  * virtual address in the GPU's VA space there is no guarantee that the actual
  61  * mappings are created in the GPU's MMU. If the given memory is swapped out
  62  * at the time the bind operation is executed the kernel will stash the mapping
  63  * details into its internal allocator and create the actual MMU mappings once
  64  * the memory is swapped back in. While this is transparent for userspace, it is
  65  * guaranteed that all the backing memory is swapped back in and all the memory
  66  * mappings, as requested by userspace previously, are actually mapped once the
  67  * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
  68  *
  69  * A VM_BIND job can be executed either synchronously or asynchronously. If
  70  * executed asynchronously, userspace may provide a list of syncobjs this job
  71  * will wait for and/or a list of syncobjs the kernel will signal once the
  72  * VM_BIND job finished execution. If executed synchronously the ioctl will
  73  * block until the bind job is finished. For synchronous jobs the kernel will
  74  * not permit any syncobjs submitted to the kernel.
  75  *
  76  * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
  77  * jobs are always executed asynchronously, and, equal to VM_BIND jobs, provide
  78  * the option to synchronize them with syncobjs.
  79  *
  80  * Besides that, EXEC jobs can be scheduled for a specified channel to execute on.
  81  *
  82  * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do have
  83  * an up to date view of the VA space. However, the actual mappings might still
  84  * be pending. Hence, EXEC jobs require to have the particular fences - of
  85  * the corresponding VM_BIND jobs they depend on - attached to them.
  86  */
  87
  88 static int
  89 nouveau_exec_job_submit(struct nouveau_job *job)
  90 {
  91         struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
  92         struct nouveau_cli *cli = job->cli;
  93         struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
  94         struct drm_exec *exec = &job->exec;
  95         struct drm_gem_object *obj;
  96         unsigned long index;
  97         int ret;
  98
  99         /* Create a new fence, but do not emit yet. */
 100         ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
 101         if (ret)
 102                 return ret;
 103
 104         nouveau_uvmm_lock(uvmm);
 105         drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
 106                             DRM_EXEC_IGNORE_DUPLICATES);
 107         drm_exec_until_all_locked(exec) {
 108                 struct drm_gpuva *va;
 109
 110                 drm_gpuva_for_each_va(va, &uvmm->umgr) {
 111                         if (unlikely(va == &uvmm->umgr.kernel_alloc_node))
 112                                 continue;
 113
 114                         ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
 115                         drm_exec_retry_on_contention(exec);
 116                         if (ret)
 117                                 goto err_uvmm_unlock;
 118                 }
 119         }
 120         nouveau_uvmm_unlock(uvmm);
 121
 122         drm_exec_for_each_locked_object(exec, index, obj) {
 123                 struct nouveau_bo *nvbo = nouveau_gem_object(obj);
 124
 125                 ret = nouveau_bo_validate(nvbo, true, false);
 126                 if (ret)
 127                         goto err_exec_fini;
 128         }
 129
 130         return 0;
 131
 132 err_uvmm_unlock:
 133         nouveau_uvmm_unlock(uvmm);
 134 err_exec_fini:
 135         drm_exec_fini(exec);
 136         return ret;
 137
 138 }
 139
 140 static void
 141 nouveau_exec_job_armed_submit(struct nouveau_job *job)
 142 {
 143         struct drm_exec *exec = &job->exec;
 144         struct drm_gem_object *obj;
 145         unsigned long index;
 146
 147         drm_exec_for_each_locked_object(exec, index, obj)
 148                 dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
 149
 150         drm_exec_fini(exec);
 151 }
 152
 153 static struct dma_fence *
 154 nouveau_exec_job_run(struct nouveau_job *job)
 155 {
 156         struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
 157         struct nouveau_channel *chan = exec_job->chan;
 158         struct nouveau_fence *fence = exec_job->fence;
 159         int i, ret;
 160
 161         ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
 162         if (ret) {
 163                 NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
 164                 return ERR_PTR(ret);
 165         }
 166
 167         for (i = 0; i < exec_job->push.count; i++) {
 168                 struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
 169                 bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;
 170
 171                 nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
 172         }
 173
 174         ret = nouveau_fence_emit(fence);
 175         if (ret) {
 176                 nouveau_fence_unref(&exec_job->fence);
 177                 NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
 178                 WIND_RING(chan);
 179                 return ERR_PTR(ret);
 180         }
 181
 182         /* The fence was emitted successfully, set the job's fence pointer to
 183          * NULL in order to avoid freeing it up when the job is cleaned up.
 184          */
 185         exec_job->fence = NULL;
 186
 187         return &fence->base;
 188 }
 189
 190 static void
 191 nouveau_exec_job_free(struct nouveau_job *job)
 192 {
 193         struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
 194
 195         nouveau_job_free(job);
 196
 197         kfree(exec_job->fence);
 198         kfree(exec_job->push.s);
 199         kfree(exec_job);
 200 }
 201
 202 static enum drm_gpu_sched_stat
 203 nouveau_exec_job_timeout(struct nouveau_job *job)
 204 {
 205         struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
 206         struct nouveau_channel *chan = exec_job->chan;
 207
 208         if (unlikely(!atomic_read(&chan->killed)))
 209                 nouveau_channel_kill(chan);
 210
 211         NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
 212                   chan->chid);
 213
 214         nouveau_sched_entity_fini(job->entity);
 215
 216         return DRM_GPU_SCHED_STAT_NOMINAL;
 217 }
 218
 219 static struct nouveau_job_ops nouveau_exec_job_ops = {
 220         .submit = nouveau_exec_job_submit,
 221         .armed_submit = nouveau_exec_job_armed_submit,
 222         .run = nouveau_exec_job_run,
 223         .free = nouveau_exec_job_free,
 224         .timeout = nouveau_exec_job_timeout,
 225 };
 226
 227 int
 228 nouveau_exec_job_init(struct nouveau_exec_job **pjob,
 229                       struct nouveau_exec_job_args *__args)
 230 {
 231         struct nouveau_exec_job *job;
 232         struct nouveau_job_args args = {};
 233         int i, ret;
 234
 235         for (i = 0; i < __args->push.count; i++) {
 236                 struct drm_nouveau_exec_push *p = &__args->push.s[i];
 237
 238                 if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
 239                         NV_PRINTK(err, nouveau_cli(__args->file_priv),
 240                                   "pushbuf size exceeds limit: 0x%x max 0x%x\n",
 241                                   p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
 242                         return -EINVAL;
 243                 }
 244         }
 245
 246         job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
 247         if (!job)
 248                 return -ENOMEM;
 249
 250         job->push.count = __args->push.count;
 251         if (__args->push.count) {
 252                 job->push.s = kmemdup(__args->push.s,
 253                                       sizeof(*__args->push.s) *
 254                                       __args->push.count,
 255                                       GFP_KERNEL);
 256                 if (!job->push.s) {
 257                         ret = -ENOMEM;
 258                         goto err_free_job;
 259                 }
 260         }
 261
 262         job->chan = __args->chan;
 263
 264         args.sched_entity = __args->sched_entity;
 265         args.file_priv = __args->file_priv;
 266
 267         args.in_sync.count = __args->in_sync.count;
 268         args.in_sync.s = __args->in_sync.s;
 269
 270         args.out_sync.count = __args->out_sync.count;
 271         args.out_sync.s = __args->out_sync.s;
 272
 273         args.ops = &nouveau_exec_job_ops;
 274         args.resv_usage = DMA_RESV_USAGE_WRITE;
 275
 276         ret = nouveau_job_init(&job->base, &args);
 277         if (ret)
 278                 goto err_free_pushs;
 279
 280         return 0;
 281
 282 err_free_pushs:
 283         kfree(job->push.s);
 284 err_free_job:
 285         kfree(job);
 286         *pjob = NULL;
 287
 288         return ret;
 289 }
 290
 291 static int
 292 nouveau_exec(struct nouveau_exec_job_args *args)
 293 {
 294         struct nouveau_exec_job *job;
 295         int ret;
 296
 297         ret = nouveau_exec_job_init(&job, args);
 298         if (ret)
 299                 return ret;
 300
 301         ret = nouveau_job_submit(&job->base);
 302         if (ret)
 303                 goto err_job_fini;
 304
 305         return 0;
 306
 307 err_job_fini:
 308         nouveau_job_fini(&job->base);
 309         return ret;
 310 }
 311
 312 static int
 313 nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
 314                    struct drm_nouveau_exec *req)
 315 {
 316         struct drm_nouveau_sync **s;
 317         u32 inc = req->wait_count;
 318         u64 ins = req->wait_ptr;
 319         u32 outc = req->sig_count;
 320         u64 outs = req->sig_ptr;
 321         u32 pushc = req->push_count;
 322         u64 pushs = req->push_ptr;
 323         int ret;
 324
 325         if (pushc) {
 326                 args->push.count = pushc;
 327                 args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
 328                 if (IS_ERR(args->push.s))
 329                         return PTR_ERR(args->push.s);
 330         }
 331
 332         if (inc) {
 333                 s = &args->in_sync.s;
 334
 335                 args->in_sync.count = inc;
 336                 *s = u_memcpya(ins, inc, sizeof(**s));
 337                 if (IS_ERR(*s)) {
 338                         ret = PTR_ERR(*s);
 339                         goto err_free_pushs;
 340                 }
 341         }
 342
 343         if (outc) {
 344                 s = &args->out_sync.s;
 345
 346                 args->out_sync.count = outc;
 347                 *s = u_memcpya(outs, outc, sizeof(**s));
 348                 if (IS_ERR(*s)) {
 349                         ret = PTR_ERR(*s);
 350                         goto err_free_ins;
 351                 }
 352         }
 353
 354         return 0;
 355
 356 err_free_pushs:
 357         u_free(args->push.s);
 358 err_free_ins:
 359         u_free(args->in_sync.s);
 360         return ret;
 361 }
 362
 363 static void
 364 nouveau_exec_ufree(struct nouveau_exec_job_args *args)
 365 {
 366         u_free(args->push.s);
 367         u_free(args->in_sync.s);
 368         u_free(args->out_sync.s);
 369 }
 370
 371 int
 372 nouveau_exec_ioctl_exec(struct drm_device *dev,
 373                         void *data,
 374                         struct drm_file *file_priv)
 375 {
 376         struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 377         struct nouveau_cli *cli = nouveau_cli(file_priv);
 378         struct nouveau_abi16_chan *chan16;
 379         struct nouveau_channel *chan = NULL;
 380         struct nouveau_exec_job_args args = {};
 381         struct drm_nouveau_exec *req = data;
 382         int push_max, ret = 0;
 383
 384         if (unlikely(!abi16))
 385                 return -ENOMEM;
 386
 387         /* abi16 locks already */
 388         if (unlikely(!nouveau_cli_uvmm(cli)))
 389                 return nouveau_abi16_put(abi16, -ENOSYS);
 390
 391         list_for_each_entry(chan16, &abi16->channels, head) {
 392                 if (chan16->chan->chid == req->channel) {
 393                         chan = chan16->chan;
 394                         break;
 395                 }
 396         }
 397
 398         if (!chan)
 399                 return nouveau_abi16_put(abi16, -ENOENT);
 400
 401         if (unlikely(atomic_read(&chan->killed)))
 402                 return nouveau_abi16_put(abi16, -ENODEV);
 403
 404         if (!chan->dma.ib_max)
 405                 return nouveau_abi16_put(abi16, -ENOSYS);
 406
 407         push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
 408         if (unlikely(req->push_count > push_max)) {
 409                 NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
 410                           req->push_count, push_max);
 411                 return nouveau_abi16_put(abi16, -EINVAL);
 412         }
 413
 414         ret = nouveau_exec_ucopy(&args, req);
 415         if (ret)
 416                 goto out;
 417
 418         args.sched_entity = &chan16->sched_entity;
 419         args.file_priv = file_priv;
 420         args.chan = chan;
 421
 422         ret = nouveau_exec(&args);
 423         if (ret)
 424                 goto out_free_args;
 425
 426 out_free_args:
 427         nouveau_exec_ufree(&args);
 428 out:
 429         return nouveau_abi16_put(abi16, ret);
 430 }