 * Copyright 2008 Jerome Glisse.
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 * Jerome Glisse <glisse@freedesktop.org>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>

#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include <drm/ttm/ttm_tt.h>

#include "amdgpu_cs.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
                                 struct amdgpu_device *adev,
                                 struct drm_file *filp,
                                 union drm_amdgpu_cs *cs)
        struct amdgpu_fpriv *fpriv = filp->driver_priv;

        if (cs->in.num_chunks == 0)

        memset(p, 0, sizeof(*p));

        p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);

        if (atomic_read(&p->ctx->guilty)) {
                amdgpu_ctx_put(p->ctx);

        amdgpu_sync_create(&p->sync);

static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
                             struct drm_amdgpu_cs_chunk_ib *chunk_ib)
        struct drm_sched_entity *entity;

        r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
                                  chunk_ib->ip_instance,
                                  chunk_ib->ring, &entity);

         * Abort if there is no run queue associated with this entity.
         * Possibly because of disabled HW IP.
        if (entity->rq == NULL)

        /* Check if we can add this IB to some existing job */
        for (i = 0; i < p->gang_size; ++i)
                if (p->entities[i] == entity)

        /* If not increase the gang size if possible */
        if (i == AMDGPU_CS_GANG_SIZE)

        p->entities[i] = entity;
        p->gang_size = i + 1;
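
/*
 * Illustrative note on the gang logic above (the example entities are
 * assumptions): a submission whose IB chunks target, say, one GFX entity and
 * one COMPUTE entity ends up with gang_size == 2 and one job per entity,
 * while several IBs aimed at the same entity share a single job.  A
 * submission touching more than AMDGPU_CS_GANG_SIZE distinct entities cannot
 * be represented and is rejected by the check above.
 */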
static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
                           struct drm_amdgpu_cs_chunk_ib *chunk_ib,
                           unsigned int *num_ibs)
        r = amdgpu_cs_job_idx(p, chunk_ib);

        if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))

        p->gang_leader_idx = r;

static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
                                   struct drm_amdgpu_cs_chunk_fence *data,
        struct drm_gem_object *gobj;
        struct amdgpu_bo *bo;

        gobj = drm_gem_object_lookup(p->filp, data->handle);

        bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
        p->uf_entry.priority = 0;
        p->uf_entry.tv.bo = &bo->tbo;
        /* One for TTM and two for the CS job */
        p->uf_entry.tv.num_shared = 3;

        drm_gem_object_put(gobj);

        size = amdgpu_bo_size(bo);
        if (size != PAGE_SIZE || (data->offset + 8) > size) {

        if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {

        *offset = data->offset;

        amdgpu_bo_unref(&bo);
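
/*
 * Illustrative example for the user fence checks above (the numbers assume a
 * 4 KiB page): the fence BO must be exactly PAGE_SIZE bytes, and data->offset
 * must leave room for the 64-bit fence value, so with PAGE_SIZE == 4096 an
 * offset of 4092 is rejected (4092 + 8 > 4096) while 4088 is the last
 * accepted offset.
 */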
static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
                                   struct drm_amdgpu_bo_list_in *data)
        struct drm_amdgpu_bo_list_entry *info;

        r = amdgpu_bo_create_list_entry_array(data, &info);

        r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,

/* Copy the data from userspace and go over it the first time */
static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
                           union drm_amdgpu_cs *cs)
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
        struct amdgpu_vm *vm = &fpriv->vm;
        uint64_t *chunk_array_user;
        uint64_t *chunk_array;
        uint32_t uf_offset = 0;

        chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),

        chunk_array_user = u64_to_user_ptr(cs->in.chunks);
        if (copy_from_user(chunk_array, chunk_array_user,
                           sizeof(uint64_t)*cs->in.num_chunks)) {

        p->nchunks = cs->in.num_chunks;
        p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),

        for (i = 0; i < p->nchunks; i++) {
                struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
                struct drm_amdgpu_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = u64_to_user_ptr(chunk_array[i]);
                if (copy_from_user(&user_chunk, chunk_ptr,
                                   sizeof(struct drm_amdgpu_cs_chunk))) {
                        goto free_partial_kdata;

                p->chunks[i].chunk_id = user_chunk.chunk_id;
                p->chunks[i].length_dw = user_chunk.length_dw;

                size = p->chunks[i].length_dw;
                cdata = u64_to_user_ptr(user_chunk.chunk_data);

                p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
                if (p->chunks[i].kdata == NULL) {
                        goto free_partial_kdata;

                size *= sizeof(uint32_t);
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        goto free_partial_kdata;

                /* Assume the worst on the following checks */
                switch (p->chunks[i].chunk_id) {
                case AMDGPU_CHUNK_ID_IB:
                        if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
                                goto free_partial_kdata;

                        ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
                                goto free_partial_kdata;

                case AMDGPU_CHUNK_ID_FENCE:
                        if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
                                goto free_partial_kdata;

                        ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
                                goto free_partial_kdata;

                case AMDGPU_CHUNK_ID_BO_HANDLES:
                        if (size < sizeof(struct drm_amdgpu_bo_list_in))
                                goto free_partial_kdata;

                        ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
                                goto free_partial_kdata;

                case AMDGPU_CHUNK_ID_DEPENDENCIES:
                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
                case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:

                        goto free_partial_kdata;

                goto free_partial_kdata;

        for (i = 0; i < p->gang_size; ++i) {
                ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
                                       num_ibs[i], &p->jobs[i]);

        p->gang_leader = p->jobs[p->gang_leader_idx];

        if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {

        if (p->uf_entry.tv.bo)
                p->gang_leader->uf_addr = uf_offset;

        /* Use this opportunity to fill in task info for the vm */
        amdgpu_vm_set_task_info(vm);

                kvfree(p->chunks[i].kdata);
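
/*
 * Rough userspace-side sketch of the layout parsed by amdgpu_cs_pass1()
 * (libdrm-style, illustrative only; ib_gpu_va, num_dw, ctx_id and fd are
 * assumed to exist in the caller):
 *
 *	struct drm_amdgpu_cs_chunk_ib ib_info = {
 *		.ip_type = AMDGPU_HW_IP_GFX,
 *		.va_start = ib_gpu_va,
 *		.ib_bytes = num_dw * 4,
 *	};
 *	struct drm_amdgpu_cs_chunk chunk = {
 *		.chunk_id = AMDGPU_CHUNK_ID_IB,
 *		.length_dw = sizeof(ib_info) / 4,
 *		.chunk_data = (uintptr_t)&ib_info,
 *	};
 *	uint64_t chunk_ptr = (uintptr_t)&chunk;
 *	union drm_amdgpu_cs cs = { 0 };
 *
 *	cs.in.ctx_id = ctx_id;
 *	cs.in.num_chunks = 1;
 *	cs.in.chunks = (uintptr_t)&chunk_ptr;
 *	drmCommandWriteRead(fd, DRM_AMDGPU_CS, &cs, sizeof(cs));
 */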
static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
                           struct amdgpu_cs_chunk *chunk,
                           unsigned int *ce_preempt,
                           unsigned int *de_preempt)
        struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_ring *ring;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;

        r = amdgpu_cs_job_idx(p, chunk_ib);

        ring = amdgpu_job_ring(job);
        ib = &job->ibs[job->num_ibs++];

        /* MM engine doesn't support user fences */
        if (p->uf_entry.tv.bo && ring->funcs->no_user_fence)

        if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
            chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
                if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
                 * Each GFX command submission allows at most one preemptible
                 * IB each for CE and DE. */
                if (*ce_preempt > 1 || *de_preempt > 1)

        if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
                job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;

        r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
                          chunk_ib->ib_bytes : 0,
                          AMDGPU_IB_POOL_DELAYED, ib);
                DRM_ERROR("Failed to get ib !\n");

        ib->gpu_addr = chunk_ib->va_start;
        ib->length_dw = chunk_ib->ib_bytes / 4;
        ib->flags = chunk_ib->flags;

static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
                                     struct amdgpu_cs_chunk *chunk)
        struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        unsigned int num_deps;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_dep);

        for (i = 0; i < num_deps; ++i) {
                struct amdgpu_ctx *ctx;
                struct drm_sched_entity *entity;
                struct dma_fence *fence;

                ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);

                r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
                                          deps[i].ring, &entity);

                fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);

                        return PTR_ERR(fence);

                if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
                        struct drm_sched_fence *s_fence;
                        struct dma_fence *old = fence;

                        s_fence = to_drm_sched_fence(fence);
                        fence = dma_fence_get(&s_fence->scheduled);

                r = amdgpu_sync_fence(&p->sync, fence);
                dma_fence_put(fence);
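
/*
 * Worked example for the length_dw arithmetic above (values illustrative):
 * a dependency chunk carrying three struct drm_amdgpu_cs_chunk_dep entries
 * of 24 bytes each has length_dw == 18, so the division yields
 * num_deps == 3.  The syncobj chunk parsers below size their arrays the
 * same way.
 */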
static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
                                         uint32_t handle, u64 point,
        struct dma_fence *fence;

        r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
                DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",

        r = amdgpu_sync_fence(&p->sync, fence);
        dma_fence_put(fence);

static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
                                   struct amdgpu_cs_chunk *chunk)
        struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
        unsigned int num_deps;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_sem);
        for (i = 0; i < num_deps; ++i) {
                r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);

static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
                                              struct amdgpu_cs_chunk *chunk)
        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
        unsigned int num_deps;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_syncobj);
        for (i = 0; i < num_deps; ++i) {
                r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
                                                  syncobj_deps[i].point,
                                                  syncobj_deps[i].flags);

static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
                                    struct amdgpu_cs_chunk *chunk)
        struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
        unsigned int num_deps;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_sem);

        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
        p->num_post_deps = 0;

        for (i = 0; i < num_deps; ++i) {
                p->post_deps[i].syncobj =
                        drm_syncobj_find(p->filp, deps[i].handle);
                if (!p->post_deps[i].syncobj)

                p->post_deps[i].chain = NULL;
                p->post_deps[i].point = 0;

static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
                                                struct amdgpu_cs_chunk *chunk)
        struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
        unsigned int num_deps;

        num_deps = chunk->length_dw * 4 /
                sizeof(struct drm_amdgpu_cs_chunk_syncobj);

        p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
        p->num_post_deps = 0;

        for (i = 0; i < num_deps; ++i) {
                struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

                if (syncobj_deps[i].point) {
                        dep->chain = dma_fence_chain_alloc();

                dep->syncobj = drm_syncobj_find(p->filp,
                                                syncobj_deps[i].handle);
                        dma_fence_chain_free(dep->chain);

                dep->point = syncobj_deps[i].point;

static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
                               struct amdgpu_cs_chunk *chunk)
        struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;

        if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)

        for (i = 0; i < p->gang_size; ++i) {
                p->jobs[i]->shadow_va = shadow->shadow_va;
                p->jobs[i]->csa_va = shadow->csa_va;
                p->jobs[i]->gds_va = shadow->gds_va;
                p->jobs[i]->init_shadow =
                        shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;

static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
        unsigned int ce_preempt = 0, de_preempt = 0;

        for (i = 0; i < p->nchunks; ++i) {
                struct amdgpu_cs_chunk *chunk;

                chunk = &p->chunks[i];

                switch (chunk->chunk_id) {
                case AMDGPU_CHUNK_ID_IB:
                        r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);

                case AMDGPU_CHUNK_ID_DEPENDENCIES:
                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
                        r = amdgpu_cs_p2_dependencies(p, chunk);

                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
                        r = amdgpu_cs_p2_syncobj_in(p, chunk);

                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                        r = amdgpu_cs_p2_syncobj_out(p, chunk);

                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
                        r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);

                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
                        r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);

                case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
                        r = amdgpu_cs_p2_shadow(p, chunk);

/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
        if (us <= 0 || !adev->mm_stats.log2_max_MBps)

        /* Since accum_us is incremented by a million per second, just
         * multiply it by the number of MB/s to get the number of bytes.
        return us << adev->mm_stats.log2_max_MBps;

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
        if (!adev->mm_stats.log2_max_MBps)

        return bytes >> adev->mm_stats.log2_max_MBps;
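
/*
 * Worked example for the two converters above (log2_max_MBps == 6, i.e.
 * roughly 64 MB/s, is an assumed value): one second of accumulated time,
 * accum_us == 1000000, converts to 1000000 << 6 = 64000000 bytes of move
 * budget, and moving 32 MiB costs 33554432 >> 6 = 524288 us of that budget.
 */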
/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * This approach allows moving a buffer of any size (it's important to allow
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
        s64 time_us, increment_us;
        u64 free_vram, total_vram, used_vram;

        /* Allow a maximum of 200 accumulated ms. This is basically per-IB
         * It means that in order to get full max MBps, at least 5 IBs per
         * second must be submitted and not more than 200ms apart from each
        const s64 us_upper_bound = 200000;

        if (!adev->mm_stats.log2_max_MBps) {

        total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
        used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
        free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

        spin_lock(&adev->mm_stats.lock);

        /* Increase the amount of accumulated us. */
        time_us = ktime_to_us(ktime_get());
        increment_us = time_us - adev->mm_stats.last_update_us;
        adev->mm_stats.last_update_us = time_us;
        adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,

        /* This prevents the short period of low performance when the VRAM
         * usage is low and the driver is in debt or doesn't have enough
         * accumulated us to fill VRAM quickly.
         * The situation can occur in these cases:
         * - a lot of VRAM is freed by userspace
         * - the presence of a big buffer causes a lot of evictions
         *   (solution: split buffers into smaller ones)
         * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
         * accum_us to a positive number.
        if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {

                /* Be more aggressive on dGPUs. Try to fill a portion of free
                if (!(adev->flags & AMD_IS_APU))
                        min_us = bytes_to_us(adev, free_vram / 4);
                        min_us = 0; /* Reset accum_us on APUs. */

                adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);

        /* This is set to 0 if the driver is in debt to disallow (optional)
        *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

        /* Do the same for visible VRAM if half of it is free */
        if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
                u64 total_vis_vram = adev->gmc.visible_vram_size;
                        amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);

                if (used_vis_vram < total_vis_vram) {
                        u64 free_vis_vram = total_vis_vram - used_vis_vram;

                        adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
                                                          increment_us, us_upper_bound);

                        if (free_vis_vram >= total_vis_vram / 2)
                                adev->mm_stats.accum_us_vis =
                                        max(bytes_to_us(adev, free_vis_vram / 2),
                                            adev->mm_stats.accum_us_vis);

                *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);

        spin_unlock(&adev->mm_stats.lock);
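
/*
 * Worked example (still assuming log2_max_MBps == 6): since accum_us is
 * clamped to us_upper_bound == 200000, an application that submits rarely
 * can bank at most 200 ms of budget, i.e. a per-submission move threshold of
 * 200000 << 6 = 12800000 bytes (about 12 MiB).  Reaching the full sustained
 * rate therefore needs at least five submissions per second, as the comment
 * above notes.
 */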
/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
        spin_lock(&adev->mm_stats.lock);
        adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
        adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
        spin_unlock(&adev->mm_stats.lock);
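
/*
 * Note that accum_us may go negative here, which is the "debt" mentioned
 * above.  Continuing the earlier example, validating a single 64 MiB buffer
 * deducts 67108864 >> 6 = 1048576 us, roughly one second of budget that the
 * clock has to pay back before further (optional) buffer moves are allowed
 * again.
 */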
static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        struct amdgpu_cs_parser *p = param;
        struct ttm_operation_ctx ctx = {
                .interruptible = true,
                .no_wait_gpu = false,
                .resv = bo->tbo.base.resv

        if (bo->tbo.pin_count)

        /* Don't move this buffer if we have depleted our allowance
         * to move it. Don't move anything if the threshold is zero.
        if (p->bytes_moved < p->bytes_moved_threshold &&
            (!bo->tbo.base.dma_buf ||
             list_empty(&bo->tbo.base.dma_buf->attachments))) {
                if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
                    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
                        /* And don't move a CPU_ACCESS_REQUIRED BO to limited
                         * visible VRAM if we've depleted our allowance to do
                        if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
                                domain = bo->preferred_domains;
                                domain = bo->allowed_domains;
                        domain = bo->preferred_domains;
                domain = bo->allowed_domains;

        amdgpu_bo_placement_from_domain(bo, domain);
        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

        p->bytes_moved += ctx.bytes_moved;
        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
            amdgpu_bo_in_cpu_visible_vram(bo))
                p->bytes_moved_vis += ctx.bytes_moved;

        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
                domain = bo->allowed_domains;

static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
                                   struct list_head *validated)
        struct ttm_operation_ctx ctx = { true, false };
        struct amdgpu_bo_list_entry *lobj;

        list_for_each_entry(lobj, validated, tv.head) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
                struct mm_struct *usermm;

                usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
                if (usermm && usermm != current->mm)

                if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
                    lobj->user_invalidated && lobj->user_pages) {
                        amdgpu_bo_placement_from_domain(bo,
                                                        AMDGPU_GEM_DOMAIN_CPU);
                        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

                        amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,

                r = amdgpu_cs_bo_validate(p, bo);

                kvfree(lobj->user_pages);
                lobj->user_pages = NULL;

static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                                union drm_amdgpu_cs *cs)
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_list_entry *e;
        struct list_head duplicates;

        INIT_LIST_HEAD(&p->validated);

        /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
        if (cs->in.bo_list_handle) {

                r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,

        } else if (!p->bo_list) {
                /* Create an empty bo_list when no handle is provided */
                r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,

        mutex_lock(&p->bo_list->bo_list_mutex);

        /* One for TTM and one for the CS job */
        amdgpu_bo_list_for_each_entry(e, p->bo_list)
                e->tv.num_shared = 2;

        amdgpu_bo_list_get_list(p->bo_list, &p->validated);

        INIT_LIST_HEAD(&duplicates);
        amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

        if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
                list_add(&p->uf_entry.tv.head, &p->validated);
        /* Get userptr backing pages. If pages are updated after being
         * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
         * will do amdgpu_ttm_backend_bind() to flush and invalidate the new pages
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
                bool userpage_invalidated = false;

                e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
                                               sizeof(struct page *),
                                               GFP_KERNEL | __GFP_ZERO);
                if (!e->user_pages) {
                        DRM_ERROR("kvmalloc_array failure\n");
                        goto out_free_user_pages;

                r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
                        kvfree(e->user_pages);
                        e->user_pages = NULL;
                        goto out_free_user_pages;

                for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
                        if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
                                userpage_invalidated = true;

                e->user_invalidated = userpage_invalidated;

        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
        if (unlikely(r != 0)) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
                goto out_free_user_pages;

        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                e->bo_va = amdgpu_vm_bo_find(vm, bo);

        amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
                                          &p->bytes_moved_vis_threshold);
        p->bytes_moved_vis = 0;

        r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
                                      amdgpu_cs_bo_validate, p);
                DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");

        r = amdgpu_cs_list_validate(p, &duplicates);

        r = amdgpu_cs_list_validate(p, &p->validated);

        if (p->uf_entry.tv.bo) {
                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);

                r = amdgpu_ttm_alloc_gart(&uf->tbo);
                        goto error_validate;

                p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(uf);

        amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
                                     p->bytes_moved_vis);

        for (i = 0; i < p->gang_size; ++i)
                amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
                                         p->bo_list->gws_obj,
                                         p->bo_list->oa_obj);

        ttm_eu_backoff_reservation(&p->ticket, &p->validated);

out_free_user_pages:
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
                kvfree(e->user_pages);
                e->user_pages = NULL;

        mutex_unlock(&p->bo_list->bo_list_mutex);
static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
        if (!trace_amdgpu_cs_enabled())

        for (i = 0; i < p->gang_size; ++i) {
                struct amdgpu_job *job = p->jobs[i];

                for (j = 0; j < job->num_ibs; ++j)
                        trace_amdgpu_cs(p, job, &job->ibs[j]);

static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
                               struct amdgpu_job *job)
        struct amdgpu_ring *ring = amdgpu_job_ring(job);

        /* Only for UVD/VCE VM emulation */
        if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)

        for (i = 0; i < job->num_ibs; ++i) {
                struct amdgpu_ib *ib = &job->ibs[i];
                struct amdgpu_bo_va_mapping *m;
                struct amdgpu_bo *aobj;

                va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
                r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
                        DRM_ERROR("IB va_start is invalid\n");

                if ((va_start + ib->length_dw * 4) >
                    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
                        DRM_ERROR("IB va_start+ib_bytes is invalid\n");

                /* the IB should be reserved at this point */
                r = amdgpu_bo_kmap(aobj, (void **)&kptr);

                kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);

                if (ring->funcs->parse_cs) {
                        memcpy(ib->ptr, kptr, ib->length_dw * 4);
                        amdgpu_bo_kunmap(aobj);

                        r = amdgpu_ring_parse_cs(ring, p, job, ib);

                        ib->ptr = (uint32_t *)kptr;
                        r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
                        amdgpu_bo_kunmap(aobj);

static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
        for (i = 0; i < p->gang_size; ++i) {
                r = amdgpu_cs_patch_ibs(p, p->jobs[i]);

static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_job *job = p->gang_leader;
        struct amdgpu_device *adev = p->adev;
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_list_entry *e;
        struct amdgpu_bo_va *bo_va;
        struct amdgpu_bo *bo;

        r = amdgpu_vm_clear_freed(adev, vm, NULL);

        r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);

        r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);

        if (fpriv->csa_va) {
                bo_va = fpriv->csa_va;

                r = amdgpu_vm_bo_update(adev, bo_va, false);

                r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);

        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                /* ignore duplicates */
                bo = ttm_to_amdgpu_bo(e->tv.bo);

                r = amdgpu_vm_bo_update(adev, bo_va, false);

                r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);

        r = amdgpu_vm_handle_moved(adev, vm);

        r = amdgpu_vm_update_pdes(adev, vm, false);

        r = amdgpu_sync_fence(&p->sync, vm->last_update);

        for (i = 0; i < p->gang_size; ++i) {

                job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);

        if (amdgpu_vm_debug) {
                /* Invalidate all BOs to test for userspace bugs */
                amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                        struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                        /* ignore duplicates */

                        amdgpu_vm_bo_invalidate(adev, bo, false);

static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct drm_gpu_scheduler *sched;
        struct amdgpu_bo_list_entry *e;
        struct dma_fence *fence;

        r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
                if (r != -ERESTARTSYS)
                        DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");

        list_for_each_entry(e, &p->validated, tv.head) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
                struct dma_resv *resv = bo->tbo.base.resv;
                enum amdgpu_sync_mode sync_mode;

                sync_mode = amdgpu_bo_explicit_sync(bo) ?
                        AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
                r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,

        for (i = 0; i < p->gang_size; ++i) {
                r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);

        sched = p->gang_leader->base.entity->rq->sched;
        while ((fence = amdgpu_sync_get_fence(&p->sync))) {
                struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
                 * When we have a dependency it might be necessary to insert a
                 * pipeline sync to make sure that all caches etc are flushed and the
                 * next job actually sees the results from the previous one
                 * before we start executing on the same scheduler ring.
                if (!s_fence || s_fence->sched != sched) {
                        dma_fence_put(fence);

                r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
                dma_fence_put(fence);

static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
        for (i = 0; i < p->num_post_deps; ++i) {
                if (p->post_deps[i].chain && p->post_deps[i].point) {
                        drm_syncobj_add_point(p->post_deps[i].syncobj,
                                              p->post_deps[i].chain,
                                              p->fence, p->post_deps[i].point);
                        p->post_deps[i].chain = NULL;

                        drm_syncobj_replace_fence(p->post_deps[i].syncobj,

static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                            union drm_amdgpu_cs *cs)
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_job *leader = p->gang_leader;
        struct amdgpu_bo_list_entry *e;

        for (i = 0; i < p->gang_size; ++i)
                drm_sched_job_arm(&p->jobs[i]->base);

        for (i = 0; i < p->gang_size; ++i) {
                struct dma_fence *fence;

                if (p->jobs[i] == leader)

                fence = &p->jobs[i]->base.s_fence->scheduled;
                dma_fence_get(fence);
                r = drm_sched_job_add_dependency(&leader->base, fence);
                        dma_fence_put(fence);

        if (p->gang_size > 1) {
                for (i = 0; i < p->gang_size; ++i)
                        amdgpu_job_set_gang_leader(p->jobs[i], leader);

        /* No memory allocation is allowed while holding the notifier lock.
         * The lock is held until amdgpu_cs_submit is finished and fence is
        mutex_lock(&p->adev->notifier_lock);
        /* If userptrs were invalidated after amdgpu_cs_parser_bos(), return
         * -EAGAIN so that drmIoctl in libdrm restarts the amdgpu_cs_ioctl.
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);

        p->fence = dma_fence_get(&leader->base.s_fence->finished);
        list_for_each_entry(e, &p->validated, tv.head) {

                /* Everybody except for the gang leader uses READ */
                for (i = 0; i < p->gang_size; ++i) {
                        if (p->jobs[i] == leader)

                        dma_resv_add_fence(e->tv.bo->base.resv,
                                           &p->jobs[i]->base.s_fence->finished,
                                           DMA_RESV_USAGE_READ);

                /* The gang leader is remembered as writer */
                e->tv.num_shared = 0;

        seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
        amdgpu_cs_post_dependencies(p);

        if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
            !p->ctx->preamble_presented) {
                leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
                p->ctx->preamble_presented = true;

        cs->out.handle = seq;
        leader->uf_sequence = seq;

        amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
        for (i = 0; i < p->gang_size; ++i) {
                amdgpu_job_free_resources(p->jobs[i]);
                trace_amdgpu_cs_ioctl(p->jobs[i]);
                drm_sched_entity_push_job(&p->jobs[i]->base);

        amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);

        mutex_unlock(&p->adev->notifier_lock);
        mutex_unlock(&p->bo_list->bo_list_mutex);

        mutex_unlock(&p->adev->notifier_lock);

        for (i = 0; i < p->gang_size; ++i)
                drm_sched_job_cleanup(&p->jobs[i]->base);
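
/*
 * Rough userspace-side sketch (illustrative only, approximating what libdrm's
 * ioctl wrapper does internally): the -EAGAIN returned when userptr pages
 * were invalidated is normally invisible to applications because the
 * submission is simply retried, roughly:
 *
 *	do {
 *		ret = drmCommandWriteRead(fd, DRM_AMDGPU_CS,
 *					  &cs, sizeof(cs));
 *	} while (ret == -EAGAIN || ret == -EINTR);
 */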
/* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
        amdgpu_sync_free(&parser->sync);
        for (i = 0; i < parser->num_post_deps; i++) {
                drm_syncobj_put(parser->post_deps[i].syncobj);
                kfree(parser->post_deps[i].chain);

        kfree(parser->post_deps);

        dma_fence_put(parser->fence);

        amdgpu_ctx_put(parser->ctx);
        if (parser->bo_list)
                amdgpu_bo_list_put(parser->bo_list);

        for (i = 0; i < parser->nchunks; i++)
                kvfree(parser->chunks[i].kdata);
        kvfree(parser->chunks);
        for (i = 0; i < parser->gang_size; ++i) {
                if (parser->jobs[i])
                        amdgpu_job_free(parser->jobs[i]);

        if (parser->uf_entry.tv.bo) {
                struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);

                amdgpu_bo_unref(&uf);

int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_cs_parser parser;

        if (amdgpu_ras_intr_triggered())

        if (!adev->accel_working)

        r = amdgpu_cs_parser_init(&parser, adev, filp, data);
                if (printk_ratelimit())
                        DRM_ERROR("Failed to initialize parser %d!\n", r);

        r = amdgpu_cs_pass1(&parser, data);

        r = amdgpu_cs_pass2(&parser);

        r = amdgpu_cs_parser_bos(&parser, data);
                        DRM_ERROR("Not enough memory for command submission!\n");
                else if (r != -ERESTARTSYS && r != -EAGAIN)
                        DRM_ERROR("Failed to process the buffer list %d!\n", r);

        r = amdgpu_cs_patch_jobs(&parser);

        r = amdgpu_cs_vm_handling(&parser);

        r = amdgpu_cs_sync_rings(&parser);

        trace_amdgpu_cs_ibs(&parser);

        r = amdgpu_cs_submit(&parser, data);

        amdgpu_cs_parser_fini(&parser);

        ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
        mutex_unlock(&parser.bo_list->bo_list_mutex);

        amdgpu_cs_parser_fini(&parser);

 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 * @data: data from userspace
 * @filp: file private
 * Wait for the command submission identified by handle to finish.
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *filp)
        union drm_amdgpu_wait_cs *wait = data;
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
        struct drm_sched_entity *entity;
        struct amdgpu_ctx *ctx;
        struct dma_fence *fence;

        ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);

        r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
                                  wait->in.ring, &entity);
                amdgpu_ctx_put(ctx);

        fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);

        r = dma_fence_wait_timeout(fence, true, timeout);
        if (r > 0 && fence->error)

        dma_fence_put(fence);

        amdgpu_ctx_put(ctx);

        memset(wait, 0, sizeof(*wait));
        wait->out.status = (r == 0);
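
/*
 * Rough userspace-side sketch (illustrative only): the handle checked here is
 * the cs->out.handle returned by the CS ioctl, so a caller might wait for
 * completion roughly like this:
 *
 *	union drm_amdgpu_wait_cs wait = { 0 };
 *
 *	wait.in.handle = cs.out.handle;
 *	wait.in.ctx_id = ctx_id;
 *	wait.in.ip_type = AMDGPU_HW_IP_GFX;
 *	wait.in.timeout = AMDGPU_TIMEOUT_INFINITE;
 *	drmCommandWriteRead(fd, DRM_AMDGPU_WAIT_CS, &wait, sizeof(wait));
 *	busy = wait.out.status != 0;
 */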
 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
 * @adev: amdgpu device
 * @filp: file private
 * @user: drm_amdgpu_fence copied from user space
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
                                             struct drm_file *filp,
                                             struct drm_amdgpu_fence *user)
        struct drm_sched_entity *entity;
        struct amdgpu_ctx *ctx;
        struct dma_fence *fence;

        ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
                return ERR_PTR(-EINVAL);

        r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
                                  user->ring, &entity);
                amdgpu_ctx_put(ctx);

        fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
        amdgpu_ctx_put(ctx);

int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
                                    struct drm_file *filp)
        struct amdgpu_device *adev = drm_to_adev(dev);
        union drm_amdgpu_fence_to_handle *info = data;
        struct dma_fence *fence;
        struct drm_syncobj *syncobj;
        struct sync_file *sync_file;

        fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
                return PTR_ERR(fence);

                fence = dma_fence_get_stub();

        switch (info->in.what) {
        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
                r = drm_syncobj_create(&syncobj, 0, fence);
                dma_fence_put(fence);

                r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
                drm_syncobj_put(syncobj);

        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
                r = drm_syncobj_create(&syncobj, 0, fence);
                dma_fence_put(fence);

                r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
                drm_syncobj_put(syncobj);

        case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
                fd = get_unused_fd_flags(O_CLOEXEC);
                        dma_fence_put(fence);

                sync_file = sync_file_create(fence);
                dma_fence_put(fence);

                fd_install(fd, sync_file->file);
                info->out.handle = fd;

                dma_fence_put(fence);
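
/*
 * Rough userspace-side sketch (illustrative only): exporting a CS fence as a
 * sync_file fd that can be handed to other processes or drivers might look
 * roughly like this:
 *
 *	union drm_amdgpu_fence_to_handle fth = { 0 };
 *
 *	fth.in.fence.ctx_id = ctx_id;
 *	fth.in.fence.ip_type = AMDGPU_HW_IP_GFX;
 *	fth.in.fence.seq_no = cs.out.handle;
 *	fth.in.what = AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD;
 *	drmCommandWriteRead(fd, DRM_AMDGPU_FENCE_TO_HANDLE,
 *			    &fth, sizeof(fth));
 *	sync_file_fd = fth.out.handle;
 */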
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
                                     struct drm_file *filp,
                                     union drm_amdgpu_wait_fences *wait,
                                     struct drm_amdgpu_fence *fences)
        uint32_t fence_count = wait->in.fence_count;

        for (i = 0; i < fence_count; i++) {
                struct dma_fence *fence;
                unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);

                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
                        return PTR_ERR(fence);

                r = dma_fence_wait_timeout(fence, true, timeout);
                dma_fence_put(fence);

                        return fence->error;

        memset(wait, 0, sizeof(*wait));
        wait->out.status = (r > 0);

 * amdgpu_cs_wait_any_fence - wait on any fence to signal
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
                                    struct drm_file *filp,
                                    union drm_amdgpu_wait_fences *wait,
                                    struct drm_amdgpu_fence *fences)
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
        uint32_t fence_count = wait->in.fence_count;
        uint32_t first = ~0;
        struct dma_fence **array;

        /* Prepare the fence array */
        array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);

        for (i = 0; i < fence_count; i++) {
                struct dma_fence *fence;

                fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
                if (IS_ERR(fence)) {
                        goto err_free_fence_array;
                } else { /* NULL, the fence has already been signaled */
        r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
                goto err_free_fence_array;

        memset(wait, 0, sizeof(*wait));
        wait->out.status = (r > 0);
        wait->out.first_signaled = first;

        if (first < fence_count && array[first])
                r = array[first]->error;

err_free_fence_array:
        for (i = 0; i < fence_count; i++)
                dma_fence_put(array[i]);
 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
 * @data: data from userspace
 * @filp: file private
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *filp)
        struct amdgpu_device *adev = drm_to_adev(dev);
        union drm_amdgpu_wait_fences *wait = data;
        uint32_t fence_count = wait->in.fence_count;
        struct drm_amdgpu_fence *fences_user;
        struct drm_amdgpu_fence *fences;

        /* Get the fences from userspace */
        fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),

        fences_user = u64_to_user_ptr(wait->in.fences);
        if (copy_from_user(fences, fences_user,
                           sizeof(struct drm_amdgpu_fence) * fence_count)) {
                goto err_free_fences;

        if (wait->in.wait_all)
                r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
                r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);

 * amdgpu_cs_find_mapping - find bo_va for VM address
 * @parser: command submission parser context
 * @bo: resulting BO of the mapping found
 * @map: Placeholder to return found BO mapping
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns allocation structure when found, NULL
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                           uint64_t addr, struct amdgpu_bo **bo,
                           struct amdgpu_bo_va_mapping **map)
        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
        struct ttm_operation_ctx ctx = { false, false };
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_va_mapping *mapping;

        addr /= AMDGPU_GPU_PAGE_SIZE;

        mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
        if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)

        *bo = mapping->bo_va->base.bo;

        /* Double check that the BO is reserved by this CS */
        if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)

        if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
                (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
                amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
                r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);

        return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
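
/*
 * Worked example for the address handling above (the address is
 * illustrative): with AMDGPU_GPU_PAGE_SIZE == 4096, an IB at GPU VA
 * 0x100003000 is looked up as page 0x100003, and the range check in
 * amdgpu_cs_patch_ibs() then ensures va_start + ib_bytes stays within
 * (m->last + 1) pages of the returned mapping.
 */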