From 1f7fd484fff1b432373f0d0682fb0e2015113161 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 22 Aug 2019 07:15:57 +0100
Subject: [PATCH] drm/i915: Replace i915_vma_put_fence()

Avoid calling i915_vma_put_fence() by using our alternate paths that
bind a secondary vma avoiding the original fenced vma. For the few
instances where we need to release the fence (i.e. on binding when the
GGTT range becomes invalid), replace the put_fence with a revoke_fence.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190822061557.18402-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/display/intel_overlay.c   |  4 ---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c     | 35 +++++++++++++++++++------
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c |  9 +++----
 drivers/gpu/drm/i915/i915_gem.c                | 36 ++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_fence_reg.c      | 16 +++---------
 drivers/gpu/drm/i915/i915_vma.c                |  4 ++-
 drivers/gpu/drm/i915/i915_vma.h                |  4 +--
 7 files changed, 53 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index eca41c4..29edfc3 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -770,10 +770,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	}
 	intel_frontbuffer_flush(new_bo->frontbuffer, ORIGIN_DIRTYFB);
 
-	ret = i915_vma_put_fence(vma);
-	if (ret)
-		goto out_unpin;
-
 	if (!overlay->active) {
 		u32 oconfig;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index a1afc26..9c58e8f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -221,6 +221,8 @@ restart:
 	 * state and so involves less work.
 	 */
 	if (atomic_read(&obj->bind_count)) {
+		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
 		/* Before we change the PTE, the GPU must not be accessing it.
 		 * If we wait upon the object, we know that all the bound
 		 * VMA are no longer active.
@@ -232,18 +234,30 @@ restart:
 		if (ret)
 			return ret;
 
-		if (!HAS_LLC(to_i915(obj->base.dev)) &&
-		    cache_level != I915_CACHE_NONE) {
-			/* Access to snoopable pages through the GTT is
+		if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) {
+			intel_wakeref_t wakeref =
+				intel_runtime_pm_get(&i915->runtime_pm);
+
+			/*
+			 * Access to snoopable pages through the GTT is
 			 * incoherent and on some machines causes a hard
 			 * lockup. Relinquish the CPU mmaping to force
 			 * userspace to refault in the pages and we can
 			 * then double check if the GTT mapping is still
 			 * valid for that pointer access.
 			 */
-			i915_gem_object_release_mmap(obj);
+			ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex);
+			if (ret) {
+				intel_runtime_pm_put(&i915->runtime_pm,
+						     wakeref);
+				return ret;
+			}
 
-			/* As we no longer need a fence for GTT access,
+			if (obj->userfault_count)
+				__i915_gem_object_release_mmap(obj);
+
+			/*
+			 * As we no longer need a fence for GTT access,
 			 * we can relinquish it now (and so prevent having
 			 * to steal a fence from someone else on the next
 			 * fence request). Note GPU activity would have
@@ -251,12 +265,17 @@ restart:
 			 * supposed to be linear.
 			 */
 			for_each_ggtt_vma(vma, obj) {
-				ret = i915_vma_put_fence(vma);
+				ret = i915_vma_revoke_fence(vma);
 				if (ret)
-					return ret;
+					break;
 			}
+			mutex_unlock(&i915->ggtt.vm.mutex);
+			intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+			if (ret)
+				return ret;
 		} else {
-			/* We either have incoherent backing store and
+			/*
+			 * We either have incoherent backing store and
 			 * so no GTT access or the architecture is fully
 			 * coherent. In such cases, existing GTT mmaps
 			 * ignore the cache bit in the PTE and we can
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index a5c3fbb..f813fcb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1024,6 +1024,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int err;
 
+	if (i915_gem_object_is_tiled(obj))
+		return ERR_PTR(-EINVAL);
+
 	if (use_cpu_reloc(cache, obj))
 		return NULL;
 
@@ -1047,12 +1050,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (err) /* no inactive aperture space, use cpu reloc */
 			return NULL;
 	} else {
-		err = i915_vma_put_fence(vma);
-		if (err) {
-			i915_vma_unpin(vma);
-			return ERR_PTR(err);
-		}
-
 		cache->node.start = vma->node.start;
 		cache->node.mm = (void *)vma;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6897668..eb31b69 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -343,20 +343,16 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		return ret;
 
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-				       PIN_MAPPABLE |
-				       PIN_NONBLOCK /* NOWARN */ |
-				       PIN_NOEVICT);
+	vma = ERR_PTR(-ENODEV);
+	if (!i915_gem_object_is_tiled(obj))
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK /* NOWARN */ |
+					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
 		node.allocated = false;
-		ret = i915_vma_put_fence(vma);
-		if (ret) {
-			i915_vma_unpin(vma);
-			vma = ERR_PTR(ret);
-		}
-	}
-	if (IS_ERR(vma)) {
+	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_unlock;
@@ -557,20 +553,16 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		wakeref = intel_runtime_pm_get(rpm);
 	}
 
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-				       PIN_MAPPABLE |
-				       PIN_NONBLOCK /* NOWARN */ |
-				       PIN_NOEVICT);
+	vma = ERR_PTR(-ENODEV);
+	if (!i915_gem_object_is_tiled(obj))
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK /* NOWARN */ |
+					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
 		node.allocated = false;
-		ret = i915_vma_put_fence(vma);
-		if (ret) {
-			i915_vma_unpin(vma);
-			vma = ERR_PTR(ret);
-		}
-	}
-	if (IS_ERR(vma)) {
+	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_rpm;
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 6a33a0b..615a9f4 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -287,7 +287,7 @@ static int fence_update(struct i915_fence_reg *fence,
 }
 
 /**
- * i915_vma_put_fence - force-remove fence for a VMA
+ * i915_vma_revoke_fence - force-remove fence for a VMA
  * @vma: vma to map linearly (not through a fence reg)
  *
  * This function force-removes any fence from the given object, which is useful
@@ -297,26 +297,18 @@ static int fence_update(struct i915_fence_reg *fence,
  *
  * 0 on success, negative error code on failure.
  */
-int i915_vma_put_fence(struct i915_vma *vma)
+int i915_vma_revoke_fence(struct i915_vma *vma)
 {
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
 	struct i915_fence_reg *fence = vma->fence;
-	int err;
 
+	lockdep_assert_held(&vma->vm->mutex);
 	if (!fence)
 		return 0;
 
 	if (atomic_read(&fence->pin_count))
 		return -EBUSY;
 
-	err = mutex_lock_interruptible(&ggtt->vm.mutex);
-	if (err)
-		return err;
-
-	err = fence_update(fence, NULL);
-	mutex_unlock(&ggtt->vm.mutex);
-
-	return err;
+	return fence_update(fence, NULL);
 }
 
 static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 9840cb2..e0e677b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -982,7 +982,9 @@ int i915_vma_unbind(struct i915_vma *vma)
 		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
 
 		/* release the fence reg _after_ flushing */
-		ret = i915_vma_put_fence(vma);
+		mutex_lock(&vma->vm->mutex);
+		ret = i915_vma_revoke_fence(vma);
+		mutex_unlock(&vma->vm->mutex);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index cf6c043..889fc7c 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -421,8 +421,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
  *
  * True if the vma has a fence, false otherwise.
  */
-int i915_vma_pin_fence(struct i915_vma *vma);
-int __must_check i915_vma_put_fence(struct i915_vma *vma);
+int __must_check i915_vma_pin_fence(struct i915_vma *vma);
+int __must_check i915_vma_revoke_fence(struct i915_vma *vma);
 
 static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {
-- 
2.7.4
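[Editor's note, not part of the patch: the sketch below is a minimal, self-contained
illustration of the locking pattern the patch adopts. After this change,
i915_vma_revoke_fence() no longer takes ggtt->vm.mutex itself; instead the callers
(i915_vma_unbind() and the set-cache-level path above) hold vm->mutex around the call,
and the callee only asserts that the lock is held via lockdep_assert_held(). The code
uses pthreads and made-up names (struct vm, struct vma, revoke_fence, unbind) as
stand-ins for the real i915 structures; it is not i915 code.]

/* Illustrative sketch of "caller holds the lock, callee asserts it". */
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct vm {
	pthread_mutex_t mutex;
	bool mutex_held;	/* stand-in for lockdep's "is this lock held?" state */
};

struct vma {
	struct vm *vm;
	int *fence;		/* NULL when no fence register is bound */
};

/* Callee: assumes vm->mutex is already held by the caller. */
static int revoke_fence(struct vma *vma)
{
	assert(vma->vm->mutex_held);	/* models lockdep_assert_held(&vma->vm->mutex) */

	if (!vma->fence)
		return 0;

	vma->fence = NULL;		/* models fence_update(fence, NULL) */
	return 0;
}

/* Caller: takes the lock once and may revoke one or more fences under it. */
static int unbind(struct vma *vma)
{
	int ret;

	pthread_mutex_lock(&vma->vm->mutex);
	vma->vm->mutex_held = true;

	ret = revoke_fence(vma);

	vma->vm->mutex_held = false;
	pthread_mutex_unlock(&vma->vm->mutex);
	return ret;
}

int main(void)
{
	int reg = 1;
	struct vm vm = { .mutex = PTHREAD_MUTEX_INITIALIZER, .mutex_held = false };
	struct vma vma = { .vm = &vm, .fence = &reg };

	printf("unbind: %d, fence now %p\n", unbind(&vma), (void *)vma.fence);
	return 0;
}

Pushing the locking out to the callers lets a path such as the set-cache-level loop
revoke the fences of every GGTT vma under a single acquisition of the mutex instead of
locking and unlocking per vma, which is the design choice the diff above makes.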