drm/i915: Reorder await_execution before await_request
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 26 May 2020 09:07:52 +0000 (10:07 +0100)
committer Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Mon, 1 Jun 2020 12:22:05 +0000 (15:22 +0300)
Reorder the code so that await_execution can be reused from a special
case inside await_request in the next patch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-1-chris@chris-wilson.co.uk
(cherry picked from commit ffb0c600c240103f6f34e07892a7e0a75502b243)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
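
Background for the helpers this patch moves: on a single timeline, seqno
S - 1 having signalled means the request with seqno S has (at least)
reached the head of that timeline, so a request's *start* can be tracked
in the ordinary per-context sync map simply by keying on fence->seqno - 1.
That is all intel_timeline_sync_has_start()/intel_timeline_sync_set_start()
below do. A toy, standalone sketch of the trick; the struct and helper
names here are stand-ins, not the driver's:

    #include <assert.h>
    #include <stdbool.h>

    /* Toy timeline: requests complete in order, bumping last_signaled. */
    struct timeline { unsigned last_signaled; };

    static bool is_later(const struct timeline *tl, unsigned seqno)
    {
            return tl->last_signaled >= seqno;
    }

    /* "Request S has started" is approximated by "S - 1 has signalled",
     * mirroring the fence->seqno - 1 key used by the sync-map helpers. */
    static bool has_started(const struct timeline *tl, unsigned seqno)
    {
            return is_later(tl, seqno - 1);
    }

    int main(void)
    {
            struct timeline tl = { .last_signaled = 4 };

            assert(!is_later(&tl, 5));   /* request 5 not yet complete */
            assert(has_started(&tl, 5)); /* but its predecessor is done */
            return 0;
    }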
drivers/gpu/drm/i915/i915_request.c

index c282719ad3aca19afebb60f4173f908ce6b4656a..33bbad623e02fae14041f42388ca1b7d3d366d0d 100644
@@ -1053,37 +1053,91 @@ await_fence:
                                             I915_FENCE_GFP);
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+                                         struct dma_fence *fence)
+{
+       return __intel_timeline_sync_is_later(tl,
+                                             fence->context,
+                                             fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+                                        const struct dma_fence *fence)
+{
+       return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
 static int
-i915_request_await_request(struct i915_request *to, struct i915_request *from)
+__i915_request_await_execution(struct i915_request *to,
+                              struct i915_request *from,
+                              void (*hook)(struct i915_request *rq,
+                                           struct dma_fence *signal))
 {
-       int ret;
+       int err;
 
-       GEM_BUG_ON(to == from);
-       GEM_BUG_ON(to->timeline == from->timeline);
+       GEM_BUG_ON(intel_context_is_barrier(from->context));
 
-       if (i915_request_completed(from)) {
-               i915_sw_fence_set_error_once(&to->submit, from->fence.error);
+       /* Submit both requests at the same time */
+       err = __await_execution(to, from, hook, I915_FENCE_GFP);
+       if (err)
+               return err;
+
+       /* Squash repeated dependencies to the same timelines */
+       if (intel_timeline_sync_has_start(i915_request_timeline(to),
+                                         &from->fence))
                return 0;
+
+       /*
+        * Wait until the start of this request.
+        *
+        * The execution cb fires when we submit the request to HW. But in
+        * many cases this may be long before the request itself is ready to
+        * run (consider that we submit 2 requests for the same context, where
+        * the request of interest is behind an indefinite spinner). So we hook
+        * up to both to reduce our queues and keep the execution lag minimised
+        * in the worst case, though we hope that the await_start is elided.
+        */
+       err = i915_request_await_start(to, from);
+       if (err < 0)
+               return err;
+
+       /*
+        * Ensure both start together [after all semaphores in signal]
+        *
+        * Now that we are queued to the HW at roughly the same time (thanks
+        * to the execute cb) and are ready to run at roughly the same time
+        * (thanks to the await start), our signaler may still be indefinitely
+        * delayed by waiting on a semaphore from a remote engine. If our
+        * signaler depends on a semaphore, so indirectly do we, and we do not
+        * want to start our payload until our signaler also starts theirs.
+        * So we wait.
+        *
+        * However, there is also a second condition for which we need to wait
+        * for the precise start of the signaler. Consider that the signaler
+        * was submitted in a chain of requests following another context
+        * (with just an ordinary intra-engine fence dependency between the
+        * two). In this case the signaler is queued to HW, but not for
+        * immediate execution, and so we must wait until it reaches the
+        * active slot.
+        */
+       if (intel_engine_has_semaphores(to->engine) &&
+           !i915_request_has_initial_breadcrumb(to)) {
+               err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+               if (err < 0)
+                       return err;
        }
 
+       /* Couple the dependency tree for PI on this exposed to->fence */
        if (to->engine->schedule) {
-               ret = i915_sched_node_add_dependency(&to->sched,
+               err = i915_sched_node_add_dependency(&to->sched,
                                                     &from->sched,
-                                                    I915_DEPENDENCY_EXTERNAL);
-               if (ret < 0)
-                       return ret;
+                                                    I915_DEPENDENCY_WEAK);
+               if (err < 0)
+                       return err;
        }
 
-       if (to->engine == from->engine)
-               ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
-                                                      &from->submit,
-                                                      I915_FENCE_GFP);
-       else
-               ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
-       if (ret < 0)
-               return ret;
-
-       return 0;
+       return intel_timeline_sync_set_start(i915_request_timeline(to),
+                                            &from->fence);
 }
 
 static void mark_external(struct i915_request *rq)
@@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
 }
 
 int
-i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
+i915_request_await_execution(struct i915_request *rq,
+                            struct dma_fence *fence,
+                            void (*hook)(struct i915_request *rq,
+                                         struct dma_fence *signal))
 {
        struct dma_fence **child = &fence;
        unsigned int nchild = 1;
        int ret;
 
-       /*
-        * Note that if the fence-array was created in signal-on-any mode,
-        * we should *not* decompose it into its individual fences. However,
-        * we don't currently store which mode the fence-array is operating
-        * in. Fortunately, the only user of signal-on-any is private to
-        * amdgpu and we should not see any incoming fence-array from
-        * sync-file being in signal-on-any mode.
-        */
        if (dma_fence_is_array(fence)) {
                struct dma_fence_array *array = to_dma_fence_array(fence);
 
+               /* XXX Error for signal-on-any fence arrays */
+
                child = array->fences;
                nchild = array->num_fences;
                GEM_BUG_ON(!nchild);
@@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
                        continue;
                }
 
-               /*
-                * Requests on the same timeline are explicitly ordered, along
-                * with their dependencies, by i915_request_add() which ensures
-                * that requests are submitted in-order through each ring.
-                */
                if (fence->context == rq->fence.context)
                        continue;
 
-               /* Squash repeated waits to the same timelines */
-               if (fence->context &&
-                   intel_timeline_sync_is_later(i915_request_timeline(rq),
-                                                fence))
-                       continue;
+               /*
+                * We don't squash repeated fence dependencies here as we
+                * want to run our callback in all cases.
+                */
 
                if (dma_fence_is_i915(fence))
-                       ret = i915_request_await_request(rq, to_request(fence));
+                       ret = __i915_request_await_execution(rq,
+                                                            to_request(fence),
+                                                            hook);
                else
                        ret = i915_request_await_external(rq, fence);
                if (ret < 0)
                        return ret;
-
-               /* Record the latest fence used against each timeline */
-               if (fence->context)
-                       intel_timeline_sync_set(i915_request_timeline(rq),
-                                               fence);
        } while (--nchild);
 
        return 0;
 }
 
-static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
-                                         struct dma_fence *fence)
-{
-       return __intel_timeline_sync_is_later(tl,
-                                             fence->context,
-                                             fence->seqno - 1);
-}
-
-static int intel_timeline_sync_set_start(struct intel_timeline *tl,
-                                        const struct dma_fence *fence)
-{
-       return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
-}
-
 static int
-__i915_request_await_execution(struct i915_request *to,
-                              struct i915_request *from,
-                              void (*hook)(struct i915_request *rq,
-                                           struct dma_fence *signal))
+i915_request_await_request(struct i915_request *to, struct i915_request *from)
 {
-       int err;
-
-       GEM_BUG_ON(intel_context_is_barrier(from->context));
+       int ret;
 
-       /* Submit both requests at the same time */
-       err = __await_execution(to, from, hook, I915_FENCE_GFP);
-       if (err)
-               return err;
+       GEM_BUG_ON(to == from);
+       GEM_BUG_ON(to->timeline == from->timeline);
 
-       /* Squash repeated dependencies to the same timelines */
-       if (intel_timeline_sync_has_start(i915_request_timeline(to),
-                                         &from->fence))
+       if (i915_request_completed(from)) {
+               i915_sw_fence_set_error_once(&to->submit, from->fence.error);
                return 0;
-
-       /*
-        * Wait until the start of this request.
-        *
-        * The execution cb fires when we submit the request to HW. But in
-        * many cases this may be long before the request itself is ready to
-        * run (consider that we submit 2 requests for the same context, where
-        * the request of interest is behind an indefinite spinner). So we hook
-        * up to both to reduce our queues and keep the execution lag minimised
-        * in the worst case, though we hope that the await_start is elided.
-        */
-       err = i915_request_await_start(to, from);
-       if (err < 0)
-               return err;
-
-       /*
-        * Ensure both start together [after all semaphores in signal]
-        *
-        * Now that we are queued to the HW at roughly the same time (thanks
-        * to the execute cb) and are ready to run at roughly the same time
-        * (thanks to the await start), our signaler may still be indefinitely
-        * delayed by waiting on a semaphore from a remote engine. If our
-        * signaler depends on a semaphore, so indirectly do we, and we do not
-        * want to start our payload until our signaler also starts theirs.
-        * So we wait.
-        *
-        * However, there is also a second condition for which we need to wait
-        * for the precise start of the signaler. Consider that the signaler
-        * was submitted in a chain of requests following another context
-        * (with just an ordinary intra-engine fence dependency between the
-        * two). In this case the signaler is queued to HW, but not for
-        * immediate execution, and so we must wait until it reaches the
-        * active slot.
-        */
-       if (intel_engine_has_semaphores(to->engine) &&
-           !i915_request_has_initial_breadcrumb(to)) {
-               err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
-               if (err < 0)
-                       return err;
        }
 
-       /* Couple the dependency tree for PI on this exposed to->fence */
        if (to->engine->schedule) {
-               err = i915_sched_node_add_dependency(&to->sched,
+               ret = i915_sched_node_add_dependency(&to->sched,
                                                     &from->sched,
-                                                    I915_DEPENDENCY_WEAK);
-               if (err < 0)
-                       return err;
+                                                    I915_DEPENDENCY_EXTERNAL);
+               if (ret < 0)
+                       return ret;
        }
 
-       return intel_timeline_sync_set_start(i915_request_timeline(to),
-                                            &from->fence);
+       if (to->engine == READ_ONCE(from->engine))
+               ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
+                                                      &from->submit,
+                                                      I915_FENCE_GFP);
+       else
+               ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
+       if (ret < 0)
+               return ret;
+
+       return 0;
 }
 
 int
-i915_request_await_execution(struct i915_request *rq,
-                            struct dma_fence *fence,
-                            void (*hook)(struct i915_request *rq,
-                                         struct dma_fence *signal))
+i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 {
        struct dma_fence **child = &fence;
        unsigned int nchild = 1;
        int ret;
 
+       /*
+        * Note that if the fence-array was created in signal-on-any mode,
+        * we should *not* decompose it into its individual fences. However,
+        * we don't currently store which mode the fence-array is operating
+        * in. Fortunately, the only user of signal-on-any is private to
+        * amdgpu and we should not see any incoming fence-array from
+        * sync-file being in signal-on-any mode.
+        */
        if (dma_fence_is_array(fence)) {
                struct dma_fence_array *array = to_dma_fence_array(fence);
 
-               /* XXX Error for signal-on-any fence arrays */
-
                child = array->fences;
                nchild = array->num_fences;
                GEM_BUG_ON(!nchild);
@@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq,
                        continue;
                }
 
+               /*
+                * Requests on the same timeline are explicitly ordered, along
+                * with their dependencies, by i915_request_add() which ensures
+                * that requests are submitted in-order through each ring.
+                */
                if (fence->context == rq->fence.context)
                        continue;
 
-               /*
-                * We don't squash repeated fence dependencies here as we
-                * want to run our callback in all cases.
-                */
+               /* Squash repeated waits to the same timelines */
+               if (fence->context &&
+                   intel_timeline_sync_is_later(i915_request_timeline(rq),
+                                                fence))
+                       continue;
 
                if (dma_fence_is_i915(fence))
-                       ret = __i915_request_await_execution(rq,
-                                                            to_request(fence),
-                                                            hook);
+                       ret = i915_request_await_request(rq, to_request(fence));
                else
                        ret = i915_request_await_external(rq, fence);
                if (ret < 0)
                        return ret;
+
+               /* Record the latest fence used against each timeline */
+               if (fence->context)
+                       intel_timeline_sync_set(i915_request_timeline(rq),
+                                               fence);
        } while (--nchild);
 
        return 0;
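
After the move, both entry points share the same fence-array decomposition
loop but differ in squashing policy: i915_request_await_dma_fence() elides
repeated waits on a timeline via the sync map, while
i915_request_await_execution() must run its hook for every fence and so
only dedupes the cheaper start-await via the seqno - 1 key shown earlier.
A simplified standalone sketch of the await_dma_fence loop, with a
hypothetical fixed-size array standing in for the driver's compressed
radix tree (the types and names are stand-ins, not the i915 API):

    #include <stdbool.h>
    #include <stddef.h>

    /* Stand-in fence; the real loop operates on struct dma_fence. */
    struct fence {
            unsigned long long context; /* timeline id */
            unsigned long long seqno;   /* position on that timeline */
            bool signaled;
    };

    /* Hypothetical sync map: latest seqno awaited per context. */
    #define MAX_CTX 8
    static unsigned long long sync_map[MAX_CTX];

    static bool sync_is_later(const struct fence *f)
    {
            return f->context < MAX_CTX && sync_map[f->context] >= f->seqno;
    }

    static void sync_set(const struct fence *f)
    {
            if (f->context < MAX_CTX && sync_map[f->context] < f->seqno)
                    sync_map[f->context] = f->seqno;
    }

    /* Mirrors the shape of the i915_request_await_dma_fence() loop:
     * skip completed and same-context fences, squash repeated waits,
     * then record the latest fence used against each timeline. */
    static int await_fences(unsigned long long my_context,
                            struct fence **child, size_t nchild)
    {
            for (size_t i = 0; i < nchild; i++) {
                    const struct fence *f = child[i];

                    if (f->signaled)
                            continue; /* already complete */
                    if (f->context == my_context)
                            continue; /* same timeline: ordered implicitly */
                    if (sync_is_later(f))
                            continue; /* squash repeated waits */

                    /* ... emit the actual wait (sw fence or semaphore) ... */

                    sync_set(f);
            }
            return 0;
    }

    int main(void)
    {
            struct fence a = { .context = 2, .seqno = 7, .signaled = false };
            struct fence b = a;                  /* a repeated dependency */
            struct fence *fences[] = { &a, &b };

            return await_fences(1, fences, 2);   /* b is squashed */
    }

The execution-await path deliberately skips the sync_is_later() check
above, since its callback must fire for each signaler; it only records
the start fence so that the extra await_start/semaphore setup is elided
on repeats.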