drm/i915: Reorder await_execution before await_request
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 26 May 2020 09:07:52 +0000 (10:07 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 27 May 2020 08:07:45 +0000 (09:07 +0100)
Reorder the code so that __i915_request_await_execution can be reused
from a special case in i915_request_await_request in the next patch.
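
As a rough illustration of the intended reuse (a hypothetical sketch only,
not part of this patch; the actual condition and call site are whatever the
next patch adds), i915_request_await_request could then short-circuit to the
execution await for a signaler that has already started:

	/* Hypothetical special case inside i915_request_await_request() */
	if (i915_request_started(from))
		return __i915_request_await_execution(to, from, NULL);

Since both functions are static, moving __i915_request_await_execution above
i915_request_await_request is what allows such a call without introducing a
forward declaration.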

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_request.c

index 877eaac..e64d82f 100644
@@ -1058,37 +1058,91 @@ await_fence:
                                             I915_FENCE_GFP);
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+                                         struct dma_fence *fence)
+{
+       return __intel_timeline_sync_is_later(tl,
+                                             fence->context,
+                                             fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+                                        const struct dma_fence *fence)
+{
+       return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
 static int
-i915_request_await_request(struct i915_request *to, struct i915_request *from)
+__i915_request_await_execution(struct i915_request *to,
+                              struct i915_request *from,
+                              void (*hook)(struct i915_request *rq,
+                                           struct dma_fence *signal))
 {
-       int ret;
+       int err;
 
-       GEM_BUG_ON(to == from);
-       GEM_BUG_ON(to->timeline == from->timeline);
+       GEM_BUG_ON(intel_context_is_barrier(from->context));
 
-       if (i915_request_completed(from)) {
-               i915_sw_fence_set_error_once(&to->submit, from->fence.error);
+       /* Submit both requests at the same time */
+       err = __await_execution(to, from, hook, I915_FENCE_GFP);
+       if (err)
+               return err;
+
+       /* Squash repeated dependencies to the same timelines */
+       if (intel_timeline_sync_has_start(i915_request_timeline(to),
+                                         &from->fence))
                return 0;
+
+       /*
+        * Wait until the start of this request.
+        *
+        * The execution cb fires when we submit the request to HW. But in
+        * many cases this may be long before the request itself is ready to
+        * run (consider that we submit 2 requests for the same context, where
+        * the request of interest is behind an indefinite spinner). So we hook
+        * up to both to reduce our queues and keep the execution lag minimised
+        * in the worst case, though we hope that the await_start is elided.
+        */
+       err = i915_request_await_start(to, from);
+       if (err < 0)
+               return err;
+
+       /*
+        * Ensure both start together [after all semaphores in signal]
+        *
+        * Now that we are queued to the HW at roughly the same time (thanks
+        * to the execute cb) and are ready to run at roughly the same time
+        * (thanks to the await start), our signaler may still be indefinitely
+        * delayed by waiting on a semaphore from a remote engine. If our
+        * signaler depends on a semaphore, so indirectly do we, and we do not
+        * want to start our payload until our signaler also starts theirs.
+        * So we wait.
+        *
+        * However, there is also a second condition for which we need to wait
+        * for the precise start of the signaler. Consider that the signaler
+        * was submitted in a chain of requests following another context
+        * (with just an ordinary intra-engine fence dependency between the
+        * two). In this case the signaler is queued to HW, but not for
+        * immediate execution, and so we must wait until it reaches the
+        * active slot.
+        */
+       if (intel_engine_has_semaphores(to->engine) &&
+           !i915_request_has_initial_breadcrumb(to)) {
+               err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+               if (err < 0)
+                       return err;
        }
 
+       /* Couple the dependency tree for PI on this exposed to->fence */
        if (to->engine->schedule) {
-               ret = i915_sched_node_add_dependency(&to->sched,
+               err = i915_sched_node_add_dependency(&to->sched,
                                                     &from->sched,
-                                                    I915_DEPENDENCY_EXTERNAL);
-               if (ret < 0)
-                       return ret;
+                                                    I915_DEPENDENCY_WEAK);
+               if (err < 0)
+                       return err;
        }
 
-       if (to->engine == from->engine)
-               ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
-                                                      &from->submit,
-                                                      I915_FENCE_GFP);
-       else
-               ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
-       if (ret < 0)
-               return ret;
-
-       return 0;
+       return intel_timeline_sync_set_start(i915_request_timeline(to),
+                                            &from->fence);
 }
 
 static void mark_external(struct i915_request *rq)
@@ -1141,23 +1195,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
 }
 
 int
-i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
+i915_request_await_execution(struct i915_request *rq,
+                            struct dma_fence *fence,
+                            void (*hook)(struct i915_request *rq,
+                                         struct dma_fence *signal))
 {
        struct dma_fence **child = &fence;
        unsigned int nchild = 1;
        int ret;
 
-       /*
-        * Note that if the fence-array was created in signal-on-any mode,
-        * we should *not* decompose it into its individual fences. However,
-        * we don't currently store which mode the fence-array is operating
-        * in. Fortunately, the only user of signal-on-any is private to
-        * amdgpu and we should not see any incoming fence-array from
-        * sync-file being in signal-on-any mode.
-        */
        if (dma_fence_is_array(fence)) {
                struct dma_fence_array *array = to_dma_fence_array(fence);
 
+               /* XXX Error for signal-on-any fence arrays */
+
                child = array->fences;
                nchild = array->num_fences;
                GEM_BUG_ON(!nchild);
@@ -1170,138 +1221,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
                        continue;
                }
 
-               /*
-                * Requests on the same timeline are explicitly ordered, along
-                * with their dependencies, by i915_request_add() which ensures
-                * that requests are submitted in-order through each ring.
-                */
                if (fence->context == rq->fence.context)
                        continue;
 
-               /* Squash repeated waits to the same timelines */
-               if (fence->context &&
-                   intel_timeline_sync_is_later(i915_request_timeline(rq),
-                                                fence))
-                       continue;
+               /*
+                * We don't squash repeated fence dependencies here as we
+                * want to run our callback in all cases.
+                */
 
                if (dma_fence_is_i915(fence))
-                       ret = i915_request_await_request(rq, to_request(fence));
+                       ret = __i915_request_await_execution(rq,
+                                                            to_request(fence),
+                                                            hook);
                else
                        ret = i915_request_await_external(rq, fence);
                if (ret < 0)
                        return ret;
-
-               /* Record the latest fence used against each timeline */
-               if (fence->context)
-                       intel_timeline_sync_set(i915_request_timeline(rq),
-                                               fence);
        } while (--nchild);
 
        return 0;
 }
 
-static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
-                                         struct dma_fence *fence)
-{
-       return __intel_timeline_sync_is_later(tl,
-                                             fence->context,
-                                             fence->seqno - 1);
-}
-
-static int intel_timeline_sync_set_start(struct intel_timeline *tl,
-                                        const struct dma_fence *fence)
-{
-       return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
-}
-
 static int
-__i915_request_await_execution(struct i915_request *to,
-                              struct i915_request *from,
-                              void (*hook)(struct i915_request *rq,
-                                           struct dma_fence *signal))
+i915_request_await_request(struct i915_request *to, struct i915_request *from)
 {
-       int err;
-
-       GEM_BUG_ON(intel_context_is_barrier(from->context));
+       int ret;
 
-       /* Submit both requests at the same time */
-       err = __await_execution(to, from, hook, I915_FENCE_GFP);
-       if (err)
-               return err;
+       GEM_BUG_ON(to == from);
+       GEM_BUG_ON(to->timeline == from->timeline);
 
-       /* Squash repeated depenendices to the same timelines */
-       if (intel_timeline_sync_has_start(i915_request_timeline(to),
-                                         &from->fence))
+       if (i915_request_completed(from)) {
+               i915_sw_fence_set_error_once(&to->submit, from->fence.error);
                return 0;
-
-       /*
-        * Wait until the start of this request.
-        *
-        * The execution cb fires when we submit the request to HW. But in
-        * many cases this may be long before the request itself is ready to
-        * run (consider that we submit 2 requests for the same context, where
-        * the request of interest is behind an indefinite spinner). So we hook
-        * up to both to reduce our queues and keep the execution lag minimised
-        * in the worst case, though we hope that the await_start is elided.
-        */
-       err = i915_request_await_start(to, from);
-       if (err < 0)
-               return err;
-
-       /*
-        * Ensure both start together [after all semaphores in signal]
-        *
-        * Now that we are queued to the HW at roughly the same time (thanks
-        * to the execute cb) and are ready to run at roughly the same time
-        * (thanks to the await start), our signaler may still be indefinitely
-        * delayed by waiting on a semaphore from a remote engine. If our
-        * signaler depends on a semaphore, so indirectly do we, and we do not
-        * want to start our payload until our signaler also starts theirs.
-        * So we wait.
-        *
-        * However, there is also a second condition for which we need to wait
-        * for the precise start of the signaler. Consider that the signaler
-        * was submitted in a chain of requests following another context
-        * (with just an ordinary intra-engine fence dependency between the
-        * two). In this case the signaler is queued to HW, but not for
-        * immediate execution, and so we must wait until it reaches the
-        * active slot.
-        */
-       if (intel_engine_has_semaphores(to->engine) &&
-           !i915_request_has_initial_breadcrumb(to)) {
-               err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
-               if (err < 0)
-                       return err;
        }
 
-       /* Couple the dependency tree for PI on this exposed to->fence */
        if (to->engine->schedule) {
-               err = i915_sched_node_add_dependency(&to->sched,
+               ret = i915_sched_node_add_dependency(&to->sched,
                                                     &from->sched,
-                                                    I915_DEPENDENCY_WEAK);
-               if (err < 0)
-                       return err;
+                                                    I915_DEPENDENCY_EXTERNAL);
+               if (ret < 0)
+                       return ret;
        }
 
-       return intel_timeline_sync_set_start(i915_request_timeline(to),
-                                            &from->fence);
+       if (to->engine == READ_ONCE(from->engine))
+               ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
+                                                      &from->submit,
+                                                      I915_FENCE_GFP);
+       else
+               ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
+       if (ret < 0)
+               return ret;
+
+       return 0;
 }
 
 int
-i915_request_await_execution(struct i915_request *rq,
-                            struct dma_fence *fence,
-                            void (*hook)(struct i915_request *rq,
-                                         struct dma_fence *signal))
+i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 {
        struct dma_fence **child = &fence;
        unsigned int nchild = 1;
        int ret;
 
+       /*
+        * Note that if the fence-array was created in signal-on-any mode,
+        * we should *not* decompose it into its individual fences. However,
+        * we don't currently store which mode the fence-array is operating
+        * in. Fortunately, the only user of signal-on-any is private to
+        * amdgpu and we should not see any incoming fence-array from
+        * sync-file being in signal-on-any mode.
+        */
        if (dma_fence_is_array(fence)) {
                struct dma_fence_array *array = to_dma_fence_array(fence);
 
-               /* XXX Error for signal-on-any fence arrays */
-
                child = array->fences;
                nchild = array->num_fences;
                GEM_BUG_ON(!nchild);
@@ -1314,22 +1305,31 @@ i915_request_await_execution(struct i915_request *rq,
                        continue;
                }
 
+               /*
+                * Requests on the same timeline are explicitly ordered, along
+                * with their dependencies, by i915_request_add() which ensures
+                * that requests are submitted in-order through each ring.
+                */
                if (fence->context == rq->fence.context)
                        continue;
 
-               /*
-                * We don't squash repeated fence dependencies here as we
-                * want to run our callback in all cases.
-                */
+               /* Squash repeated waits to the same timelines */
+               if (fence->context &&
+                   intel_timeline_sync_is_later(i915_request_timeline(rq),
+                                                fence))
+                       continue;
 
                if (dma_fence_is_i915(fence))
-                       ret = __i915_request_await_execution(rq,
-                                                            to_request(fence),
-                                                            hook);
+                       ret = i915_request_await_request(rq, to_request(fence));
                else
                        ret = i915_request_await_external(rq, fence);
                if (ret < 0)
                        return ret;
+
+               /* Record the latest fence used against each timeline */
+               if (fence->context)
+                       intel_timeline_sync_set(i915_request_timeline(rq),
+                                               fence);
        } while (--nchild);
 
        return 0;