drm/i915: Improve the start alignment of bonded pairs

author Chris Wilson <chris@chris-wilson.co.uk>

Fri, 6 Mar 2020 13:38:38 +0000 (13:38 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Tue, 10 Mar 2020 11:13:33 +0000 (11:13 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 6 Mar 2020 13:38:38 +0000 (13:38 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 10 Mar 2020 11:13:33 +0000 (11:13 +0000)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index 04b52bf347bf0f2152bbcee535bf753d9d1afe40..fa65ec9e16f1838249273f9123d46539126d8c61 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1128,14 +1128,45 @@ __i915_request_await_execution(struct i915_request *to,
                                           &from->fence))
                 return 0;
  
-       /* Ensure both start together [after all semaphores in signal] */
-       if (intel_engine_has_semaphores(to->engine))
-               err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
-       else
-               err = i915_request_await_start(to, from);
+       /*
+        * Wait until the start of this request.
+        *
+        * The execution cb fires when we submit the request to HW. But in
+        * many cases this may be long before the request itself is ready to
+        * run (consider that we submit 2 requests for the same context, where
+        * the request of interest is behind an indefinite spinner). So we hook
+        * up to both to reduce our queues and keep the execution lag minimised
+        * in the worst case, though we hope that the await_start is elided.
+        */
+       err = i915_request_await_start(to, from);
         if (err < 0)
                 return err;
  
+       /*
+        * Ensure both start together [after all semaphores in signal]
+        *
+        * Now that we are queued to the HW at roughly the same time (thanks
+        * to the execute cb) and are ready to run at roughly the same time
+        * (thanks to the await start), our signaler may still be indefinitely
+        * delayed by waiting on a semaphore from a remote engine. If our
+        * signaler depends on a semaphore, so indirectly do we, and we do not
+        * want to start our payload until our signaler also starts theirs.
+        * So we wait.
+        *
+        * However, there is also a second condition for which we need to wait
+        * for the precise start of the signaler. Consider that the signaler
+        * was submitted in a chain of requests following another context
+        * (with just an ordinary intra-engine fence dependency between the
+        * two). In this case the signaler is queued to HW, but not for
+        * immediate execution, and so we must wait until it reaches the
+        * active slot.
+        */
+       if (intel_engine_has_semaphores(to->engine)) {
+               err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+               if (err < 0)
+                       return err;
+       }
+
         /* Couple the dependency tree for PI on this exposed to->fence */
         if (to->engine->schedule) {
                 err = i915_sched_node_add_dependency(&to->sched, &from->sched);
author	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 6 Mar 2020 13:38:38 +0000 (13:38 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 10 Mar 2020 11:13:33 +0000 (11:13 +0000)