drm/i915: Drop no-semaphore boosting

Author:     Chris Wilson <chris@chris-wilson.co.uk>
AuthorDate: Wed, 13 May 2020 17:35:04 +0000 (18:35 +0100)
Committer:  Chris Wilson <chris@chris-wilson.co.uk>
CommitDate: Thu, 14 May 2020 05:14:33 +0000 (06:14 +0100)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

index 3c98aaaa8d118eccd7de05fe458509be829c25dd..c0d59d48e1986aee468d76cee45e78340cf5ae0c 100644 (file)
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2582,21 +2582,6 @@ static void eb_request_add(struct i915_execbuffer *eb)
         /* Check that the context wasn't destroyed before submission */
         if (likely(!intel_context_is_closed(eb->context))) {
                 attr = eb->gem_context->sched;
-
-               /*
-                * Boost actual workloads past semaphores!
-                *
-                * With semaphores we spin on one engine waiting for another,
-                * simply to reduce the latency of starting our work when
-                * the signaler completes. However, if there is any other
-                * work that we could be doing on this engine instead, that
-                * is better utilisation and will reduce the overall duration
-                * of the current work. To avoid PI boosting a semaphore
-                * far in the distance past over useful work, we keep a history
-                * of any semaphore use along our dependency chain.
-                */
-               if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
-                       attr.priority |= I915_PRIORITY_NOSEMAPHORE;
         } else {
                 /* Serialise with context_close via the add_to_timeline */
                 i915_request_set_error_once(rq, -ENOENT);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c

index 51470e96cdd49ef58a7f0434292687ff487b4d5f..32fdb7cb3ae8eb473b612a9ce713480ef85f095c 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -429,15 +429,6 @@ static int effective_prio(const struct i915_request *rq)
         if (i915_request_has_nopreempt(rq))
                 prio = I915_PRIORITY_UNPREEMPTABLE;
  
-       /*
-        * On unwinding the active request, we give it a priority bump
-        * if it has completed waiting on any semaphore. If we know that
-        * the request has already started, we can prevent an unwanted
-        * preempt-to-idle cycle by taking that into account now.
-        */
-       if (__i915_request_has_started(rq))
-               prio |= I915_PRIORITY_NOSEMAPHORE;
-
         return prio;
  }
  
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c

index a56dff3b157a47f9e8f30e0778ad57555e4fcd31..52af1cee9a94eb4845766538d2598537d9dd7120 100644 (file)
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq)
  
         /* Opencode i915_request_add() so we can keep the timeline locked. */
         __i915_request_commit(rq);
+       rq->sched.attr.priority = I915_PRIORITY_BARRIER;
         __i915_request_queue(rq, NULL);
  
         timeout = i915_request_wait(rq, 0, HZ / 10);
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h

index e18723d8df86b61d3c48a4a03e5871e2db469c07..5003a71113cbe9b6cd81b5bb6e24fd90b5b71ec5 100644 (file)
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -24,14 +24,12 @@ enum {
         I915_PRIORITY_DISPLAY,
  };
  
-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 0
  #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
  
  #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
  #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
  
-#define I915_PRIORITY_NOSEMAPHORE      ((u8)BIT(0))
-
  /* Smallest priority value that cannot be bumped. */
  #define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK)
  
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index 00b7c4eb3f325ef7256e26377788b323ef7db13c..526c1e9acbd587117fcf0aa18ce81975d6076187 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -368,8 +368,6 @@ __await_execution(struct i915_request *rq,
         }
         spin_unlock_irq(&signal->lock);
  
-       /* Copy across semaphore status as we need the same behaviour */
-       rq->sched.flags |= signal->sched.flags;
         return 0;
  }
  
@@ -537,10 +535,8 @@ void __i915_request_unsubmit(struct i915_request *request)
         spin_unlock(&request->lock);
  
         /* We've already spun, don't charge on resubmitting. */
-       if (request->sched.semaphores && i915_request_started(request)) {
-               request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+       if (request->sched.semaphores && i915_request_started(request))
                 request->sched.semaphores = 0;
-       }
  
         /*
          * We don't need to wake_up any waiters on request->execute, they
@@ -598,15 +594,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
         return NOTIFY_DONE;
  }
  
-static void irq_semaphore_cb(struct irq_work *wrk)
-{
-       struct i915_request *rq =
-               container_of(wrk, typeof(*rq), semaphore_work);
-
-       i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
-       i915_request_put(rq);
-}
-
  static int __i915_sw_fence_call
  semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
  {
@@ -614,11 +601,6 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
  
         switch (state) {
         case FENCE_COMPLETE:
-               if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
-                       i915_request_get(rq);
-                       init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
-                       irq_work_queue(&rq->semaphore_work);
-               }
                 break;
  
         case FENCE_FREE:
@@ -997,6 +979,7 @@ emit_semaphore_wait(struct i915_request *to,
                     gfp_t gfp)
  {
         const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
+       struct i915_sw_fence *wait = &to->submit;
  
         if (!intel_context_use_semaphores(to->context))
                 goto await_fence;
@@ -1031,11 +1014,10 @@ emit_semaphore_wait(struct i915_request *to,
                 goto await_fence;
  
         to->sched.semaphores |= mask;
-       to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
-       return 0;
+       wait = &to->semaphore;
  
  await_fence:
-       return i915_sw_fence_await_dma_fence(&to->submit,
+       return i915_sw_fence_await_dma_fence(wait,
                                              &from->fence, 0,
                                              I915_FENCE_GFP);
  }
@@ -1070,17 +1052,6 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
         if (ret < 0)
                 return ret;
  
-       if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
-               ret = i915_sw_fence_await_dma_fence(&to->semaphore,
-                                                   &from->fence, 0,
-                                                   I915_FENCE_GFP);
-               if (ret < 0)
-                       return ret;
-       }
-
-       if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
-               to->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
-
         return 0;
  }
  
@@ -1528,9 +1499,6 @@ void i915_request_add(struct i915_request *rq)
                 attr = ctx->sched;
         rcu_read_unlock();
  
-       if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
-               attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
         __i915_request_queue(rq, &attr);
  
         mutex_unlock(&tl->mutex);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h

index 98ae2dc82371a2dc9f7d6c500e1668eaae551e2c..8ec7ee4dbadc9ba4f32301eedb363715ec3ae32b 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -216,7 +216,6 @@ struct i915_request {
         };
         struct list_head execute_cb;
         struct i915_sw_fence semaphore;
-       struct irq_work semaphore_work;
  
         /*
          * A list of everyone we wait upon, and everyone who waits upon us.
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c

index bec2a9c254257f4dd6441997729d45918327e77b..f4ea318781f0f541e87c379350021719fb31e14d 100644 (file)
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -51,11 +51,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists)
         GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
                    rb_first(&execlists->queue.rb_root));
  
-       last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
+       last_prio = INT_MAX;
         for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
                 const struct i915_priolist *p = to_priolist(rb);
  
-               GEM_BUG_ON(p->priority >= last_prio);
+               GEM_BUG_ON(p->priority > last_prio);
                 last_prio = p->priority;
  
                 GEM_BUG_ON(!p->used);
@@ -434,15 +434,12 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
                 dep->waiter = node;
                 dep->flags = flags;
  
-               /* Keep track of whether anyone on this chain has a semaphore */
-               if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
-                   !node_started(signal))
-                       node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
-
                 /* All set, now publish. Beware the lockless walkers. */
                 list_add_rcu(&dep->signal_link, &node->signalers_list);
                 list_add_rcu(&dep->wait_link, &signal->waiters_list);
  
+               /* Propagate the chains */
+               node->flags |= signal->flags;
                 ret = true;
         }
  
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h

index 6ab2c5289bed3d4c60455b0f708caddec15cffae..f72e6c397b088889ae8072a35c5236a0d58427e1 100644 (file)
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -65,8 +65,7 @@ struct i915_sched_node {
         struct list_head link;
         struct i915_sched_attr attr;
         unsigned int flags;
-#define I915_SCHED_HAS_SEMAPHORE_CHAIN BIT(0)
-#define I915_SCHED_HAS_EXTERNAL_CHAIN  BIT(1)
+#define I915_SCHED_HAS_EXTERNAL_CHAIN  BIT(0)
         intel_engine_mask_t semaphores;
  };
author	Chris Wilson <chris@chris-wilson.co.uk>
	Wed, 13 May 2020 17:35:04 +0000 (18:35 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 14 May 2020 05:14:33 +0000 (06:14 +0100)
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c		patch | blob | history
drivers/gpu/drm/i915/gt/intel_lrc.c		patch | blob | history
drivers/gpu/drm/i915/gt/selftest_context.c		patch | blob | history
drivers/gpu/drm/i915/i915_priolist_types.h		patch | blob | history
drivers/gpu/drm/i915/i915_request.c		patch | blob | history
drivers/gpu/drm/i915/i915_request.h		patch | blob | history
drivers/gpu/drm/i915/i915_scheduler.c		patch | blob | history
drivers/gpu/drm/i915/i915_scheduler_types.h		patch | blob | history