drm/i915/gt: Track engine round-trip times

author Chris Wilson <chris@chris-wilson.co.uk>

Thu, 19 Dec 2019 12:43:53 +0000 (12:43 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Thu, 19 Dec 2019 17:03:57 +0000 (17:03 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 19 Dec 2019 12:43:53 +0000 (12:43 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 19 Dec 2019 17:03:57 +0000 (17:03 +0000)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c

index 3d1d48bf90cf5901a698ccf1f8aa1f2652ab615c..6dd18f93d45c867c5f7bb370e8a8db55bf3f2646 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -334,6 +334,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
         /* Nothing to do here, execute in order of dependencies */
         engine->schedule = NULL;
  
+       ewma__engine_latency_init(&engine->latency);
         seqlock_init(&engine->stats.lock);
  
         ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
@@ -1481,6 +1482,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
         drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
         drm_printf(m, "\tBarriers?: %s\n",
                    yesno(!llist_empty(&engine->barrier_tasks)));
+       drm_printf(m, "\tLatency: %luus\n",
+                  ewma__engine_latency_read(&engine->latency));
  
         rcu_read_lock();
         rq = READ_ONCE(engine->heartbeat.systole);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c

index bcbda8e52d41f2727ac2c5c52a327b57c77b3158..8fb7b34fc5a69e1f00b789474029a431c595a6cc 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -73,6 +73,15 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
  
  #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
  
+static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+       struct i915_request *rq = to_request(fence);
+       /* Add the emit-to-fence-timestamp delta, in us, to the engine's EWMA */
+       ewma__engine_latency_add(&rq->engine->latency,
+                                ktime_us_delta(rq->fence.timestamp,
+                                               rq->duration.emitted));
+}
+
  static void
  __queue_and_release_pm(struct i915_request *rq,
                        struct intel_timeline *tl,
@@ -163,7 +172,18 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
  
         /* Install ourselves as a preemption barrier */
         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
-       __i915_request_commit(rq);
+       if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
+               /*
+                * Use an interrupt for precise measurement of duration,
+                * otherwise we rely on someone else retiring all the requests
+                * which may delay the signaling (i.e. we will likely wait
+                * until the background request retirement running every
+                * second or two).
+                */
+               BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
+               dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
+               rq->duration.emitted = ktime_get();
+       }
  
         /* Expose ourselves to the world */
         __queue_and_release_pm(rq, ce->timeline, engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h

index 17f1f1441efc9dba383e51959aa936a4177b8bfa..7f227da09d661105f8c576ed7358b7c73a491136 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -7,6 +7,7 @@
  #ifndef __INTEL_ENGINE_TYPES__
  #define __INTEL_ENGINE_TYPES__
  
+#include <linux/average.h>
  #include <linux/hashtable.h>
  #include <linux/irq_work.h>
  #include <linux/kref.h>
@@ -119,6 +120,9 @@ enum intel_engine_id {
  #define INVALID_ENGINE ((enum intel_engine_id)-1)
  };
  
+/* A simple estimator for the round-trip latency of an engine */
+DECLARE_EWMA(_engine_latency, 6, 4)
+
  struct st_preempt_hang {
         struct completion completion;
         unsigned int count;
@@ -316,6 +320,13 @@ struct intel_engine_cs {
                 struct intel_timeline *timeline;
         } legacy;
  
+       /*
+        * We track the average duration of the idle pulse on parking the
+        * engine to keep an estimate of how fast the engine is
+        * under ideal conditions.
+        */
+       struct ewma__engine_latency latency;
+
         /* Rather than have every client wait upon all user interrupts,
          * with the herd waking after every interrupt and each doing the
          * heavyweight seqno dance, we delegate the task (of being the
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h

index aa38290eea3d416e080b2df5be6c94565df8b193..c18c0bcd0193c25d8989a572d6274ac9e5669341 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -150,6 +150,10 @@ struct i915_request {
         union {
                 wait_queue_entry_t submitq;
                 struct i915_sw_dma_fence_cb dmaq;
+               struct i915_request_duration_cb {
+                       struct dma_fence_cb cb;
+                       ktime_t emitted;
+               } duration;
         };
         struct list_head execute_cb;
         struct i915_sw_fence semaphore;
author	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 19 Dec 2019 12:43:53 +0000 (12:43 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 19 Dec 2019 17:03:57 +0000 (17:03 +0000)
drivers/gpu/drm/i915/gt/intel_engine_cs.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_engine_pm.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_engine_types.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_request.h		patch \| blob \| history