drm/i915/gt: Expose busywait duration to sysfs

author Chris Wilson <chris@chris-wilson.co.uk>

Fri, 28 Feb 2020 13:17:13 +0000 (13:17 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Fri, 28 Feb 2020 22:03:41 +0000 (22:03 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 28 Feb 2020 13:17:13 +0000 (13:17 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 28 Feb 2020 22:03:41 +0000 (22:03 +0000)
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile

index d8d4a16179bd6a01c7c4714b105a9efe8a13ba9b..9ee3b59685b903a456796708533094bb28b5ad71 100644 (file)
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -35,9 +35,9 @@ config DRM_I915_PREEMPT_TIMEOUT
  
           May be 0 to disable the timeout.
  
-config DRM_I915_SPIN_REQUEST
-       int "Busywait for request completion (us)"
-       default 5 # microseconds
+config DRM_I915_MAX_REQUEST_BUSYWAIT
+       int "Busywait for request completion limit (ns)"
+       default 8000 # nanoseconds
         help
           Before sleeping waiting for a request (GPU operation) to complete,
           we may spend some time polling for its completion. As the IRQ may
@@ -45,6 +45,9 @@ config DRM_I915_SPIN_REQUEST
           check if the request will complete in the time it would have taken
           us to enable the interrupt.
  
+         This is adjustable via
+         /sys/class/drm/card?/engine/*/max_busywait_duration_ns
+
           May be 0 to disable the initial spin. In practice, we estimate
           the cost of enabling the interrupt (if currently disabled) to be
           a few microseconds.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c

index 119c9cb24fd4f2a0f358f0fb4af557a30b7d7b45..53ac3f00909abd7c3f32da7c5ff5b47e39b26c5b 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -313,6 +313,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
  
         engine->props.heartbeat_interval_ms =
                 CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
+       engine->props.max_busywait_duration_ns =
+               CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
         engine->props.preempt_timeout_ms =
                 CONFIG_DRM_I915_PREEMPT_TIMEOUT;
         engine->props.stop_timeout_ms =
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h

index b23366a8104800daeec97b26f39f4a3a998fe4b9..80cdde7128420c21e2b5cbfb9f363d421ac997d5 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -547,6 +547,7 @@ struct intel_engine_cs {
  
         struct {
                 unsigned long heartbeat_interval_ms;
+               unsigned long max_busywait_duration_ns;
                 unsigned long preempt_timeout_ms;
                 unsigned long stop_timeout_ms;
                 unsigned long timeslice_duration_ms;
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c

index 53402edbf98a5eb110b0909f8ac5908d8fe508bb..619c43d1172d8390518c8319933f8c87e482001e 100644 (file)
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -142,6 +142,54 @@ all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
  static struct kobj_attribute all_caps_attr =
  __ATTR(known_capabilities, 0444, all_caps_show, NULL);
  
+static ssize_t
+max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
+              const char *buf, size_t count)
+{
+       struct intel_engine_cs *engine = kobj_to_engine(kobj);
+       unsigned long long duration;
+       int err;
+
+       /*
+        * When waiting for a request, if is it currently being executed
+        * on the GPU, we busywait for a short while before sleeping. The
+        * premise is that most requests are short, and if it is already
+        * executing then there is a good chance that it will complete
+        * before we can setup the interrupt handler and go to sleep.
+        * We try to offset the cost of going to sleep, by first spinning
+        * on the request -- if it completed in less time than it would take
+        * to go sleep, process the interrupt and return back to the client,
+        * then we have saved the client some latency, albeit at the cost
+        * of spinning on an expensive CPU core.
+        *
+        * While we try to avoid waiting at all for a request that is unlikely
+        * to complete, deciding how long it is worth spinning is for is an
+        * arbitrary decision: trading off power vs latency.
+        */
+
+       err = kstrtoull(buf, 0, &duration);
+       if (err)
+               return err;
+
+       if (duration > jiffies_to_nsecs(2))
+               return -EINVAL;
+
+       WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
+
+       return count;
+}
+
+static ssize_t
+max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+       struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+       return sprintf(buf, "%lu\n", engine->props.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_attr =
+__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
+
  static ssize_t
  timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
                 const char *buf, size_t count)
@@ -224,6 +272,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
                 &mmio_attr.attr,
                 &caps_attr.attr,
                 &all_caps_attr.attr,
+               &max_spin_attr.attr,
                 NULL
         };
  
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index e5a55801f75316a895505b9cbe6f64daf3966791..feccf29381aa17425c3b0daac1e37beb68357306 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1404,7 +1404,7 @@ void i915_request_add(struct i915_request *rq)
         mutex_unlock(&tl->mutex);
  }
  
-static unsigned long local_clock_us(unsigned int *cpu)
+static unsigned long local_clock_ns(unsigned int *cpu)
  {
         unsigned long t;
  
@@ -1421,7 +1421,7 @@ static unsigned long local_clock_us(unsigned int *cpu)
          * stop busywaiting, see busywait_stop().
          */
         *cpu = get_cpu();
-       t = local_clock() >> 10;
+       t = local_clock();
         put_cpu();
  
         return t;
@@ -1431,15 +1431,15 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
  {
         unsigned int this_cpu;
  
-       if (time_after(local_clock_us(&this_cpu), timeout))
+       if (time_after(local_clock_ns(&this_cpu), timeout))
                 return true;
  
         return this_cpu != cpu;
  }
  
-static bool __i915_spin_request(const struct i915_request * const rq,
-                               int state, unsigned long timeout_us)
+static bool __i915_spin_request(const struct i915_request * const rq, int state)
  {
+       unsigned long timeout_ns;
         unsigned int cpu;
  
         /*
@@ -1467,7 +1467,8 @@ static bool __i915_spin_request(const struct i915_request * const rq,
          * takes to sleep on a request, on the order of a microsecond.
          */
  
-       timeout_us += local_clock_us(&cpu);
+       timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
+       timeout_ns += local_clock_ns(&cpu);
         do {
                 if (i915_request_completed(rq))
                         return true;
@@ -1475,7 +1476,7 @@ static bool __i915_spin_request(const struct i915_request * const rq,
                 if (signal_pending_state(state, current))
                         break;
  
-               if (busywait_stop(timeout_us, cpu))
+               if (busywait_stop(timeout_ns, cpu))
                         break;
  
                 cpu_relax();
@@ -1561,8 +1562,8 @@ long i915_request_wait(struct i915_request *rq,
          * completion. That requires having a good predictor for the request
          * duration, which we currently lack.
          */
-       if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
-           __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
+       if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
+           __i915_spin_request(rq, state)) {
                 dma_fence_signal(&rq->fence);
                 goto out;
         }
author	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 28 Feb 2020 13:17:13 +0000 (13:17 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 28 Feb 2020 22:03:41 +0000 (22:03 +0000)
drivers/gpu/drm/i915/Kconfig.profile		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_engine_cs.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_engine_types.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/sysfs_engines.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_request.c		patch \| blob \| history