drm/i915: Pin engine before pinning all objects, v5.

author Maarten Lankhorst <maarten.lankhorst@linux.intel.com>

Wed, 19 Aug 2020 14:08:52 +0000 (16:08 +0200)

committer Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Mon, 7 Sep 2020 11:30:52 +0000 (14:30 +0300)
author Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Wed, 19 Aug 2020 14:08:52 +0000 (16:08 +0200)
committer Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Mon, 7 Sep 2020 11:30:52 +0000 (14:30 +0300)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

index 2ccc566f91549966e5bb1e9add19d7f5eba2f243..d3c47390ef530a561c3e47c8260811f9bf9b9e1b 100644 (file)
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -56,7 +56,8 @@ enum {
  #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
  
  #define __EXEC_HAS_RELOC       BIT(31)
-#define __EXEC_INTERNAL_FLAGS  (~0u << 31)
+#define __EXEC_ENGINE_PINNED   BIT(30)
+#define __EXEC_INTERNAL_FLAGS  (~0u << 30)
  #define UPDATE                 PIN_OFFSET_FIXED
  
  #define BATCH_OFFSET_BIAS (256*1024)
@@ -281,6 +282,7 @@ struct i915_execbuffer {
         } reloc_cache;
  
         struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
+       struct intel_context *reloc_context;
  
         u64 invalid_flags; /** Set of execobj.flags that are invalid */
         u32 context_flags; /** Set of execobj.flags to insert from the ctx */
@@ -303,6 +305,9 @@ struct i915_execbuffer {
  };
  
  static int eb_parse(struct i915_execbuffer *eb);
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
+                                         bool throttle);
+static void eb_unpin_engine(struct i915_execbuffer *eb);
  
  static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
  {
@@ -935,7 +940,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
         }
  }
  
-static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
  {
         const unsigned int count = eb->buffer_count;
         unsigned int i;
@@ -952,6 +957,8 @@ static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
                 if (final)
                         i915_vma_put(vma);
         }
+
+       eb_unpin_engine(eb);
  }
  
  static void eb_destroy(const struct i915_execbuffer *eb)
@@ -1292,19 +1299,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
         if (engine == eb->context->engine) {
                 rq = i915_request_create(eb->context);
         } else {
-               struct intel_context *ce;
+               struct intel_context *ce = eb->reloc_context;
  
-               ce = intel_context_create(engine);
-               if (IS_ERR(ce)) {
-                       err = PTR_ERR(ce);
-                       goto err_unpin;
+               if (!ce) {
+                       ce = intel_context_create(engine);
+                       if (IS_ERR(ce)) {
+                               err = PTR_ERR(ce);
+                               goto err_unpin;
+                       }
+
+                       i915_vm_put(ce->vm);
+                       ce->vm = i915_vm_get(eb->context->vm);
+                       eb->reloc_context = ce;
                 }
  
-               i915_vm_put(ce->vm);
-               ce->vm = i915_vm_get(eb->context->vm);
+               err = intel_context_pin(ce);
+               if (err)
+                       goto err_unpin;
  
-               rq = intel_context_create_request(ce);
-               intel_context_put(ce);
+               rq = i915_request_create(ce);
+               intel_context_unpin(ce);
         }
         if (IS_ERR(rq)) {
                 err = PTR_ERR(rq);
@@ -1871,7 +1885,8 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
         return 0;
  }
  
-static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
+                                          struct i915_request *rq)
  {
         bool have_copy = false;
         struct eb_vma *ev;
@@ -1887,6 +1902,21 @@ repeat:
         eb_release_vmas(eb, false);
         i915_gem_ww_ctx_fini(&eb->ww);
  
+       if (rq) {
+               /* nonblocking is always false */
+               if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+                                     MAX_SCHEDULE_TIMEOUT) < 0) {
+                       i915_request_put(rq);
+                       rq = NULL;
+
+                       err = -EINTR;
+                       goto err_relock;
+               }
+
+               i915_request_put(rq);
+               rq = NULL;
+       }
+
         /*
          * We take 3 passes through the slowpatch.
          *
@@ -1910,14 +1940,25 @@ repeat:
                 err = 0;
         }
  
-       flush_workqueue(eb->i915->mm.userptr_wq);
+       if (!err)
+               flush_workqueue(eb->i915->mm.userptr_wq);
  
+err_relock:
         i915_gem_ww_ctx_init(&eb->ww, true);
         if (err)
                 goto out;
  
         /* reacquire the objects */
  repeat_validate:
+       rq = eb_pin_engine(eb, false);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto err;
+       }
+
+       /* We didn't throttle, should be NULL */
+       GEM_WARN_ON(rq);
+
         err = eb_validate_vmas(eb);
         if (err)
                 goto err;
@@ -1988,14 +2029,49 @@ out:
                 }
         }
  
+       if (rq)
+               i915_request_put(rq);
+
         return err;
  }
  
  static int eb_relocate_parse(struct i915_execbuffer *eb)
  {
         int err;
+       struct i915_request *rq = NULL;
+       bool throttle = true;
  
  retry:
+       rq = eb_pin_engine(eb, throttle);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               rq = NULL;
+               if (err != -EDEADLK)
+                       return err;
+
+               goto err;
+       }
+
+       if (rq) {
+               bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+
+               /* Need to drop all locks now for throttling, take slowpath */
+               err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
+               if (err == -ETIME) {
+                       if (nonblock) {
+                               err = -EWOULDBLOCK;
+                               i915_request_put(rq);
+                               goto err;
+                       }
+                       goto slow;
+               }
+               i915_request_put(rq);
+               rq = NULL;
+       }
+
+       /* only throttle once, even if we didn't need to throttle */
+       throttle = false;
+
         err = eb_validate_vmas(eb);
         if (err == -EAGAIN)
                 goto slow;
@@ -2032,7 +2108,7 @@ err:
         return err;
  
  slow:
-       err = eb_relocate_parse_slow(eb);
+       err = eb_relocate_parse_slow(eb, rq);
         if (err)
                 /*
                  * If the user expects the execobject.offset and
@@ -2486,7 +2562,7 @@ static const enum intel_engine_id user_ring_map[] = {
         [I915_EXEC_VEBOX]       = VECS0
  };
  
-static struct i915_request *eb_throttle(struct intel_context *ce)
+static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
  {
         struct intel_ring *ring = ce->ring;
         struct intel_timeline *tl = ce->timeline;
@@ -2520,22 +2596,17 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
         return i915_request_get(rq);
  }
  
-static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
  {
+       struct intel_context *ce = eb->context;
         struct intel_timeline *tl;
-       struct i915_request *rq;
+       struct i915_request *rq = NULL;
         int err;
  
-       /*
-        * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-        * EIO if the GPU is already wedged.
-        */
-       err = intel_gt_terminally_wedged(ce->engine->gt);
-       if (err)
-               return err;
+       GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
  
         if (unlikely(intel_context_is_banned(ce)))
-               return -EIO;
+               return ERR_PTR(-EIO);
  
         /*
          * Pinning the contexts may generate requests in order to acquire
@@ -2544,7 +2615,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
          */
         err = intel_context_pin(ce);
         if (err)
-               return err;
+               return ERR_PTR(err);
  
         /*
          * Take a local wakeref for preparing to dispatch the execbuf as
@@ -2556,45 +2627,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
          */
         tl = intel_context_timeline_lock(ce);
         if (IS_ERR(tl)) {
-               err = PTR_ERR(tl);
-               goto err_unpin;
+               intel_context_unpin(ce);
+               return ERR_CAST(tl);
         }
  
         intel_context_enter(ce);
-       rq = eb_throttle(ce);
-
+       if (throttle)
+               rq = eb_throttle(eb, ce);
         intel_context_timeline_unlock(tl);
  
-       if (rq) {
-               bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
-               long timeout;
-
-               timeout = MAX_SCHEDULE_TIMEOUT;
-               if (nonblock)
-                       timeout = 0;
-
-               timeout = i915_request_wait(rq,
-                                           I915_WAIT_INTERRUPTIBLE,
-                                           timeout);
-               i915_request_put(rq);
-
-               if (timeout < 0) {
-                       err = nonblock ? -EWOULDBLOCK : timeout;
-                       goto err_exit;
-               }
-       }
-
-       eb->engine = ce->engine;
-       eb->context = ce;
-       return 0;
-
-err_exit:
-       mutex_lock(&tl->mutex);
-       intel_context_exit(ce);
-       intel_context_timeline_unlock(tl);
-err_unpin:
-       intel_context_unpin(ce);
-       return err;
+       eb->args->flags |= __EXEC_ENGINE_PINNED;
+       return rq;
  }
  
  static void eb_unpin_engine(struct i915_execbuffer *eb)
@@ -2602,6 +2645,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
         struct intel_context *ce = eb->context;
         struct intel_timeline *tl = ce->timeline;
  
+       if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
+               return;
+
+       eb->args->flags &= ~__EXEC_ENGINE_PINNED;
+
         mutex_lock(&tl->mutex);
         intel_context_exit(ce);
         mutex_unlock(&tl->mutex);
@@ -2653,7 +2701,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb)
  }
  
  static int
-eb_pin_engine(struct i915_execbuffer *eb)
+eb_select_engine(struct i915_execbuffer *eb)
  {
         struct intel_context *ce;
         unsigned int idx;
@@ -2668,12 +2716,45 @@ eb_pin_engine(struct i915_execbuffer *eb)
         if (IS_ERR(ce))
                 return PTR_ERR(ce);
  
-       err = __eb_pin_engine(eb, ce);
-       intel_context_put(ce);
+       intel_gt_pm_get(ce->engine->gt);
  
+       if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+               err = intel_context_alloc_state(ce);
+               if (err)
+                       goto err;
+       }
+
+       /*
+        * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+        * EIO if the GPU is already wedged.
+        */
+       err = intel_gt_terminally_wedged(ce->engine->gt);
+       if (err)
+               goto err;
+
+       eb->context = ce;
+       eb->engine = ce->engine;
+
+       /*
+        * Make sure engine pool stays alive even if we call intel_context_put
+        * during ww handling. The pool is destroyed when last pm reference
+        * is dropped, which breaks our -EDEADLK handling.
+        */
+       return err;
+
+err:
+       intel_gt_pm_put(ce->engine->gt);
+       intel_context_put(ce);
         return err;
  }
  
+static void
+eb_put_engine(struct i915_execbuffer *eb)
+{
+       intel_gt_pm_put(eb->engine->gt);
+       intel_context_put(eb->context);
+}
+
  static void
  __free_fence_array(struct eb_fence *fences, unsigned int n)
  {
@@ -3054,6 +3135,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
         eb.vma[0].vma = NULL;
         eb.reloc_pool = eb.batch_pool = NULL;
+       eb.reloc_context = NULL;
  
         eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
         reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -3122,7 +3204,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         if (unlikely(err))
                 goto err_destroy;
  
-       err = eb_pin_engine(&eb);
+       err = eb_select_engine(&eb);
         if (unlikely(err))
                 goto err_context;
  
@@ -3259,8 +3341,10 @@ err_vma:
                 intel_gt_buffer_pool_put(eb.batch_pool);
         if (eb.reloc_pool)
                 intel_gt_buffer_pool_put(eb.reloc_pool);
+       if (eb.reloc_context)
+               intel_context_put(eb.reloc_context);
  err_engine:
-       eb_unpin_engine(&eb);
+       eb_put_engine(&eb);
  err_context:
         i915_gem_context_put(eb.gem_context);
  err_destroy:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c

index bc08c02b576788177c219614a4777042dfe6edfb..563839cbaf1c10994cc5d358b0cdd7e4912ef78d 100644 (file)
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -135,6 +135,7 @@ static int igt_gpu_reloc(void *arg)
                         goto err_pm;
                 }
                 eb.reloc_pool = NULL;
+               eb.reloc_context = NULL;
  
                 i915_gem_ww_ctx_init(&eb.ww, false);
  retry:
@@ -153,6 +154,8 @@ retry:
  
                 if (eb.reloc_pool)
                         intel_gt_buffer_pool_put(eb.reloc_pool);
+               if (eb.reloc_context)
+                       intel_context_put(eb.reloc_context);
  
                 intel_context_put(eb.context);
  err_pm:
author	Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
	Wed, 19 Aug 2020 14:08:52 +0000 (16:08 +0200)
committer	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
	Mon, 7 Sep 2020 11:30:52 +0000 (14:30 +0300)
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c		patch | blob | history
drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c		patch | blob | history