#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
-#define __EXEC_INTERNAL_FLAGS (~0u << 31)
+#define __EXEC_ENGINE_PINNED BIT(30)
+#define __EXEC_INTERNAL_FLAGS (~0u << 30)
#define UPDATE PIN_OFFSET_FIXED
#define BATCH_OFFSET_BIAS (256*1024)
} reloc_cache;
struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
+ struct intel_context *reloc_context;
u64 invalid_flags; /** Set of execobj.flags that are invalid */
u32 context_flags; /** Set of execobj.flags to insert from the ctx */
};
static int eb_parse(struct i915_execbuffer *eb);
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
+ bool throttle);
+static void eb_unpin_engine(struct i915_execbuffer *eb);
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
}
}
-static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
{
const unsigned int count = eb->buffer_count;
unsigned int i;
if (final)
i915_vma_put(vma);
}
+
+ eb_unpin_engine(eb);
}
static void eb_destroy(const struct i915_execbuffer *eb)
if (engine == eb->context->engine) {
rq = i915_request_create(eb->context);
} else {
- struct intel_context *ce;
+ struct intel_context *ce = eb->reloc_context;
- ce = intel_context_create(engine);
- if (IS_ERR(ce)) {
- err = PTR_ERR(ce);
- goto err_unpin;
+ if (!ce) {
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto err_unpin;
+ }
+
+ i915_vm_put(ce->vm);
+ ce->vm = i915_vm_get(eb->context->vm);
+ eb->reloc_context = ce;
}
- i915_vm_put(ce->vm);
- ce->vm = i915_vm_get(eb->context->vm);
+ err = intel_context_pin(ce);
+ if (err)
+ goto err_unpin;
- rq = intel_context_create_request(ce);
- intel_context_put(ce);
+ rq = i915_request_create(ce);
+ intel_context_unpin(ce);
}
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
return 0;
}
-static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
+ struct i915_request *rq)
{
bool have_copy = false;
struct eb_vma *ev;
eb_release_vmas(eb, false);
i915_gem_ww_ctx_fini(&eb->ww);
+ if (rq) {
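+ /*
+ * Only blocking callers get here with a request: for O_NONBLOCK,
+ * eb_relocate_parse() fails with -EWOULDBLOCK instead of taking
+ * the slow path, so waiting without a timeout is fine.
+ */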
+ if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0) {
+ i915_request_put(rq);
+ rq = NULL;
+
+ err = -EINTR;
+ goto err_relock;
+ }
+
+ i915_request_put(rq);
+ rq = NULL;
+ }
+
/*
* We take 3 passes through the slowpatch.
*
err = 0;
}
- flush_workqueue(eb->i915->mm.userptr_wq);
+ if (!err)
+ flush_workqueue(eb->i915->mm.userptr_wq);
+err_relock:
i915_gem_ww_ctx_init(&eb->ww, true);
if (err)
goto out;
/* reacquire the objects */
repeat_validate:
+ rq = eb_pin_engine(eb, false);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err;
+ }
+
+ /* We didn't throttle, should be NULL */
+ GEM_WARN_ON(rq);
+
err = eb_validate_vmas(eb);
if (err)
goto err;
}
}
+ if (rq)
+ i915_request_put(rq);
+
return err;
}
static int eb_relocate_parse(struct i915_execbuffer *eb)
{
int err;
+ struct i915_request *rq = NULL;
+ bool throttle = true;
retry:
+ rq = eb_pin_engine(eb, throttle);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ rq = NULL;
+ if (err != -EDEADLK)
+ return err;
+
+ goto err;
+ }
+
+ if (rq) {
+ bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+
+ /* Need to drop all locks now for throttling, take slowpath */
+ err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
+ if (err == -ETIME) {
+ if (nonblock) {
+ err = -EWOULDBLOCK;
+ i915_request_put(rq);
+ goto err;
+ }
+ goto slow;
+ }
+ i915_request_put(rq);
+ rq = NULL;
+ }
+
+ /* only throttle once, even if we didn't need to throttle */
+ throttle = false;
+
err = eb_validate_vmas(eb);
if (err == -EAGAIN)
goto slow;
return err;
slow:
- err = eb_relocate_parse_slow(eb);
+ err = eb_relocate_parse_slow(eb, rq);
if (err)
/*
* If the user expects the execobject.offset and
[I915_EXEC_VEBOX] = VECS0
};
-static struct i915_request *eb_throttle(struct intel_context *ce)
+static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
{
struct intel_ring *ring = ce->ring;
struct intel_timeline *tl = ce->timeline;
return i915_request_get(rq);
}
-static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
{
+ struct intel_context *ce = eb->context;
struct intel_timeline *tl;
- struct i915_request *rq;
+ struct i915_request *rq = NULL;
int err;
- /*
- * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
- * EIO if the GPU is already wedged.
- */
- err = intel_gt_terminally_wedged(ce->engine->gt);
- if (err)
- return err;
+ GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
if (unlikely(intel_context_is_banned(ce)))
- return -EIO;
+ return ERR_PTR(-EIO);
/*
* Pinning the contexts may generate requests in order to acquire
*/
err = intel_context_pin(ce);
if (err)
- return err;
+ return ERR_PTR(err);
/*
* Take a local wakeref for preparing to dispatch the execbuf as
*/
tl = intel_context_timeline_lock(ce);
if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto err_unpin;
+ intel_context_unpin(ce);
+ return ERR_CAST(tl);
}
intel_context_enter(ce);
- rq = eb_throttle(ce);
-
+ if (throttle)
+ rq = eb_throttle(eb, ce);
intel_context_timeline_unlock(tl);
- if (rq) {
- bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
- long timeout;
-
- timeout = MAX_SCHEDULE_TIMEOUT;
- if (nonblock)
- timeout = 0;
-
- timeout = i915_request_wait(rq,
- I915_WAIT_INTERRUPTIBLE,
- timeout);
- i915_request_put(rq);
-
- if (timeout < 0) {
- err = nonblock ? -EWOULDBLOCK : timeout;
- goto err_exit;
- }
- }
-
- eb->engine = ce->engine;
- eb->context = ce;
- return 0;
-
-err_exit:
- mutex_lock(&tl->mutex);
- intel_context_exit(ce);
- intel_context_timeline_unlock(tl);
-err_unpin:
- intel_context_unpin(ce);
- return err;
+ eb->args->flags |= __EXEC_ENGINE_PINNED;
+ return rq;
}
static void eb_unpin_engine(struct i915_execbuffer *eb)
struct intel_context *ce = eb->context;
struct intel_timeline *tl = ce->timeline;
+ if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
+ return;
+
+ eb->args->flags &= ~__EXEC_ENGINE_PINNED;
+
mutex_lock(&tl->mutex);
intel_context_exit(ce);
mutex_unlock(&tl->mutex);
}
static int
-eb_pin_engine(struct i915_execbuffer *eb)
+eb_select_engine(struct i915_execbuffer *eb)
{
struct intel_context *ce;
unsigned int idx;
if (IS_ERR(ce))
return PTR_ERR(ce);
- err = __eb_pin_engine(eb, ce);
- intel_context_put(ce);
+ intel_gt_pm_get(ce->engine->gt);
+ if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ err = intel_context_alloc_state(ce);
+ if (err)
+ goto err;
+ }
+
+ /*
+ * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+ * EIO if the GPU is already wedged.
+ */
+ err = intel_gt_terminally_wedged(ce->engine->gt);
+ if (err)
+ goto err;
+
+ eb->context = ce;
+ eb->engine = ce->engine;
+
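+ /*
+ * On success we keep the GT pm reference taken above with
+ * intel_gt_pm_get(). It makes sure the engine pool stays alive even
+ * if we call intel_context_put during ww handling: the pool is
+ * destroyed when the last pm reference is dropped, which would break
+ * our -EDEADLK handling. eb_put_engine() drops the reference again.
+ */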
+ return err;
+
+err:
+ intel_gt_pm_put(ce->engine->gt);
+ intel_context_put(ce);
return err;
}
+/*
+ * eb_put_engine - release what eb_select_engine() left held on success.
+ *
+ * Drops the GT power-management reference (taken there with
+ * intel_gt_pm_get()) and then the reference on eb->context. Used on the
+ * err_engine unwind path of the execbuffer ioctl.
+ */
+static void
+eb_put_engine(struct i915_execbuffer *eb)
+{
+ intel_gt_pm_put(eb->engine->gt);
+ intel_context_put(eb->context);
+}
+
static void
__free_fence_array(struct eb_fence *fences, unsigned int n)
{
eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
eb.vma[0].vma = NULL;
eb.reloc_pool = eb.batch_pool = NULL;
+ eb.reloc_context = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
if (unlikely(err))
goto err_destroy;
- err = eb_pin_engine(&eb);
+ err = eb_select_engine(&eb);
if (unlikely(err))
goto err_context;
intel_gt_buffer_pool_put(eb.batch_pool);
if (eb.reloc_pool)
intel_gt_buffer_pool_put(eb.reloc_pool);
+ if (eb.reloc_context)
+ intel_context_put(eb.reloc_context);
err_engine:
- eb_unpin_engine(&eb);
+ eb_put_engine(&eb);
err_context:
i915_gem_context_put(eb.gem_context);
err_destroy: