bool has_fence : 1;
bool needs_unfenced : 1;
+ struct intel_context *ce;
struct i915_request *rq;
u32 *rq_cmd;
unsigned int rq_size;
{
GEM_BUG_ON(eb->reloc_cache.rq);
+ if (eb->reloc_cache.ce)
+ intel_context_put(eb->reloc_cache.ce);
+
if (eb->lut_size > 0)
kfree(eb->buckets);
}
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.allocated = false;
+ cache->ce = NULL;
cache->rq = NULL;
cache->rq_size = 0;
}
if (err)
goto err_unmap;
- rq = i915_request_create(eb->context);
+ rq = intel_context_create_request(cache->ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
if (!intel_engine_can_store_dword(eb->engine))
return ERR_PTR(-ENODEV);
+ if (!cache->ce) {
+ struct intel_context *ce;
+
+ /*
+ * The CS pre-parser can pre-fetch commands across
+ * memory sync points and starting gen12 it is able to
+ * pre-fetch across BB_START and BB_END boundaries
+ * (within the same context). We therefore use a
+ * separate context gen12+ to guarantee that the reloc
+ * writes land before the parser gets to the target
+ * memory location.
+ */
+ if (cache->gen >= 12)
+ ce = intel_context_create(eb->context->gem_context,
+ eb->engine);
+ else
+ ce = intel_context_get(eb->context);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ cache->ce = ce;
+ }
+
err = __reloc_gpu_alloc(eb, vma, len);
if (unlikely(err))
return ERR_PTR(err);
return gen8_emit_fini_breadcrumb_footer(request, cs);
}
+/*
+ * Note that the CS instruction pre-parser will not stall on the breadcrumb
+ * flush and will continue pre-fetching the instructions after it before the
+ * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
+ * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
+ * of the next request before the memory has been flushed, we're guaranteed that
+ * we won't access the batch itself too early.
+ * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
+ * so, if the current request is modifying an instruction in the next request on
+ * the same intel_context, we might pre-fetch and then execute the pre-update
+ * instruction. To avoid this, the users of self-modifying code should either
+ * disable the parser around the code emitting the memory writes, via a new flag
+ * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
+ * the in-kernel use-cases we've opted to use a separate context, see
+ * reloc_gpu() as an example.
+ * All the above applies only to the instructions themselves. Non-inline data
+ * used by the instructions is not pre-fetched.
+ */
static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
u32 *cs)
{