return rq;
}
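+/*
+ * Find the request presumed to be executing on the hardware: walk the
+ * context's list of in-flight requests from newest to oldest and return
+ * the oldest one that has not yet completed. Only valid for contexts
+ * using GuC submission.
+ */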
+struct i915_request *intel_context_find_active_request(struct intel_context *ce)
+{
+ struct i915_request *rq, *active = NULL;
+ unsigned long flags;
+
+ GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
+
+ spin_lock_irqsave(&ce->guc_active.lock, flags);
+ list_for_each_entry_reverse(rq, &ce->guc_active.requests,
+ sched.link) {
+ if (i915_request_completed(rq))
+ break;
+
+ active = rq;
+ }
+ spin_unlock_irqrestore(&ce->guc_active.lock, flags);
+
+ return active;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif
struct i915_request *intel_context_create_request(struct intel_context *ce);
+struct i915_request *
+intel_context_find_active_request(struct intel_context *ce);
+
static inline bool intel_context_is_barrier(const struct intel_context *ce)
{
return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
ktime_t *now);
struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine);
+intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine);
u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
struct intel_context *
return engine->cops->get_sibling(engine, sibling);
}
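+/*
+ * The GuC reports the identity of the hung context directly to the driver.
+ * These helpers stash that context on the engine so that a later error
+ * capture can find the offending request without searching the execlists.
+ */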
+static inline void
+intel_engine_set_hung_context(struct intel_engine_cs *engine,
+ struct intel_context *ce)
+{
+ engine->hung_ce = ce;
+}
+
+static inline void
+intel_engine_clear_hung_context(struct intel_engine_cs *engine)
+{
+ intel_engine_set_hung_context(engine, NULL);
+}
+
+static inline struct intel_context *
+intel_engine_get_hung_context(struct intel_engine_cs *engine)
+{
+ return engine->hung_ce;
+}
+
#endif /* _INTEL_RINGBUFFER_H_ */
drm_printf(m, "\tRequests:\n");
spin_lock_irqsave(&engine->sched_engine->lock, flags);
- rq = intel_engine_find_active_request(engine);
+ rq = intel_engine_execlist_find_hung_request(engine);
if (rq) {
struct intel_timeline *tl = get_timeline(rq);
}
struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine)
+intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
{
struct i915_request *request, *active = NULL;
+ /*
+ * This search does not work in GuC submission mode. However, the GuC
+ * will report the hanging context directly to the driver itself. So
+ * the driver should never get here when in GuC mode.
+ */
+ GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc));
+
/*
* We are called by the error capture, reset and to dump engine
* state at random points in time. In particular, note that neither is
/* keep a request in reserve for a [pm] barrier under oom */
struct i915_request *request_pool;
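+ /* Context reported as hung by the GuC, consumed by the error capture */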
+ struct intel_context *hung_ce;
+
struct llist_head barrier_tasks;
struct intel_context *kernel_context; /* pinned */
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
-static struct i915_request *context_find_active_request(struct intel_context *ce)
-{
- struct i915_request *rq, *active = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&ce->guc_active.lock, flags);
- list_for_each_entry_reverse(rq, &ce->guc_active.requests,
- sched.link) {
- if (i915_request_completed(rq))
- break;
-
- active = rq;
- }
- spin_unlock_irqrestore(&ce->guc_active.lock, flags);
-
- return active;
-}
-
static void __guc_reset_context(struct intel_context *ce, bool stalled)
{
struct i915_request *rq;
*/
clr_context_enabled(ce);
- rq = context_find_active_request(ce);
+ rq = intel_context_find_active_request(ce);
if (!rq) {
head = ce->ring->tail;
stalled = false;
return 0;
}
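+/*
+ * Record the hung context on its physical engine, capture the error state
+ * for that engine while holding a runtime PM wakeref, and bump the
+ * per-class engine reset counter.
+ */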
+static void capture_error_state(struct intel_guc *guc,
+ struct intel_context *ce)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_engine_cs *engine = __context_to_physical_engine(ce);
+ intel_wakeref_t wakeref;
+
+ intel_engine_set_hung_context(engine, ce);
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ i915_capture_error_state(gt, engine->mask);
+ atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]);
+}
+
static void guc_context_replay(struct intel_context *ce)
{
struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
struct intel_context *ce)
{
trace_intel_context_reset(ce);
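+ /* Capture the GPU state before the context's requests are replayed */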
+ capture_error_state(guc, ce);
guc_context_replay(ce);
}
{
struct intel_engine_capture_vma *capture = NULL;
struct intel_engine_coredump *ee;
- struct i915_request *rq;
+ struct intel_context *ce;
+ struct i915_request *rq = NULL;
unsigned long flags;
ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
if (!ee)
return NULL;
- spin_lock_irqsave(&engine->sched_engine->lock, flags);
- rq = intel_engine_find_active_request(engine);
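+ /*
+ * If the GuC reported a hung context, look up its active request directly.
+ * Otherwise fall back to searching the execlists for the hung request.
+ */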
+ ce = intel_engine_get_hung_context(engine);
+ if (ce) {
+ intel_engine_clear_hung_context(engine);
+ rq = intel_context_find_active_request(ce);
+ if (!rq || !i915_request_started(rq))
+ goto no_request_capture;
+ } else {
+ /*
+ * Getting here with GuC enabled means this is a forced error capture
+ * with no actual hang, so there is no need to attempt the execlists
+ * search.
+ */
+ if (!intel_uc_uses_guc_submission(&engine->gt->uc)) {
+ spin_lock_irqsave(&engine->sched_engine->lock, flags);
+ rq = intel_engine_execlist_find_hung_request(engine);
+ spin_unlock_irqrestore(&engine->sched_engine->lock,
+ flags);
+ }
+ }
if (rq)
capture = intel_engine_coredump_add_request(ee, rq,
ATOMIC_MAYFAIL);
- spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
if (!capture) {
+no_request_capture:
kfree(ee);
return NULL;
}