#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
-static inline const struct intel_engine_cs *
-engine_lookup(const struct drm_i915_private *i915, unsigned int id)
-{
- if (id >= I915_NUM_ENGINES)
- return NULL;
-
- return i915->engine[id];
-}
-
-static inline const char *
-__engine_name(const struct intel_engine_cs *engine)
-{
- return engine ? engine->name : "";
-}
-
-static const char *
-engine_name(const struct drm_i915_private *i915, unsigned int id)
-{
- return __engine_name(engine_lookup(i915, id));
-}
-
static void __sg_set_buf(struct scatterlist *sg,
void *addr, unsigned int len, loff_t it)
{
err_printf(m, " INSTDONE: 0x%08x\n",
ee->instdone.instdone);
- if (ee->engine_id != RCS0 || INTEL_GEN(m->i915) <= 3)
+ if (ee->engine->class != RENDER_CLASS || INTEL_GEN(m->i915) <= 3)
return;
err_printf(m, " SC_INSTDONE: 0x%08x\n",
{
int n;
- err_printf(m, "%s command stream:\n",
- engine_name(m->i915, ee->engine_id));
+ err_printf(m, "%s command stream:\n", ee->engine->name);
err_printf(m, " IDLE?: %s\n", yesno(ee->idle));
err_printf(m, " START: 0x%08x\n", ee->start);
err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head);
}
static void print_error_obj(struct drm_i915_error_state_buf *m,
- struct intel_engine_cs *engine,
+ const struct intel_engine_cs *engine,
const char *name,
- struct drm_i915_error_object *obj)
+ const struct drm_i915_error_object *obj)
{
char out[ASCII85_BUFSZ];
int page;
static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
struct i915_gpu_state *error)
{
- struct drm_i915_error_object *obj;
+ const struct drm_i915_error_engine *ee;
struct timespec64 ts;
int i, j;
jiffies_to_msecs(jiffies - error->capture),
jiffies_to_msecs(error->capture - error->epoch));
- for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
- if (!error->engine[i].context.pid)
- continue;
-
+ for (ee = error->engine; ee; ee = ee->next)
err_printf(m, "Active process (on ring %s): %s [%d]\n",
- engine_name(m->i915, i),
- error->engine[i].context.comm,
- error->engine[i].context.pid);
- }
+ ee->engine->name,
+ ee->context.comm,
+ ee->context.pid);
+
err_printf(m, "Reset count: %u\n", error->reset_count);
err_printf(m, "Suspend count: %u\n", error->suspend_count);
err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
if (IS_GEN(m->i915, 7))
err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
- for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
- if (error->engine[i].engine_id != -1)
- error_print_engine(m, &error->engine[i], error->epoch);
- }
+ for (ee = error->engine; ee; ee = ee->next)
+ error_print_engine(m, ee, error->epoch);
- for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
- const struct drm_i915_error_engine *ee = &error->engine[i];
+ for (ee = error->engine; ee; ee = ee->next) {
+ const struct drm_i915_error_object *obj;
obj = ee->batchbuffer;
if (obj) {
- err_puts(m, m->i915->engine[i]->name);
+ err_puts(m, ee->engine->name);
if (ee->context.pid)
err_printf(m, " (submitted by %s [%d])",
ee->context.comm,
err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
upper_32_bits(obj->gtt_offset),
lower_32_bits(obj->gtt_offset));
- print_error_obj(m, m->i915->engine[i], NULL, obj);
+ print_error_obj(m, ee->engine, NULL, obj);
}
for (j = 0; j < ee->user_bo_count; j++)
- print_error_obj(m, m->i915->engine[i],
- "user", ee->user_bo[j]);
+ print_error_obj(m, ee->engine, "user", ee->user_bo[j]);
if (ee->num_requests) {
err_printf(m, "%s --- %d requests\n",
- m->i915->engine[i]->name,
+ ee->engine->name,
ee->num_requests);
for (j = 0; j < ee->num_requests; j++)
error_print_request(m, " ",
error->epoch);
}
- print_error_obj(m, m->i915->engine[i],
- "ringbuffer", ee->ringbuffer);
-
- print_error_obj(m, m->i915->engine[i],
- "HW Status", ee->hws_page);
-
- print_error_obj(m, m->i915->engine[i],
- "HW context", ee->ctx);
-
- print_error_obj(m, m->i915->engine[i],
- "WA context", ee->wa_ctx);
-
- print_error_obj(m, m->i915->engine[i],
+ print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
+ print_error_obj(m, ee->engine, "HW Status", ee->hws_page);
+ print_error_obj(m, ee->engine, "HW context", ee->ctx);
+ print_error_obj(m, ee->engine, "WA context", ee->wa_ctx);
+ print_error_obj(m, ee->engine,
"WA batchbuffer", ee->wa_batchbuffer);
-
- print_error_obj(m, m->i915->engine[i],
+ print_error_obj(m, ee->engine,
"NULL context", ee->default_state);
}
{
struct i915_gpu_state *error =
container_of(error_ref, typeof(*error), ref);
- long i, j;
+ long i;
- for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
- struct drm_i915_error_engine *ee = &error->engine[i];
+ while (error->engine) {
+ struct drm_i915_error_engine *ee = error->engine;
- for (j = 0; j < ee->user_bo_count; j++)
- i915_error_object_free(ee->user_bo[j]);
+ error->engine = ee->next;
+
+ for (i = 0; i < ee->user_bo_count; i++)
+ i915_error_object_free(ee->user_bo[i]);
kfree(ee->user_bo);
i915_error_object_free(ee->batchbuffer);
i915_error_object_free(ee->wa_ctx);
kfree(ee->requests);
+ kfree(ee);
}
kfree(error->overlay);
*
* It's only a small step better than a random number in its current form.
*/
-static u32 i915_error_generate_code(struct i915_gpu_state *error,
- intel_engine_mask_t engine_mask)
+static u32 i915_error_generate_code(struct i915_gpu_state *error)
{
+ const struct drm_i915_error_engine *ee = error->engine;
+
/*
* IPEHR would be an ideal way to detect errors, as it's the gross
* measure of "the command that hung." However, has some very common
* synchronization commands which almost always appear in the case
* strictly a client bug. Use instdone to differentiate those some.
*/
- if (engine_mask) {
- struct drm_i915_error_engine *ee =
- &error->engine[ffs(engine_mask)];
-
- return ee->ipehr ^ ee->instdone.instdone;
- }
-
- return 0;
+ return ee ? ee->ipehr ^ ee->instdone.instdone : 0;
}
static void gem_record_fences(struct i915_gpu_state *error)
ee->num_ports = n;
}
-static void record_context(struct drm_i915_error_context *e,
- struct i915_gem_context *ctx)
+static bool record_context(struct drm_i915_error_context *e,
+ const struct i915_request *rq)
{
+ const struct i915_gem_context *ctx = rq->gem_context;
+
if (ctx->pid) {
struct task_struct *task;
e->sched_attr = ctx->sched;
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
+
+ return i915_gem_context_no_error_capture(ctx);
}
struct capture_vma {
gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
{
struct drm_i915_private *i915 = error->i915;
- int i;
+ struct intel_engine_cs *engine;
+ struct drm_i915_error_engine *ee;
+
+ ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+ if (!ee)
+ return;
- for (i = 0; i < I915_NUM_ENGINES; i++) {
- struct intel_engine_cs *engine = i915->engine[i];
- struct drm_i915_error_engine *ee = &error->engine[i];
+ for_each_uabi_engine(engine, i915) {
struct capture_vma *capture = NULL;
struct i915_request *request;
unsigned long flags;
- ee->engine_id = -1;
-
- if (!engine)
- continue;
-
- ee->engine_id = i;
-
/* Refill our page pool before entering atomic section */
pool_refill(&compress->pool, ALLOW_FAIL);
- error_record_engine_registers(error, engine, ee);
- error_record_engine_execlists(engine, ee);
-
spin_lock_irqsave(&engine->active.lock, flags);
request = intel_engine_find_active_request(engine);
- if (request) {
- struct i915_gem_context *ctx = request->gem_context;
- struct intel_ring *ring = request->ring;
-
- record_context(&ee->context, ctx);
-
- /*
- * We need to copy these to an anonymous buffer
- * as the simplest method to avoid being overwritten
- * by userspace.
- */
- capture = capture_vma(capture,
- request->batch,
- &ee->batchbuffer);
+ if (!request) {
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+ continue;
+ }
- if (HAS_BROKEN_CS_TLB(i915))
- capture = capture_vma(capture,
- engine->gt->scratch,
- &ee->wa_batchbuffer);
+ error->simulated |= record_context(&ee->context, request);
- capture = request_record_user_bo(request, ee, capture);
+ /*
+ * We need to copy these to an anonymous buffer
+ * as the simplest method to avoid being overwritten
+ * by userspace.
+ */
+ capture = capture_vma(capture,
+ request->batch,
+ &ee->batchbuffer);
+ if (HAS_BROKEN_CS_TLB(i915))
capture = capture_vma(capture,
- request->hw_context->state,
- &ee->ctx);
+ engine->gt->scratch,
+ &ee->wa_batchbuffer);
- capture = capture_vma(capture,
- ring->vma,
- &ee->ringbuffer);
+ capture = request_record_user_bo(request, ee, capture);
- error->simulated |=
- i915_gem_context_no_error_capture(ctx);
+ capture = capture_vma(capture,
+ request->hw_context->state,
+ &ee->ctx);
- ee->rq_head = request->head;
- ee->rq_post = request->postfix;
- ee->rq_tail = request->tail;
+ capture = capture_vma(capture,
+ request->ring->vma,
+ &ee->ringbuffer);
- ee->cpu_ring_head = ring->head;
- ee->cpu_ring_tail = ring->tail;
+ ee->cpu_ring_head = request->ring->head;
+ ee->cpu_ring_tail = request->ring->tail;
- engine_record_requests(engine, request, ee);
- }
+ ee->rq_head = request->head;
+ ee->rq_post = request->postfix;
+ ee->rq_tail = request->tail;
+
+ engine_record_requests(engine, request, ee);
spin_unlock_irqrestore(&engine->active.lock, flags);
+ error_record_engine_registers(error, engine, ee);
+ error_record_engine_execlists(engine, ee);
+
while (capture) {
struct capture_vma *this = capture;
struct i915_vma *vma = *this->slot;
ee->default_state =
capture_object(i915, engine->default_state, compress);
+
+ ee->engine = engine;
+
+ ee->next = error->engine;
+ error->engine = ee;
+
+ ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+ if (!ee)
+ return;
}
+
+ kfree(ee);
}
static void
intel_engine_mask_t engines, const char *msg)
{
int len;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(error->engine); i++)
- if (!error->engine[i].context.pid)
- engines &= ~BIT(i);
len = scnprintf(error->error_msg, sizeof(error->error_msg),
"GPU HANG: ecode %d:%x:0x%08x",
INTEL_GEN(error->i915), engines,
- i915_error_generate_code(error, engines));
- if (engines) {
+ i915_error_generate_code(error));
+ if (error->engine) {
/* Just show the first executing process, more is confusing */
- i = __ffs(engines);
len += scnprintf(error->error_msg + len,
sizeof(error->error_msg) - len,
", in %s [%d]",
- error->engine[i].context.comm,
- error->engine[i].context.pid);
+ error->engine->context.comm,
+ error->engine->context.pid);
}
if (msg)
len += scnprintf(error->error_msg + len,
static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
{
+ const struct drm_i915_error_engine *ee;
unsigned long epoch = error->capture;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
- const struct drm_i915_error_engine *ee = &error->engine[i];
+ for (ee = error->engine; ee; ee = ee->next) {
if (ee->hangcheck_timestamp &&
time_before(ee->hangcheck_timestamp, epoch))
epoch = ee->hangcheck_timestamp;