From d9087e2e947fef0020e94d0c1f9edb7a6c87dd09 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 3 Dec 2018 12:50:11 +0000 Subject: [PATCH] drm/i915/selftests: Add tests for GT and engine workaround verification Two simple selftests which test that both GT and engine workarounds are not lost after either a full GPU reset, or after the per-engine ones. (Including checks that one engine reset is not affecting workarounds not belonging to itself.) v2: * Rebase for series refactoring. * Add spinner for actual engine reset! * Add idle reset test as well. (Chris Wilson) * Share existing global_reset_lock. (Chris Wilson) v3: * intel_engine_verify_workarounds can be static. * API rename. (Chris Wilson) * Move global reset lock out of the loop. (Chris Wilson) v4: * Add missing rpm puts. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20181203125014.3219-5-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/intel_workarounds.c | 6 + drivers/gpu/drm/i915/selftests/igt_reset.c | 44 ++++++ drivers/gpu/drm/i915/selftests/igt_reset.h | 15 ++ .../gpu/drm/i915/selftests/intel_hangcheck.c | 51 ++----- .../drm/i915/selftests/intel_workarounds.c | 144 +++++++++++++++++- 6 files changed, 214 insertions(+), 47 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/igt_reset.c create mode 100644 drivers/gpu/drm/i915/selftests/igt_reset.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 50a8fa8fce64a..19b5fe5016bf6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -166,6 +166,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_flush_test.o \ + selftests/igt_reset.o \ selftests/igt_spinner.o # virtual gpu code diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 592226a0c7117..c53d4388930b7 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -1303,5 +1303,11 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +static bool intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from) +{ + return wa_list_verify(engine->i915, &engine->wa_list, from); +} + #include "selftests/intel_workarounds.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c new file mode 100644 index 0000000000000..208a966da8ca7 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "igt_reset.h" + +#include "../i915_drv.h" +#include "../intel_ringbuffer.h" + +void igt_global_reset_lock(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + pr_debug("%s: current gpu_error=%08lx\n", + __func__, i915->gpu_error.flags); + + while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) + wait_event(i915->gpu_error.reset_queue, + !test_bit(I915_RESET_BACKOFF, + &i915->gpu_error.flags)); + + for_each_engine(engine, i915, id) { + while (test_and_set_bit(I915_RESET_ENGINE + id, + &i915->gpu_error.flags)) + wait_on_bit(&i915->gpu_error.flags, + I915_RESET_ENGINE + id, + TASK_UNINTERRUPTIBLE); + } +} + +void igt_global_reset_unlock(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + wake_up_all(&i915->gpu_error.reset_queue); +} diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.h b/drivers/gpu/drm/i915/selftests/igt_reset.h new file mode 100644 index 0000000000000..5f0234d045d57 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_reset.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef __I915_SELFTESTS_IGT_RESET_H__ +#define __I915_SELFTESTS_IGT_RESET_H__ + +#include "../i915_drv.h" + +void igt_global_reset_lock(struct drm_i915_private *i915); +void igt_global_reset_unlock(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index a48fbe2557ea1..40efbed611de8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -27,6 +27,7 @@ #include "../i915_selftest.h" #include "i915_random.h" #include "igt_flush_test.h" +#include "igt_reset.h" #include "igt_wedge_me.h" #include "mock_context.h" @@ -354,40 +355,6 @@ unlock: return err; } -static void global_reset_lock(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - pr_debug("%s: current gpu_error=%08lx\n", - __func__, i915->gpu_error.flags); - - while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) - wait_event(i915->gpu_error.reset_queue, - !test_bit(I915_RESET_BACKOFF, - &i915->gpu_error.flags)); - - for_each_engine(engine, i915, id) { - while (test_and_set_bit(I915_RESET_ENGINE + id, - &i915->gpu_error.flags)) - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_ENGINE + id, - TASK_UNINTERRUPTIBLE); - } -} - -static void global_reset_unlock(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); - wake_up_all(&i915->gpu_error.reset_queue); -} - static int igt_global_reset(void *arg) { struct drm_i915_private *i915 = arg; @@ -396,7 +363,7 @@ static int igt_global_reset(void *arg) /* Check that we can issue a global GPU reset */ - global_reset_lock(i915); + igt_global_reset_lock(i915); set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); mutex_lock(&i915->drm.struct_mutex); @@ -411,7 +378,7 @@ static int igt_global_reset(void *arg) mutex_unlock(&i915->drm.struct_mutex); GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); - global_reset_unlock(i915); + igt_global_reset_unlock(i915); if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; @@ -942,7 +909,7 @@ static int igt_reset_wait(void *arg) /* Check that we detect a stuck waiter and issue a reset */ - global_reset_lock(i915); + igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); err = hang_init(&h, i915); @@ -994,7 +961,7 @@ fini: hang_fini(&h); unlock: mutex_unlock(&i915->drm.struct_mutex); - global_reset_unlock(i915); + igt_global_reset_unlock(i915); if (i915_terminally_wedged(&i915->gpu_error)) return -EIO; @@ -1072,7 +1039,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, /* Check that we can recover an unbind stuck on a hanging request */ - global_reset_lock(i915); + igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); err = hang_init(&h, i915); @@ -1192,7 +1159,7 @@ fini: hang_fini(&h); unlock: mutex_unlock(&i915->drm.struct_mutex); - global_reset_unlock(i915); + igt_global_reset_unlock(i915); if (i915_terminally_wedged(&i915->gpu_error)) return -EIO; @@ -1272,7 +1239,7 @@ static int igt_reset_queue(void *arg) /* Check that we replay pending requests following a hang */ - global_reset_lock(i915); + igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); err = hang_init(&h, i915); @@ -1403,7 +1370,7 @@ fini: hang_fini(&h); unlock: mutex_unlock(&i915->drm.struct_mutex); - global_reset_unlock(i915); + igt_global_reset_unlock(i915); if (i915_terminally_wedged(&i915->gpu_error)) return -EIO; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 80396b3592f50..d76a048c39544 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -6,6 +6,8 @@ #include "../i915_selftest.h" +#include "igt_flush_test.h" +#include "igt_reset.h" #include "igt_spinner.h" #include "igt_wedge_me.h" #include "mock_context.h" @@ -290,7 +292,6 @@ static int live_reset_whitelist(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine = i915->engine[RCS]; - struct i915_gpu_error *error = &i915->gpu_error; struct whitelist w; int err = 0; @@ -302,8 +303,7 @@ static int live_reset_whitelist(void *arg) if (!whitelist_build(engine, &w)) return 0; - set_bit(I915_RESET_BACKOFF, &error->flags); - set_bit(I915_RESET_ENGINE + engine->id, &error->flags); + igt_global_reset_lock(i915); if (intel_has_reset_engine(i915)) { err = check_whitelist_across_reset(engine, @@ -322,15 +322,149 @@ static int live_reset_whitelist(void *arg) } out: - clear_bit(I915_RESET_ENGINE + engine->id, &error->flags); - clear_bit(I915_RESET_BACKOFF, &error->flags); + igt_global_reset_unlock(i915); return err; } +static bool verify_gt_engine_wa(struct drm_i915_private *i915, const char *str) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + bool ok = true; + + ok &= intel_gt_verify_workarounds(i915, str); + + for_each_engine(engine, i915, id) + ok &= intel_engine_verify_workarounds(engine, str); + + return ok; +} + +static int +live_gpu_reset_gt_engine_workarounds(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gpu_error *error = &i915->gpu_error; + bool ok; + + if (!intel_has_gpu_reset(i915)) + return 0; + + pr_info("Verifying after GPU reset...\n"); + + igt_global_reset_lock(i915); + + ok = verify_gt_engine_wa(i915, "before reset"); + if (!ok) + goto out; + + intel_runtime_pm_get(i915); + set_bit(I915_RESET_HANDOFF, &error->flags); + i915_reset(i915, ALL_ENGINES, "live_workarounds"); + intel_runtime_pm_put(i915); + + ok = verify_gt_engine_wa(i915, "after reset"); + +out: + igt_global_reset_unlock(i915); + + return ok ? 0 : -ESRCH; +} + +static int +live_engine_reset_gt_engine_workarounds(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct igt_spinner spin; + enum intel_engine_id id; + struct i915_request *rq; + int ret = 0; + + if (!intel_has_reset_engine(i915)) + return 0; + + ctx = kernel_context(i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + igt_global_reset_lock(i915); + + for_each_engine(engine, i915, id) { + bool ok; + + pr_info("Verifying after %s reset...\n", engine->name); + + ok = verify_gt_engine_wa(i915, "before reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } + + intel_runtime_pm_get(i915); + i915_reset_engine(engine, "live_workarounds"); + intel_runtime_pm_put(i915); + + ok = verify_gt_engine_wa(i915, "after idle reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } + + ret = igt_spinner_init(&spin, i915); + if (ret) + goto err; + + intel_runtime_pm_get(i915); + + rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + igt_spinner_fini(&spin); + intel_runtime_pm_put(i915); + goto err; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("Spinner failed to start\n"); + igt_spinner_fini(&spin); + intel_runtime_pm_put(i915); + ret = -ETIMEDOUT; + goto err; + } + + i915_reset_engine(engine, "live_workarounds"); + + intel_runtime_pm_put(i915); + + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + + ok = verify_gt_engine_wa(i915, "after busy reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } + } + +err: + igt_global_reset_unlock(i915); + kernel_context_close(ctx); + + igt_flush_test(i915, I915_WAIT_LOCKED); + + return ret; +} + int intel_workarounds_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_reset_whitelist), + SUBTEST(live_gpu_reset_gt_engine_workarounds), + SUBTEST(live_engine_reset_gt_engine_workarounds), }; int err; -- 2.39.5