From 939fc2c4209e8de4915dbb39c1ab620ca8c13272 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 2 Dec 2019 20:43:14 +0000 Subject: [PATCH] drm/i915: Lift i915_vma_pin() out of intel_renderstate_emit() Once inside a request, inside the timeline->mutex, pinning is verboten. <4> [896.032829] ====================================================== <4> [896.032831] WARNING: possible circular locking dependency detected <4> [896.032835] 5.4.0-rc8-CI-Patchwork_15533+ #1 Tainted: G U <4> [896.032838] ------------------------------------------------------ <4> [896.032841] gem_exec_parall/3720 is trying to acquire lock: <4> [896.032844] ffff888401863270 (&kernel#2){+.+.}, at: i915_request_create+0x16/0x1c0 [i915] <4> [896.032915] but task is already holding lock: <4> [896.032917] ffff8883ec1c93c0 (&vm->mutex){+.+.}, at: i915_vma_pin+0xf3/0x11c0 [i915] <4> [896.032952] which lock already depends on the new lock. <4> [896.032954] the existing dependency chain (in reverse order) is: <4> [896.032956] -> #1 (&vm->mutex){+.+.}: <4> [896.032961] __mutex_lock+0x9a/0x9d0 <4> [896.032995] i915_vma_pin+0xf3/0x11c0 [i915] <4> [896.033033] intel_renderstate_emit+0xb9/0x9e0 [i915] <4> [896.033081] i915_gem_init+0x5a9/0xa50 [i915] <4> [896.033112] i915_driver_probe+0xb00/0x15f0 [i915] <4> [896.033144] i915_pci_probe+0x43/0x1c0 [i915] <4> [896.033149] pci_device_probe+0x9e/0x120 <4> [896.033154] really_probe+0xea/0x420 <4> [896.033158] driver_probe_device+0x10b/0x120 <4> [896.033161] device_driver_attach+0x4a/0x50 <4> [896.033164] __driver_attach+0x97/0x130 <4> [896.033168] bus_for_each_dev+0x74/0xc0 <4> [896.033171] bus_add_driver+0x142/0x220 <4> [896.033174] driver_register+0x56/0xf0 <4> [896.033178] do_one_initcall+0x58/0x2ff <4> [896.033183] do_init_module+0x56/0x1f8 <4> [896.033187] load_module+0x243e/0x29f0 <4> [896.033190] __do_sys_finit_module+0xe9/0x110 <4> [896.033194] do_syscall_64+0x4f/0x210 <4> [896.033197] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4> [896.033200] -> #0 (&kernel#2){+.+.}: <4> [896.033206] __lock_acquire+0x1328/0x15d0 <4> [896.033209] lock_acquire+0xa7/0x1c0 <4> [896.033213] __mutex_lock+0x9a/0x9d0 <4> [896.033255] i915_request_create+0x16/0x1c0 [i915] <4> [896.033287] intel_engine_flush_barriers+0x4c/0x100 [i915] <4> [896.033327] ggtt_flush+0x37/0x60 [i915] <4> [896.033366] i915_gem_evict_something+0x46b/0x5a0 [i915] <4> [896.033407] i915_gem_gtt_insert+0x21d/0x6a0 [i915] <4> [896.033449] i915_vma_pin+0xb36/0x11c0 [i915] <4> [896.033488] gen6_ppgtt_pin+0xd5/0x170 [i915] <4> [896.033523] ring_context_pin+0x2e/0xc0 [i915] <4> [896.033554] __intel_context_do_pin+0x6b/0x190 [i915] <4> [896.033591] i915_gem_do_execbuffer+0x1814/0x26c0 [i915] <4> [896.033627] i915_gem_execbuffer2_ioctl+0x11b/0x460 [i915] <4> [896.033632] drm_ioctl_kernel+0xa7/0xf0 <4> [896.033635] drm_ioctl+0x2e1/0x390 <4> [896.033638] do_vfs_ioctl+0xa0/0x6f0 <4> [896.033641] ksys_ioctl+0x35/0x60 <4> [896.033644] __x64_sys_ioctl+0x11/0x20 <4> [896.033647] do_syscall_64+0x4f/0x210 <4> [896.033650] entry_SYSCALL_64_after_hwframe+0x49/0xbe Lift the object allocation and pin prior to the request construction. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Maarten Lankhorst Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191202204316.2665847-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_renderstate.c | 97 ++++++++++++--------- drivers/gpu/drm/i915/gt/intel_renderstate.h | 17 +++- drivers/gpu/drm/i915/i915_gem.c | 8 +- 3 files changed, 78 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index c4edc35e7d890..5954ecc3207f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -29,16 +29,6 @@ #include "intel_renderstate.h" #include "intel_ring.h" -struct intel_renderstate { - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 batch_offset; - u32 batch_size; - u32 aux_offset; - u32 aux_size; -}; - static const struct intel_renderstate_rodata * render_state_get_rodata(const struct intel_engine_cs *engine) { @@ -84,11 +74,11 @@ static int render_state_setup(struct intel_renderstate *so, u32 *d; int ret; - ret = i915_gem_object_prepare_write(so->obj, &needs_clflush); + ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush); if (ret) return ret; - d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0)); + d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -166,7 +156,7 @@ static int render_state_setup(struct intel_renderstate *so, ret = 0; out: - i915_gem_object_finish_access(so->obj); + i915_gem_object_finish_access(so->vma->obj); return ret; err: @@ -177,61 +167,84 @@ err: #undef OUT_BATCH -int intel_renderstate_emit(struct i915_request *rq) +int intel_renderstate_init(struct intel_renderstate *so, + struct intel_engine_cs *engine) { - struct intel_engine_cs *engine = rq->engine; - struct intel_renderstate so = {}; /* keep the compiler happy */ + struct drm_i915_gem_object *obj; int err; - so.rodata = render_state_get_rodata(engine); - if (!so.rodata) + memset(so, 0, sizeof(*so)); + + so->rodata = render_state_get_rodata(engine); + if (!so->rodata) return 0; - if (so.rodata->batch_items * 4 > PAGE_SIZE) + if (so->rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; - so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(so.obj)) - return PTR_ERR(so.obj); + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); - so.vma = i915_vma_instance(so.obj, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(so.vma)) { - err = PTR_ERR(so.vma); + so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(so->vma)) { + err = PTR_ERR(so->vma); goto err_obj; } - err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) goto err_vma; - err = render_state_setup(&so, rq->i915); + err = render_state_setup(so, engine->i915); if (err) goto err_unpin; + return 0; + +err_unpin: + i915_vma_unpin(so->vma); +err_vma: + i915_vma_close(so->vma); +err_obj: + i915_gem_object_put(obj); + so->vma = NULL; + return err; +} + +int intel_renderstate_emit(struct intel_renderstate *so, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + int err; + + if (!so->vma) + return 0; + err = engine->emit_bb_start(rq, - so.batch_offset, so.batch_size, + so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); if (err) - goto err_unpin; + return err; - if (so.aux_size > 8) { + if (so->aux_size > 8) { err = engine->emit_bb_start(rq, - so.aux_offset, so.aux_size, + so->aux_offset, so->aux_size, I915_DISPATCH_SECURE); if (err) - goto err_unpin; + return err; } - i915_vma_lock(so.vma); - err = i915_request_await_object(rq, so.vma->obj, false); + i915_vma_lock(so->vma); + err = i915_request_await_object(rq, so->vma->obj, false); if (err == 0) - err = i915_vma_move_to_active(so.vma, rq, 0); - i915_vma_unlock(so.vma); -err_unpin: - i915_vma_unpin(so.vma); -err_vma: - i915_vma_close(so.vma); -err_obj: - i915_gem_object_put(so.obj); + err = i915_vma_move_to_active(so->vma, rq, 0); + i915_vma_unlock(so->vma); + return err; } + +void intel_renderstate_fini(struct intel_renderstate *so) +{ + i915_vma_unpin_and_release(&so->vma, 0); +} diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 8d50791450542..5700be69a05a6 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -27,6 +27,8 @@ #include struct i915_request; +struct intel_engine_cs; +struct i915_vma; struct intel_renderstate_rodata { const u32 *reloc; @@ -46,6 +48,19 @@ extern const struct intel_renderstate_rodata gen7_null_state; extern const struct intel_renderstate_rodata gen8_null_state; extern const struct intel_renderstate_rodata gen9_null_state; -int intel_renderstate_emit(struct i915_request *rq); +struct intel_renderstate { + const struct intel_renderstate_rodata *rodata; + struct i915_vma *vma; + u32 batch_offset; + u32 batch_size; + u32 aux_offset; + u32 aux_size; +}; + +int intel_renderstate_init(struct intel_renderstate *so, + struct intel_engine_cs *engine); +int intel_renderstate_emit(struct intel_renderstate *so, + struct i915_request *rq); +void intel_renderstate_fini(struct intel_renderstate *so); #endif /* _INTEL_RENDERSTATE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f7b5fe0f54246..bfdc8a9f82f80 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1086,9 +1086,14 @@ static int __intel_engines_record_defaults(struct intel_gt *gt) */ for_each_engine(engine, gt, id) { + struct intel_renderstate so; struct intel_context *ce; struct i915_request *rq; + err = intel_renderstate_init(&so, engine); + if (err) + goto out; + /* We must be able to switch to something! */ GEM_BUG_ON(!engine->kernel_context); engine->serial++; /* force the kernel context switch */ @@ -1111,13 +1116,14 @@ static int __intel_engines_record_defaults(struct intel_gt *gt) if (err) goto err_rq; - err = intel_renderstate_emit(rq); + err = intel_renderstate_emit(&so, rq); if (err) goto err_rq; err_rq: requests[id] = i915_request_get(rq); i915_request_add(rq); + intel_renderstate_fini(&so); if (err) goto out; } -- 2.39.5