From: Chris Wilson Date: Thu, 16 Jul 2020 10:07:54 +0000 (+0100) Subject: drm/i915: Reduce i915_request.lock contention for i915_request_wait X-Git-Url: https://git.baikalelectronics.ru/sdk/?a=commitdiff_plain;h=3f6a6f343c57a773ed146e54de8c626f72dd2be7;p=kernel.git drm/i915: Reduce i915_request.lock contention for i915_request_wait Currently, we use i915_request_completed() directly in i915_request_wait() and follow up with a manual invocation of dma_fence_signal(). This appears to cause a large number of contentions on i915_request.lock: when the process is woken up after the fence is signaled by an interrupt, we then try to call dma_fence_signal() ourselves while the signaler is still holding the lock. dma_fence_is_signaled() has the benefit of checking the DMA_FENCE_FLAG_SIGNALED_BIT prior to calling dma_fence_signal() and so avoids most of that contention. Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200716100754.5670-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi Signed-off-by: Joonas Lahtinen --- diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7a05850ca9318..0ec0beb1a83e5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1640,7 +1640,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) return this_cpu != cpu; } -static bool __i915_spin_request(const struct i915_request * const rq, int state) +static bool __i915_spin_request(struct i915_request * const rq, int state) { unsigned long timeout_ns; unsigned int cpu; @@ -1673,7 +1673,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state) timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns); timeout_ns += local_clock_ns(&cpu); do { - if (i915_request_completed(rq)) + if (dma_fence_is_signaled(&rq->fence)) return true; if (signal_pending_state(state, 
current)) @@ -1697,7 +1697,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) { struct request_wait *wait = container_of(cb, typeof(*wait), cb); - wake_up_process(wait->tsk); + wake_up_process(fetch_and_zero(&wait->tsk)); } /** @@ -1766,10 +1766,8 @@ long i915_request_wait(struct i915_request *rq, * duration, which we currently lack. */ if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && - __i915_spin_request(rq, state)) { - dma_fence_signal(&rq->fence); + __i915_spin_request(rq, state)) goto out; - } /* * This client is about to stall waiting for the GPU. In many cases @@ -1793,10 +1791,8 @@ long i915_request_wait(struct i915_request *rq, for (;;) { set_current_state(state); - if (i915_request_completed(rq)) { - dma_fence_signal(&rq->fence); + if (dma_fence_is_signaled(&rq->fence)) break; - } intel_engine_flush_submission(rq->engine); @@ -1814,7 +1810,9 @@ long i915_request_wait(struct i915_request *rq, } __set_current_state(TASK_RUNNING); - dma_fence_remove_callback(&rq->fence, &wait.cb); + if (READ_ONCE(wait.tsk)) + dma_fence_remove_callback(&rq->fence, &wait.cb); + GEM_BUG_ON(!list_empty(&wait.cb.node)); out: mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);