]> git.baikalelectronics.ru Git - kernel.git/commitdiff
drm/i915: Push the wakeref->count deferral to the backend
authorChris Wilson <chris@chris-wilson.co.uk>
Tue, 13 Aug 2019 19:07:05 +0000 (20:07 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Tue, 13 Aug 2019 20:09:49 +0000 (21:09 +0100)
If the backend wishes to defer the wakeref parking, make it responsible
for unlocking the wakeref (i.e. bumping the counter). This allows it to
time the unlock much more carefully in case it happens to needs the
wakeref to be active during its deferral.

For instance, during engine parking we may choose to emit an idle
barrier (a request). To do so, we borrow the engine->kernel_context
timeline and to ensure exclusive access we keep the
engine->wakeref.count as 0. However, to submit that request to HW may
require a intel_engine_pm_get() (e.g. to keep the submission tasklet
alive) and before we allow that we have to rewake our wakeref to avoid a
recursive deadlock.

<4> [257.742916] IRQs not enabled as expected
<4> [257.742930] WARNING: CPU: 0 PID: 0 at kernel/softirq.c:169 __local_bh_enable_ip+0xa9/0x100
<4> [257.742936] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 btusb btrtl btbcm btintel snd_hda_intel snd_intel_nhlt bluetooth snd_hda_codec coretemp snd_hwdep crct10dif_pclmul snd_hda_core crc32_pclmul ecdh_generic ecc ghash_clmulni_intel snd_pcm r8169 realtek lpc_ich prime_numbers i2c_hid
<4> [257.742991] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G     U  W         5.3.0-rc3-g5d0a06cd532c-drmtip_340+ #1
<4> [257.742998] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F6 02/17/2015
<4> [257.743008] RIP: 0010:__local_bh_enable_ip+0xa9/0x100
<4> [257.743017] Code: 37 5b 5d c3 8b 80 50 08 00 00 85 c0 75 a9 80 3d 0b be 25 01 00 75 a0 48 c7 c7 f3 0c 06 ac c6 05 fb bd 25 01 01 e8 77 84 ff ff <0f> 0b eb 89 48 89 ef e8 3b 41 06 00 eb 98 e8 e4 5c f4 ff 5b 5d c3
<4> [257.743025] RSP: 0018:ffffa78600003cb8 EFLAGS: 00010086
<4> [257.743035] RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000010302
<4> [257.743042] RDX: 0000000080010302 RSI: 0000000000000000 RDI: 00000000ffffffff
<4> [257.743050] RBP: ffffffffc0494bb3 R08: 0000000000000000 R09: 0000000000000001
<4> [257.743058] R10: 0000000014c8f0e9 R11: 00000000fee2ff8e R12: ffffa23ba8c38008
<4> [257.743065] R13: ffffa23bacc579c0 R14: ffffa23bb7db0f60 R15: ffffa23b9cc8c430
<4> [257.743074] FS:  0000000000000000(0000) GS:ffffa23bbba00000(0000) knlGS:0000000000000000
<4> [257.743082] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4> [257.743089] CR2: 00007fe477b20778 CR3: 000000011f72a000 CR4: 00000000001006f0
<4> [257.743096] Call Trace:
<4> [257.743104]  <IRQ>
<4> [257.743265]  __i915_request_commit+0x240/0x5d0 [i915]
<4> [257.743427]  ? __i915_request_create+0x228/0x4c0 [i915]
<4> [257.743584]  __engine_park+0x64/0x250 [i915]
<4> [257.743730]  ____intel_wakeref_put_last+0x1c/0x70 [i915]
<4> [257.743878]  i915_sample+0x2ee/0x310 [i915]
<4> [257.744030]  ? i915_pmu_cpu_offline+0xb0/0xb0 [i915]
<4> [257.744040]  __hrtimer_run_queues+0x11e/0x4b0
<4> [257.744068]  hrtimer_interrupt+0xea/0x250
<4> [257.744079]  ? lockdep_hardirqs_off+0x79/0xd0
<4> [257.744101]  smp_apic_timer_interrupt+0x96/0x280
<4> [257.744114]  apic_timer_interrupt+0xf/0x20
<4> [257.744125] RIP: 0010:__do_softirq+0xb3/0x4ae

v2: Keep the priority_hint assert
v3: That assert was desperately trying to point out my bug. Sorry, little
assert.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111378
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190813190705.23869-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_engine_pm.c
drivers/gpu/drm/i915/i915_priolist_types.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/i915_scheduler.c
drivers/gpu/drm/i915/intel_wakeref.c
drivers/gpu/drm/i915/intel_wakeref.h

index 6b15e3335dd6e6ac505be14904ba17a533a36b7e..49ad02c3720fccc9d83bb590d9cdbc0af2b91da9 100644 (file)
@@ -68,10 +68,16 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 
        /* Check again on the next retirement. */
        engine->wakeref_serial = engine->serial + 1;
-
        i915_request_add_active_barriers(rq);
+
+       /* Install ourselves as a preemption barrier */
+       rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
        __i915_request_commit(rq);
 
+       /* Release our exclusive hold on the engine */
+       __intel_wakeref_defer_park(&engine->wakeref);
+       __i915_request_queue(rq, NULL);
+
        return false;
 }
 
index b02dea17dcab5bbfcc1a32e92eac99793be09972..21037a2e20389d87e1c12be38702eee66623b212 100644 (file)
@@ -16,18 +16,6 @@ enum {
        I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
        I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
        I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
-
-       /*
-        * Requests containing performance queries must not be preempted by
-        * another context. They get scheduled with their default priority and
-        * once they reach the execlist ports we ensure that they stick on the
-        * HW until finished by pretending that they have maximum priority,
-        * i.e. nothing can have higher priority and force us to usurp the
-        * active request.
-        */
-       I915_PRIORITY_UNPREEMPTABLE = INT_MAX,
-
-       I915_PRIORITY_INVALID = INT_MIN
 };
 
 #define I915_USER_PRIORITY_SHIFT 2
@@ -39,6 +27,19 @@ enum {
 #define I915_PRIORITY_WAIT             ((u8)BIT(0))
 #define I915_PRIORITY_NOSEMAPHORE      ((u8)BIT(1))
 
+/* Smallest priority value that cannot be bumped. */
+#define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK)
+
+/*
+ * Requests containing performance queries must not be preempted by
+ * another context. They get scheduled with their default priority and
+ * once they reach the execlist ports we ensure that they stick on the
+ * HW until finished by pretending that they have maximum priority,
+ * i.e. nothing can have higher priority and force us to usurp the
+ * active request.
+ */
+#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
 struct i915_priolist {
index 43175bada09ebdb60ca8660bca037785b3510567..4703aab3ae21e7e733b8439e51294467c2938abe 100644 (file)
@@ -1186,6 +1186,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
                list_add(&ring->active_link, &rq->i915->gt.active_rings);
        rq->emitted_jiffies = jiffies;
 
+       return prev;
+}
+
+void __i915_request_queue(struct i915_request *rq,
+                         const struct i915_sched_attr *attr)
+{
        /*
         * Let the backend know a new request has arrived that may need
         * to adjust the existing execution schedule due to a high priority
@@ -1199,43 +1205,15 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
         */
        local_bh_disable();
        i915_sw_fence_commit(&rq->semaphore);
-       if (engine->schedule) {
-               struct i915_sched_attr attr = rq->gem_context->sched;
-
-               /*
-                * Boost actual workloads past semaphores!
-                *
-                * With semaphores we spin on one engine waiting for another,
-                * simply to reduce the latency of starting our work when
-                * the signaler completes. However, if there is any other
-                * work that we could be doing on this engine instead, that
-                * is better utilisation and will reduce the overall duration
-                * of the current work. To avoid PI boosting a semaphore
-                * far in the distance past over useful work, we keep a history
-                * of any semaphore use along our dependency chain.
-                */
-               if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
-                       attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
-               /*
-                * Boost priorities to new clients (new request flows).
-                *
-                * Allow interactive/synchronous clients to jump ahead of
-                * the bulk clients. (FQ_CODEL)
-                */
-               if (list_empty(&rq->sched.signalers_list))
-                       attr.priority |= I915_PRIORITY_WAIT;
-
-               engine->schedule(rq, &attr);
-       }
+       if (attr && rq->engine->schedule)
+               rq->engine->schedule(rq, attr);
        i915_sw_fence_commit(&rq->submit);
        local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
-
-       return prev;
 }
 
 void i915_request_add(struct i915_request *rq)
 {
+       struct i915_sched_attr attr = rq->gem_context->sched;
        struct i915_request *prev;
 
        lockdep_assert_held(&rq->timeline->mutex);
@@ -1245,6 +1223,32 @@ void i915_request_add(struct i915_request *rq)
 
        prev = __i915_request_commit(rq);
 
+       /*
+        * Boost actual workloads past semaphores!
+        *
+        * With semaphores we spin on one engine waiting for another,
+        * simply to reduce the latency of starting our work when
+        * the signaler completes. However, if there is any other
+        * work that we could be doing on this engine instead, that
+        * is better utilisation and will reduce the overall duration
+        * of the current work. To avoid PI boosting a semaphore
+        * far in the distance past over useful work, we keep a history
+        * of any semaphore use along our dependency chain.
+        */
+       if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+               attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+
+       /*
+        * Boost priorities to new clients (new request flows).
+        *
+        * Allow interactive/synchronous clients to jump ahead of
+        * the bulk clients. (FQ_CODEL)
+        */
+       if (list_empty(&rq->sched.signalers_list))
+               attr.priority |= I915_PRIORITY_WAIT;
+
+       __i915_request_queue(rq, &attr);
+
        /*
         * In typical scenarios, we do not expect the previous request on
         * the timeline to be still tracked by timeline->last_request if it
index 313df3c371584dc3b1331cf9e7885849e720bc8e..fec1d5f17c9436eb3c7702dddacecbcb46397e05 100644 (file)
@@ -251,6 +251,8 @@ struct i915_request * __must_check
 i915_request_create(struct intel_context *ce);
 
 struct i915_request *__i915_request_commit(struct i915_request *request);
+void __i915_request_queue(struct i915_request *rq,
+                         const struct i915_sched_attr *attr);
 
 void i915_request_retire_upto(struct i915_request *rq);
 
index 0bd452e851d8de6fa46a00ac7427b3c52b5e92c5..7b84ebca29011f32df0df4e4746ea77857a67fc6 100644 (file)
@@ -349,8 +349,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
        unsigned long flags;
 
        GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
-
-       if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+       if (READ_ONCE(rq->sched.attr.priority) & bump)
                return;
 
        spin_lock_irqsave(&schedule_lock, flags);
index d4443e81c1c8957910c385764284052bc62f4e44..868cc78048d00a6bf5bb716168559fe08aacb587 100644 (file)
@@ -57,12 +57,10 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
        if (!atomic_dec_and_test(&wf->count))
                goto unlock;
 
+       /* ops->put() must reschedule its own release on error/deferral */
        if (likely(!wf->ops->put(wf))) {
                rpm_put(wf);
                wake_up_var(&wf->wakeref);
-       } else {
-               /* ops->put() must schedule its own release on deferral */
-               atomic_set_release(&wf->count, 1);
        }
 
 unlock:
index 535a3a12864bbb2fc3d9224e8d958bd45abc2f6f..5f0c972a80fb433d08b474fdb21e7da7b0d589ae 100644 (file)
@@ -163,6 +163,17 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
        return READ_ONCE(wf->wakeref);
 }
 
+/**
+ * __intel_wakeref_defer_park: Defer the current park callback
+ * @wf: the wakeref
+ */
+static inline void
+__intel_wakeref_defer_park(struct intel_wakeref *wf)
+{
+       INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
+       atomic_set_release(&wf->count, 1);
+}
+
 /**
  * intel_wakeref_wait_for_idle: Wait until the wakeref is idle
  * @wf: the wakeref