]> git.baikalelectronics.ru Git - kernel.git/commitdiff
drm/amdgpu: Fix per-IB secure flag GFX hang
authorHuang Rui <ray.huang@amd.com>
Mon, 9 Mar 2020 18:52:06 +0000 (14:52 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 28 Apr 2020 20:20:30 +0000 (16:20 -0400)
Since commit "Move to a per-IB secure flag (TMZ)",
we've been seeing hangs in GFX. We need to send
FRAME CONTROL stop/start back-to-back, every time
we flip the TMZ flag. That is, when we transition
from TMZ to non-TMZ we have to send a stop with
TMZ followed by a start with non-TMZ, and
similarly for transitioning from non-TMZ into TMZ.

This patch implements this, thus fixing the GFX
hang.

v1 -> v2:
As suggested by Luben, and accept part of implemetation from this patch:
- Put "secure" closed to the loop and use optimization
- Change "secure" to bool again, and move "secure == -1" out of loop.
v3: Small fixes/optimizations.

Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index cba22039df6c530fee8f7244eaef0bc41207c103..24ae9f6c4255c66a920cb77de14cde1f00921e28 100644 (file)
@@ -218,7 +218,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                amdgpu_ring_emit_cntxcntl(ring, status);
        }
 
+       /* Setup initial TMZiness and send it off.
+        */
        secure = false;
+       if (job && ring->funcs->emit_frame_cntl) {
+               secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
+               amdgpu_ring_emit_frame_cntl(ring, true, secure);
+       }
+
        for (i = 0; i < num_ibs; ++i) {
                ib = &ibs[i];
 
@@ -230,27 +237,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
                        continue;
 
-               /* If this IB is TMZ, add frame TMZ start packet,
-                * else, turn off TMZ.
-                */
-               if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
-                       if (!secure) {
-                               secure = true;
-                               amdgpu_ring_emit_tmz(ring, true);
+               if (job && ring->funcs->emit_frame_cntl) {
+                       if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+                               amdgpu_ring_emit_frame_cntl(ring, false, secure);
+                               secure = !secure;
+                               amdgpu_ring_emit_frame_cntl(ring, true, secure);
                        }
-               } else if (secure) {
-                       secure = false;
-                       amdgpu_ring_emit_tmz(ring, false);
                }
 
                amdgpu_ring_emit_ib(ring, job, ib, status);
                status &= ~AMDGPU_HAVE_CTX_SWITCH;
        }
 
-       if (secure) {
-               secure = false;
-               amdgpu_ring_emit_tmz(ring, false);
-       }
+       if (job && ring->funcs->emit_frame_cntl)
+               amdgpu_ring_emit_frame_cntl(ring, false, secure);
 
 #ifdef CONFIG_X86_64
        if (!(adev->flags & AMD_IS_APU))
index 7d39064f936175c5a2c4f00e4cd3e4aaa9f945a8..7390261095b7f71a192e21005fae04801856cf69 100644 (file)
@@ -177,7 +177,8 @@ struct amdgpu_ring_funcs {
        void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
                                        uint32_t reg0, uint32_t reg1,
                                        uint32_t ref, uint32_t mask);
-       void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+       void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+                               bool secure);
        /* Try to soft recover the ring to make the fence signal */
        void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
        int (*preempt_ib)(struct amdgpu_ring *ring);
@@ -256,7 +257,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
 #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
-#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
index 404c6d470515ba7b58ad6708561840ba25b496b3..9fe20b782e88d0fe30150eab8134746a404a3096 100644 (file)
@@ -3037,7 +3037,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
 {
@@ -7599,12 +7599,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
                                           sizeof(de_payload) >> 2);
 }
 
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+                                   bool secure)
 {
-       if (amdgpu_is_tmz(ring->adev)) {
-               amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-               amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-       }
+       uint32_t v = secure ? FRAME_TMZ : 0;
+
+       amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+       amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -8058,7 +8059,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
        .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
        .preempt_ib = gfx_v10_0_ring_preempt_ib,
-       .emit_tmz = gfx_v10_0_ring_emit_tmz,
+       .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v10_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
index 4e042e974983a0e49dac3c38553a1e05d817723f..eedb92218ba580204b8df1080a64072b7c4b5d81 100644 (file)
@@ -5442,12 +5442,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
        amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
 }
 
-static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+                                  bool secure)
 {
-       if (amdgpu_is_tmz(ring->adev)) {
-               amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-               amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-       }
+       uint32_t v = secure ? FRAME_TMZ : 0;
+
+       amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+       amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
@@ -6699,7 +6700,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
        .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
-       .emit_tmz = gfx_v9_0_ring_emit_tmz,
+       .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v9_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,