]> git.baikalelectronics.ru Git - kernel.git/commitdiff
drm/i915/dg2: Enable Wa_14014475959 - RCS / CCS context exit
author Matthew Brost <matthew.brost@intel.com>
Fri, 15 Apr 2022 22:40:24 +0000 (15:40 -0700)
committer John Harrison <John.C.Harrison@Intel.com>
Tue, 19 Apr 2022 18:33:47 +0000 (11:33 -0700)
There is a bug in DG2 where, if a CCS context switches out while the RCS
is running, it can cause memory corruption. To work around this, add an
atomic write of the value 1 to a memory address, followed by a semaphore
wait on the same address for the value 0. The GuC firmware is responsible
for writing 0 to the memory address when it is safe for the context to
switch out.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220415224025.3693037-6-umesh.nerlige.ramappa@intel.com
drivers/gpu/drm/i915/gt/gen8_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_gpu_commands.h
drivers/gpu/drm/i915/gt/uc/intel_guc.c
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

index 047b5a710149c73fc5a6b94585dae381564995cf..9529c5455bc36acf3b6c63e0c2a43bf0f8e3af19 100644 (file)
@@ -583,6 +583,43 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
        return cs;
 }
 
+/*
+ * Wa_14014475959:dg2
+ *
+ * Offset of the workaround semaphore dword: the GGTT offset of the request's
+ * context state, plus LRC_PPHWSP_PN pages to reach the PPHWSP page, plus
+ * CCS_SEMAPHORE_PPHWSP_OFFSET bytes into that page.
+ */
+#define CCS_SEMAPHORE_PPHWSP_OFFSET    0x540
+static u32 ccs_semaphore_offset(struct i915_request *rq)
+{
+       return i915_ggtt_offset(rq->context->state) +
+               (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
+}
+
+/*
+ * Wa_14014475959:dg2
+ *
+ * Emit the context-exit busywait at @cs: an inline MI_ATOMIC move of the
+ * value 1 to the semaphore dword (see ccs_semaphore_offset()), followed by
+ * an MI_SEMAPHORE_WAIT that polls the same dword until it reads 0. Per the
+ * workaround description, the GuC firmware writes that 0 once it is safe
+ * for the context to switch out.
+ *
+ * Writes 16 dwords and returns the advanced command-stream pointer.
+ */
+static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
+{
+       int i;
+
+       *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
+               MI_ATOMIC_MOVE;
+       *cs++ = ccs_semaphore_offset(rq); /* address of the semaphore dword */
+       *cs++ = 0; /* presumably the upper address dword - TODO confirm */
+       *cs++ = 1; /* the value 1 that the atomic stores */
+
+       /*
+        * When MI_ATOMIC_INLINE_DATA is set this command must be 11 DW +
+        * (1 NOP) to align, i.e. 12 DWs in total: the 4 DWs above plus the
+        * 8 zero filler DWs emitted here.
+        */
+       for (i = 0; i < 8; ++i)
+               *cs++ = 0;
+
+       *cs++ = MI_SEMAPHORE_WAIT |
+               MI_SEMAPHORE_GLOBAL_GTT |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_SAD_EQ_SDD;
+       *cs++ = 0; /* value the semaphore dword is compared against */
+       *cs++ = ccs_semaphore_offset(rq); /* address that is polled */
+       *cs++ = 0; /* presumably the upper address dword - TODO confirm */
+
+       return cs;
+}
+
 static __always_inline u32*
 gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
 {
@@ -593,6 +630,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
            !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
                cs = gen12_emit_preempt_busywait(rq, cs);
 
+       /* Wa_14014475959:dg2 */
+       if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
+               cs = ccs_emit_wa_busywait(rq, cs);
+
        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);
 
index eac20112709c5c1656fe2e68bd25e931b6555479..298f2cc7a879f402fb6bf732179e1aab905ad5b6 100644 (file)
@@ -529,6 +529,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_RCS_REG_STATE  BIT(9)
 #define I915_ENGINE_HAS_EU_PRIORITY    BIT(10)
 #define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
+#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
        unsigned int flags;
 
        /*
@@ -629,6 +630,13 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
        return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
 }
 
+/*
+ * Wa_14014475959:dg2
+ *
+ * Returns true when I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT is set in
+ * @engine->flags, false otherwise.
+ */
+static inline bool
+intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine)
+{
+       return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+}
+
 #define instdone_has_slice(dev_priv___, sseu___, slice___) \
        ((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
 
index 724ab069ddb6145af7de8f3bc52457ad86baa000..e52718a87f143a599cb13a3f07b415c4e0197b74 100644 (file)
 #define   MI_MEM_VIRTUAL       (1 << 22) /* 945,g33,965 */
 #define   MI_USE_GGTT          (1 << 22) /* g4x+ */
 #define MI_STORE_DWORD_INDEX   MI_INSTR(0x21, 1)
+#define MI_ATOMIC              MI_INSTR(0x2f, 1)
+#define MI_ATOMIC_INLINE       (MI_INSTR(0x2f, 9) | MI_ATOMIC_INLINE_DATA)
+#define   MI_ATOMIC_GLOBAL_GTT         (1 << 22)
+#define   MI_ATOMIC_INLINE_DATA                (1 << 18)
+#define   MI_ATOMIC_CS_STALL           (1 << 17)
+#define          MI_ATOMIC_MOVE                (0x4 << 8)
+
 /*
  * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
  * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
index 830889349756a02fe0a5fdf1356d3912971fafa4..228070e31ef00301a353a92957dc0baa2222ab29 100644 (file)
@@ -296,6 +296,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
        if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
                flags |= GUC_WA_GAM_CREDITS;
 
+       /* Wa_14014475959:dg2 */
+       if (IS_DG2(gt->i915))
+               flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+
        /*
         * Wa_14012197797:dg2_g10:a0,dg2_g11:a0
         * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12
index 126e67ea1619b1432af427b50315eda38fec630e..e389a3a041a2114f38093e857efcc438e2ea1413 100644 (file)
 #define   GUC_WA_DUAL_QUEUE            BIT(11)
 #define   GUC_WA_RCS_RESET_BEFORE_RC6  BIT(13)
 #define   GUC_WA_PRE_PARSER            BIT(14)
+#define   GUC_WA_HOLD_CCS_SWITCHOUT    BIT(17)
 #define   GUC_WA_POLLCS                        BIT(18)
 
 #define GUC_CTL_FEATURE                        2
index b27df3fe70b6a1b717fbab1a2e05f6a6bb99f15b..61a6f2424e2412b85d2e3afcf126edf987960390 100644 (file)
@@ -3897,6 +3897,10 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
        engine->flags |= I915_ENGINE_HAS_PREEMPTION;
        engine->flags |= I915_ENGINE_HAS_TIMESLICES;
 
+       /* Wa_14014475959:dg2 */
+       if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
+               engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+
        /*
         * TODO: GuC supports timeslicing and semaphores as well, but they're
         * handled by the firmware so some minor tweaks are required before