git.baikalelectronics.ru Git - kernel.git/commitdiff
drm/i915/gem: Implement legacy MI_STORE_DATA_IMM
author: Chris Wilson <chris@chris-wilson.co.uk>
Mon, 4 May 2020 14:06:29 +0000 (15:06 +0100)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Mon, 4 May 2020 14:15:04 +0000 (15:15 +0100)
The older arches did not convert MI_STORE_DATA_IMM to use the GTT, but
left it writing to a physical address. The notes suggest that the
primary reason would be so that the writes were cache coherent, as the
CPU cache uses physical tagging. As such we did not implement the
legacy variant of MI_STORE_DATA_IMM and so left all the relocations
synchronous -- but with a small function to convert from the vma address
into the physical address, we can implement asynchronous relocs on these
older arches, fixing up a few tests that require them.

In order to be able to test the legacy paths, refactor the gpu
relocations so that we can hook them up to a selftest.

v2: Use an array of offsets not enum labels for the selftest
v3: Refactor the common igt_hexdump()

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/757
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200504140629.28240-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/selftest_lrc.c
drivers/gpu/drm/i915/i915_selftest.h
drivers/gpu/drm/i915/selftests/i915_live_selftests.h
drivers/gpu/drm/i915/selftests/i915_selftest.c

index 1c247ad0971a20f456d0663ab36e021c388a2ff0..966523a8503f1820ee974c199fc5595b4b74727a 100644 (file)
@@ -955,7 +955,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
        cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
        cache->node.flags = 0;
        cache->rq = NULL;
-       cache->rq_size = 0;
+       cache->target = NULL;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -1325,7 +1325,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 
                ce = intel_context_create(engine);
                if (IS_ERR(ce)) {
-                       err = PTR_ERR(rq);
+                       err = PTR_ERR(ce);
                        goto err_unpin;
                }
 
@@ -1376,6 +1376,11 @@ out_pool:
        return err;
 }
 
+static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
+{
+       return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
+}
+
 static u32 *reloc_gpu(struct i915_execbuffer *eb,
                      struct i915_vma *vma,
                      unsigned int len)
@@ -1387,9 +1392,9 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
        if (unlikely(!cache->rq)) {
                struct intel_engine_cs *engine = eb->engine;
 
-               if (!intel_engine_can_store_dword(engine)) {
+               if (!reloc_can_use_engine(engine)) {
                        engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
-                       if (!engine || !intel_engine_can_store_dword(engine))
+                       if (!engine)
                                return ERR_PTR(-ENODEV);
                }
 
@@ -1435,91 +1440,138 @@ static inline bool use_reloc_gpu(struct i915_vma *vma)
        return !dma_resv_test_signaled_rcu(vma->resv, true);
 }
 
-static u64
-relocate_entry(struct i915_vma *vma,
-              const struct drm_i915_gem_relocation_entry *reloc,
-              struct i915_execbuffer *eb,
-              const struct i915_vma *target)
+static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
 {
-       u64 offset = reloc->offset;
-       u64 target_offset = relocation_target(reloc, target);
-       bool wide = eb->reloc_cache.use_64bit_reloc;
-       void *vaddr;
+       struct page *page;
+       unsigned long addr;
 
-       if (!eb->reloc_cache.vaddr && use_reloc_gpu(vma)) {
-               const unsigned int gen = eb->reloc_cache.gen;
-               unsigned int len;
-               u32 *batch;
-               u64 addr;
+       GEM_BUG_ON(vma->pages != vma->obj->mm.pages);
 
-               if (wide)
-                       len = offset & 7 ? 8 : 5;
-               else if (gen >= 4)
-                       len = 4;
-               else
-                       len = 3;
+       page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
+       addr = PFN_PHYS(page_to_pfn(page));
+       GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */
 
-               batch = reloc_gpu(eb, vma, len);
-               if (IS_ERR(batch))
-                       goto repeat;
+       return addr + offset_in_page(offset);
+}
+
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+                             struct i915_vma *vma,
+                             u64 offset,
+                             u64 target_addr)
+{
+       const unsigned int gen = eb->reloc_cache.gen;
+       unsigned int len;
+       u32 *batch;
+       u64 addr;
+
+       if (gen >= 8)
+               len = offset & 7 ? 8 : 5;
+       else if (gen >= 4)
+               len = 4;
+       else
+               len = 3;
+
+       batch = reloc_gpu(eb, vma, len);
+       if (IS_ERR(batch))
+               return false;
+
+       addr = gen8_canonical_addr(vma->node.start + offset);
+       if (gen >= 8) {
+               if (offset & 7) {
+                       *batch++ = MI_STORE_DWORD_IMM_GEN4;
+                       *batch++ = lower_32_bits(addr);
+                       *batch++ = upper_32_bits(addr);
+                       *batch++ = lower_32_bits(target_addr);
+
+                       addr = gen8_canonical_addr(addr + 4);
 
-               addr = gen8_canonical_addr(vma->node.start + offset);
-               if (wide) {
-                       if (offset & 7) {
-                               *batch++ = MI_STORE_DWORD_IMM_GEN4;
-                               *batch++ = lower_32_bits(addr);
-                               *batch++ = upper_32_bits(addr);
-                               *batch++ = lower_32_bits(target_offset);
-
-                               addr = gen8_canonical_addr(addr + 4);
-
-                               *batch++ = MI_STORE_DWORD_IMM_GEN4;
-                               *batch++ = lower_32_bits(addr);
-                               *batch++ = upper_32_bits(addr);
-                               *batch++ = upper_32_bits(target_offset);
-                       } else {
-                               *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
-                               *batch++ = lower_32_bits(addr);
-                               *batch++ = upper_32_bits(addr);
-                               *batch++ = lower_32_bits(target_offset);
-                               *batch++ = upper_32_bits(target_offset);
-                       }
-               } else if (gen >= 6) {
                        *batch++ = MI_STORE_DWORD_IMM_GEN4;
-                       *batch++ = 0;
-                       *batch++ = addr;
-                       *batch++ = target_offset;
-               } else if (gen >= 4) {
-                       *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-                       *batch++ = 0;
-                       *batch++ = addr;
-                       *batch++ = target_offset;
+                       *batch++ = lower_32_bits(addr);
+                       *batch++ = upper_32_bits(addr);
+                       *batch++ = upper_32_bits(target_addr);
                } else {
-                       *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-                       *batch++ = addr;
-                       *batch++ = target_offset;
+                       *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+                       *batch++ = lower_32_bits(addr);
+                       *batch++ = upper_32_bits(addr);
+                       *batch++ = lower_32_bits(target_addr);
+                       *batch++ = upper_32_bits(target_addr);
                }
-
-               goto out;
+       } else if (gen >= 6) {
+               *batch++ = MI_STORE_DWORD_IMM_GEN4;
+               *batch++ = 0;
+               *batch++ = addr;
+               *batch++ = target_addr;
+       } else if (IS_I965G(eb->i915)) {
+               *batch++ = MI_STORE_DWORD_IMM_GEN4;
+               *batch++ = 0;
+               *batch++ = vma_phys_addr(vma, offset);
+               *batch++ = target_addr;
+       } else if (gen >= 4) {
+               *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+               *batch++ = 0;
+               *batch++ = addr;
+               *batch++ = target_addr;
+       } else if (gen >= 3 &&
+                  !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) {
+               *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+               *batch++ = addr;
+               *batch++ = target_addr;
+       } else {
+               *batch++ = MI_STORE_DWORD_IMM;
+               *batch++ = vma_phys_addr(vma, offset);
+               *batch++ = target_addr;
        }
 
+       return true;
+}
+
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+                           struct i915_vma *vma,
+                           u64 offset,
+                           u64 target_addr)
+{
+       if (eb->reloc_cache.vaddr)
+               return false;
+
+       if (!use_reloc_gpu(vma))
+               return false;
+
+       return __reloc_entry_gpu(eb, vma, offset, target_addr);
+}
+
+static u64
+relocate_entry(struct i915_vma *vma,
+              const struct drm_i915_gem_relocation_entry *reloc,
+              struct i915_execbuffer *eb,
+              const struct i915_vma *target)
+{
+       u64 target_addr = relocation_target(reloc, target);
+       u64 offset = reloc->offset;
+
+       if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+               bool wide = eb->reloc_cache.use_64bit_reloc;
+               void *vaddr;
+
 repeat:
-       vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
-       if (IS_ERR(vaddr))
-               return PTR_ERR(vaddr);
+               vaddr = reloc_vaddr(vma->obj,
+                                   &eb->reloc_cache,
+                                   offset >> PAGE_SHIFT);
+               if (IS_ERR(vaddr))
+                       return PTR_ERR(vaddr);
 
-       clflush_write32(vaddr + offset_in_page(offset),
-                       lower_32_bits(target_offset),
-                       eb->reloc_cache.vaddr);
+               GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+               clflush_write32(vaddr + offset_in_page(offset),
+                               lower_32_bits(target_addr),
+                               eb->reloc_cache.vaddr);
 
-       if (wide) {
-               offset += sizeof(u32);
-               target_offset >>= 32;
-               wide = false;
-               goto repeat;
+               if (wide) {
+                       offset += sizeof(u32);
+                       target_addr >>= 32;
+                       wide = false;
+                       goto repeat;
+               }
        }
 
-out:
        return target->node.start | UPDATE;
 }
 
@@ -3022,3 +3074,7 @@ end:;
        kvfree(exec2_list);
        return err;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_execbuffer.c"
+#endif
index be268511cb6df0e9ef6e8a28f4f477de0853554c..8fe3ad2ee34e6458011b897f4ff863c3c3e0196b 100644 (file)
@@ -302,35 +302,6 @@ static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
        i915_gem_object_flush_map(t->scratch.vma->obj);
 }
 
-static void hexdump(const void *buf, size_t len)
-{
-       const size_t rowsize = 8 * sizeof(u32);
-       const void *prev = NULL;
-       bool skip = false;
-       size_t pos;
-
-       for (pos = 0; pos < len; pos += rowsize) {
-               char line[128];
-
-               if (prev && !memcmp(prev, buf + pos, rowsize)) {
-                       if (!skip) {
-                               pr_info("*\n");
-                               skip = true;
-                       }
-                       continue;
-               }
-
-               WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
-                                               rowsize, sizeof(u32),
-                                               line, sizeof(line),
-                                               false) >= sizeof(line));
-               pr_info("[%04zx] %s\n", pos, line);
-
-               prev = buf + pos;
-               skip = false;
-       }
-}
-
 static u64 swizzle_bit(unsigned int bit, u64 offset)
 {
        return (offset & BIT_ULL(bit)) >> (bit - 6);
@@ -426,7 +397,7 @@ static int verify_buffer(const struct tiled_blits *t,
                pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
                       repr_tiling(buf->tiling),
                       x, y, buf->start_val);
-               hexdump(vaddr, 4096);
+               igt_hexdump(vaddr, 4096);
        }
 
        i915_gem_object_unpin_map(buf->vma->obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
new file mode 100644 (file)
index 0000000..a49016f
--- /dev/null
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "gt/intel_engine_pm.h"
+#include "selftests/igt_flush_test.h"
+
+static u64 read_reloc(const u32 *map, int x, const u64 mask)
+{
+       u64 reloc;
+
+       memcpy(&reloc, &map[x], sizeof(reloc));
+       return reloc & mask;
+}
+
+static int __igt_gpu_reloc(struct i915_execbuffer *eb,
+                          struct drm_i915_gem_object *obj)
+{
+       const unsigned int offsets[] = { 8, 3, 0 };
+       const u64 mask =
+               GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0);
+       const u32 *map = page_mask_bits(obj->mm.mapping);
+       struct i915_request *rq;
+       struct i915_vma *vma;
+       int err;
+       int i;
+
+       vma = i915_vma_instance(obj, eb->context->vm, NULL);
+       if (IS_ERR(vma))
+               return PTR_ERR(vma);
+
+       err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+       if (err)
+               return err;
+
+       /* 8-Byte aligned */
+       if (!__reloc_entry_gpu(eb, vma,
+                              offsets[0] * sizeof(u32),
+                              0)) {
+               err = -EIO;
+               goto unpin_vma;
+       }
+
+       /* !8-Byte aligned */
+       if (!__reloc_entry_gpu(eb, vma,
+                              offsets[1] * sizeof(u32),
+                              1)) {
+               err = -EIO;
+               goto unpin_vma;
+       }
+
+       /* Skip to the end of the cmd page */
+       i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
+       i -= eb->reloc_cache.rq_size;
+       memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
+                MI_NOOP, i);
+       eb->reloc_cache.rq_size += i;
+
+       /* Force batch chaining */
+       if (!__reloc_entry_gpu(eb, vma,
+                              offsets[2] * sizeof(u32),
+                              2)) {
+               err = -EIO;
+               goto unpin_vma;
+       }
+
+       GEM_BUG_ON(!eb->reloc_cache.rq);
+       rq = i915_request_get(eb->reloc_cache.rq);
+       err = reloc_gpu_flush(&eb->reloc_cache);
+       if (err)
+               goto put_rq;
+       GEM_BUG_ON(eb->reloc_cache.rq);
+
+       err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
+       if (err) {
+               intel_gt_set_wedged(eb->engine->gt);
+               goto put_rq;
+       }
+
+       if (!i915_request_completed(rq)) {
+               pr_err("%s: did not wait for relocations!\n", eb->engine->name);
+               err = -EINVAL;
+               goto put_rq;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(offsets); i++) {
+               u64 reloc = read_reloc(map, offsets[i], mask);
+
+               if (reloc != i) {
+                       pr_err("%s[%d]: map[%d] %llx != %x\n",
+                              eb->engine->name, i, offsets[i], reloc, i);
+                       err = -EINVAL;
+               }
+       }
+       if (err)
+               igt_hexdump(map, 4096);
+
+put_rq:
+       i915_request_put(rq);
+unpin_vma:
+       i915_vma_unpin(vma);
+       return err;
+}
+
+static int igt_gpu_reloc(void *arg)
+{
+       struct i915_execbuffer eb;
+       struct drm_i915_gem_object *scratch;
+       int err = 0;
+       u32 *map;
+
+       eb.i915 = arg;
+
+       scratch = i915_gem_object_create_internal(eb.i915, 4096);
+       if (IS_ERR(scratch))
+               return PTR_ERR(scratch);
+
+       map = i915_gem_object_pin_map(scratch, I915_MAP_WC);
+       if (IS_ERR(map)) {
+               err = PTR_ERR(map);
+               goto err_scratch;
+       }
+
+       for_each_uabi_engine(eb.engine, eb.i915) {
+               reloc_cache_init(&eb.reloc_cache, eb.i915);
+               memset(map, POISON_INUSE, 4096);
+
+               intel_engine_pm_get(eb.engine);
+               eb.context = intel_context_create(eb.engine);
+               if (IS_ERR(eb.context)) {
+                       err = PTR_ERR(eb.context);
+                       goto err_pm;
+               }
+
+               err = intel_context_pin(eb.context);
+               if (err)
+                       goto err_put;
+
+               err = __igt_gpu_reloc(&eb, scratch);
+
+               intel_context_unpin(eb.context);
+err_put:
+               intel_context_put(eb.context);
+err_pm:
+               intel_engine_pm_put(eb.engine);
+               if (err)
+                       break;
+       }
+
+       if (igt_flush_test(eb.i915))
+               err = -EIO;
+
+err_scratch:
+       i915_gem_object_put(scratch);
+       return err;
+}
+
+int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(igt_gpu_reloc),
+       };
+
+       if (intel_gt_is_wedged(&i915->gt))
+               return 0;
+
+       return i915_live_subtests(tests, i915);
+}
index 7529df92f6a291582253de5a7f15cee8967633bd..3ced73533f6b2cb9f6395a751ffea92a5e65d1de 100644 (file)
@@ -4342,35 +4342,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
        return intel_gt_live_subtests(tests, &i915->gt);
 }
 
-static void hexdump(const void *buf, size_t len)
-{
-       const size_t rowsize = 8 * sizeof(u32);
-       const void *prev = NULL;
-       bool skip = false;
-       size_t pos;
-
-       for (pos = 0; pos < len; pos += rowsize) {
-               char line[128];
-
-               if (prev && !memcmp(prev, buf + pos, rowsize)) {
-                       if (!skip) {
-                               pr_info("*\n");
-                               skip = true;
-                       }
-                       continue;
-               }
-
-               WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
-                                               rowsize, sizeof(u32),
-                                               line, sizeof(line),
-                                               false) >= sizeof(line));
-               pr_info("[%04zx] %s\n", pos, line);
-
-               prev = buf + pos;
-               skip = false;
-       }
-}
-
 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
 {
        const u32 offset =
@@ -4518,10 +4489,10 @@ static int live_lrc_layout(void *arg)
 
                if (err) {
                        pr_info("%s: HW register image:\n", engine->name);
-                       hexdump(hw, PAGE_SIZE);
+                       igt_hexdump(hw, PAGE_SIZE);
 
                        pr_info("%s: SW register image:\n", engine->name);
-                       hexdump(lrc, PAGE_SIZE);
+                       igt_hexdump(lrc, PAGE_SIZE);
                }
 
                shmem_unpin_map(engine->default_state, hw);
index 98bcb6fa0ab48af79056efb1d4ec31543de2a17a..d53d207ab6eb6de78217b1bd13f379df004cd6ed 100644 (file)
@@ -133,4 +133,6 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...);
 #define igt_timeout(t, fmt, ...) \
        __igt_timeout((t), KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 
+void igt_hexdump(const void *buf, size_t len);
+
 #endif /* !__I915_SELFTEST_H__ */
index 0a953bfc0585617a881fcde39b9e02982a23d350..5dd5d81646c4ac405d31694a8ec307f3762008b0 100644 (file)
@@ -37,6 +37,7 @@ selftest(gem, i915_gem_live_selftests)
 selftest(evict, i915_gem_evict_live_selftests)
 selftest(hugepages, i915_gem_huge_page_live_selftests)
 selftest(gem_contexts, i915_gem_context_live_selftests)
+selftest(gem_execbuf, i915_gem_execbuffer_live_selftests)
 selftest(blt, i915_gem_object_blt_live_selftests)
 selftest(client, i915_gem_client_blt_live_selftests)
 selftest(reset, intel_reset_live_selftests)
index d3bf9eefb6827ef53adfeac60c90c7cf75a4bdd9..1bc11c09faef53182e66d3ab2900dc7317e2e137 100644 (file)
@@ -396,6 +396,35 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...)
        return true;
 }
 
+void igt_hexdump(const void *buf, size_t len)
+{
+       const size_t rowsize = 8 * sizeof(u32);
+       const void *prev = NULL;
+       bool skip = false;
+       size_t pos;
+
+       for (pos = 0; pos < len; pos += rowsize) {
+               char line[128];
+
+               if (prev && !memcmp(prev, buf + pos, rowsize)) {
+                       if (!skip) {
+                               pr_info("*\n");
+                               skip = true;
+                       }
+                       continue;
+               }
+
+               WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+                                               rowsize, sizeof(u32),
+                                               line, sizeof(line),
+                                               false) >= sizeof(line));
+               pr_info("[%04zx] %s\n", pos, line);
+
+               prev = buf + pos;
+               skip = false;
+       }
+}
+
 module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400);
 module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400);
 module_param_named(st_filter, i915_selftest.filter, charp, 0400);