gt/intel_ringbuffer.o \
gt/intel_mocs.o \
gt/intel_sseu.o \
+ gt/intel_timeline.o \
gt/intel_workarounds.o
gt-$(CONFIG_DRM_I915_SELFTEST) += \
gt/mock_engine.o
i915_query.o \
i915_request.o \
i915_scheduler.o \
- i915_timeline.o \
i915_trace_points.o \
i915_vma.o \
intel_wopcm.o
i915_priolist_types.h \
i915_reg.h \
i915_scheduler_types.h \
- i915_timeline_types.h \
i915_utils.h \
intel_csr.h \
intel_drv.h \
mutex_destroy(&ctx->engines_mutex);
if (ctx->timeline)
- i915_timeline_put(ctx->timeline);
+ intel_timeline_put(ctx->timeline);
kfree(ctx->name);
put_pid(ctx->pid);
}
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
- struct i915_timeline *timeline;
+ struct intel_timeline *timeline;
- timeline = i915_timeline_create(&dev_priv->gt, NULL);
+ timeline = intel_timeline_create(&dev_priv->gt, NULL);
if (IS_ERR(timeline)) {
context_close(ctx);
return ERR_CAST(timeline);
GEM_BUG_ON(src->timeline == dst->timeline);
if (dst->timeline)
- i915_timeline_put(dst->timeline);
- dst->timeline = i915_timeline_get(src->timeline);
+ intel_timeline_put(dst->timeline);
+ dst->timeline = intel_timeline_get(src->timeline);
}
return 0;
struct drm_i915_private;
struct drm_i915_file_private;
struct i915_address_space;
-struct i915_timeline;
+struct intel_timeline;
struct intel_ring;
struct i915_gem_engines {
struct i915_gem_engines __rcu *engines;
struct mutex engines_mutex; /* guards writes to engines */
- struct i915_timeline *timeline;
+ struct intel_timeline *timeline;
/**
* @vm: unique address space (GTT)
i915_gem_batch_pool_fini(&engine->batch_pool);
}
- i915_timelines_park(i915);
+ intel_timelines_park(i915);
i915_vma_parked(i915);
i915_globals_park();
#include "i915_reg.h"
#include "i915_request.h"
#include "i915_selftest.h"
-#include "i915_timeline.h"
+#include "gt/intel_timeline.h"
#include "intel_engine_types.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
- struct i915_timeline *timeline,
+ struct intel_timeline *timeline,
int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
struct measure_breadcrumb {
struct i915_request rq;
- struct i915_timeline timeline;
+ struct intel_timeline timeline;
struct intel_ring ring;
u32 cs[1024];
};
if (!frame)
return -ENOMEM;
- if (i915_timeline_init(&frame->timeline,
- engine->gt,
- engine->status_page.vma))
+ if (intel_timeline_init(&frame->timeline,
+ engine->gt,
+ engine->status_page.vma))
goto out_frame;
INIT_LIST_HEAD(&frame->ring.request_list);
frame->rq.ring = &frame->ring;
frame->rq.timeline = &frame->timeline;
- dw = i915_timeline_pin(&frame->timeline);
+ dw = intel_timeline_pin(&frame->timeline);
if (dw < 0)
goto out_timeline;
dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
- i915_timeline_unpin(&frame->timeline);
+ intel_timeline_unpin(&frame->timeline);
out_timeline:
- i915_timeline_fini(&frame->timeline);
+ intel_timeline_fini(&frame->timeline);
out_frame:
kfree(frame);
return dw;
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
-#include "i915_timeline_types.h"
+#include "gt/intel_timeline_types.h"
#include "intel_sseu.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"
struct i915_vma *vma;
void *vaddr;
- struct i915_timeline *timeline;
+ struct intel_timeline *timeline;
struct list_head request_list;
struct list_head active_link;
return ret;
}
-static struct i915_timeline *
+static struct intel_timeline *
get_timeline(struct i915_gem_context *ctx, struct intel_gt *gt)
{
if (ctx->timeline)
- return i915_timeline_get(ctx->timeline);
+ return intel_timeline_get(ctx->timeline);
else
- return i915_timeline_create(gt, NULL);
+ return intel_timeline_create(gt, NULL);
}
static int execlists_context_deferred_alloc(struct intel_context *ce,
struct i915_vma *vma;
u32 context_size;
struct intel_ring *ring;
- struct i915_timeline *timeline;
+ struct intel_timeline *timeline;
int ret;
if (ce->state)
ring = intel_engine_create_ring(engine,
timeline,
ce->gem_context->ring_size);
- i915_timeline_put(timeline);
+ intel_timeline_put(timeline);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
goto error_deref_obj;
static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
{
struct i915_gpu_error *error = &i915->gpu_error;
- struct i915_timeline *tl;
+ struct intel_timeline *tl;
if (!test_bit(I915_WEDGED, &error->flags))
return true;
if (atomic_fetch_inc(&ring->pin_count))
return 0;
- ret = i915_timeline_pin(ring->timeline);
+ ret = intel_timeline_pin(ring->timeline);
if (ret)
goto err_unpin;
err_ring:
i915_vma_unpin(vma);
err_timeline:
- i915_timeline_unpin(ring->timeline);
+ intel_timeline_unpin(ring->timeline);
err_unpin:
atomic_dec(&ring->pin_count);
return ret;
ring->vma->obj->pin_global--;
i915_vma_unpin(ring->vma);
- i915_timeline_unpin(ring->timeline);
+ intel_timeline_unpin(ring->timeline);
}
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
- struct i915_timeline *timeline,
+ struct intel_timeline *timeline,
int size)
{
struct drm_i915_private *i915 = engine->i915;
kref_init(&ring->ref);
INIT_LIST_HEAD(&ring->request_list);
- ring->timeline = i915_timeline_get(timeline);
+ ring->timeline = intel_timeline_get(timeline);
ring->size = size;
/* Workaround an erratum on the i830 which causes a hang if
i915_vma_close(ring->vma);
i915_vma_put(ring->vma);
- i915_timeline_put(ring->timeline);
+ intel_timeline_put(ring->timeline);
kfree(ring);
}
int intel_ring_submission_init(struct intel_engine_cs *engine)
{
- struct i915_timeline *timeline;
+ struct intel_timeline *timeline;
struct intel_ring *ring;
int err;
- timeline = i915_timeline_create(engine->gt, engine->status_page.vma);
+ timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
if (IS_ERR(timeline)) {
err = PTR_ERR(timeline);
goto err;
GEM_BUG_ON(timeline->has_initial_breadcrumb);
ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
- i915_timeline_put(timeline);
+ intel_timeline_put(timeline);
if (IS_ERR(ring)) {
err = PTR_ERR(ring);
goto err;
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016-2018 Intel Corporation
+ */
+
+#include "gt/intel_gt_types.h"
+
+#include "i915_drv.h"
+
+#include "i915_active.h"
+#include "i915_syncmap.h"
+#include "gt/intel_timeline.h"
+
+#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
+#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
+
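+/*
+ * Descriptive note (derived from hwsp_alloc/__idle_hwsp_free below): a HWSP
+ * is a single GGTT-mapped page shared between timelines, carved into
+ * cacheline-sized slots (at most 64, one bit each in free_bitmap). While any
+ * slot is unused the page stays on the per-gt hwsp_free_list so that new
+ * timelines can claim a slot from it.
+ */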
+struct intel_timeline_hwsp {
+ struct intel_gt *gt;
+ struct i915_gt_timelines *gt_timelines;
+ struct list_head free_link;
+ struct i915_vma *vma;
+ u64 free_bitmap;
+};
+
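+/*
+ * A cacheline is one timeline's claim on a slot within a HWSP page. The
+ * embedded i915_active tracks in-flight users (e.g. requests still sampling
+ * the seqno), so the slot and its kernel mapping in vaddr are only released
+ * once every user has retired; CACHELINE_FREE marks a slot whose owning
+ * timeline has already let go.
+ */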
+struct intel_timeline_cacheline {
+ struct i915_active active;
+ struct intel_timeline_hwsp *hwsp;
+ void *vaddr;
+#define CACHELINE_BITS 6
+#define CACHELINE_FREE CACHELINE_BITS
+};
+
+static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma))
+ i915_gem_object_put(obj);
+
+ return vma;
+}
+
+static struct i915_vma *
+hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
+{
+ struct i915_gt_timelines *gt = &timeline->gt->timelines;
+ struct intel_timeline_hwsp *hwsp;
+
+ BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
+
+ spin_lock_irq(&gt->hwsp_lock);
+
+ /* hwsp_free_list only contains HWSP that have available cachelines */
+ hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
+ typeof(*hwsp), free_link);
+ if (!hwsp) {
+ struct i915_vma *vma;
+
+ spin_unlock_irq(&gt->hwsp_lock);
+
+ hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
+ if (!hwsp)
+ return ERR_PTR(-ENOMEM);
+
+ vma = __hwsp_alloc(timeline->gt);
+ if (IS_ERR(vma)) {
+ kfree(hwsp);
+ return vma;
+ }
+
+ vma->private = hwsp;
+ hwsp->gt = timeline->gt;
+ hwsp->vma = vma;
+ hwsp->free_bitmap = ~0ull;
+ hwsp->gt_timelines = gt;
+
+ spin_lock_irq(&gt->hwsp_lock);
+ list_add(&hwsp->free_link, &gt->hwsp_free_list);
+ }
+
+ GEM_BUG_ON(!hwsp->free_bitmap);
+ *cacheline = __ffs64(hwsp->free_bitmap);
+ hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
+ if (!hwsp->free_bitmap)
+ list_del(&hwsp->free_link);
+
+ spin_unlock_irq(&gt->hwsp_lock);
+
+ GEM_BUG_ON(hwsp->vma->private != hwsp);
+ return hwsp->vma;
+}
+
+static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
+{
+ struct i915_gt_timelines *gt = hwsp->gt_timelines;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gt->hwsp_lock, flags);
+
+ /* As a cacheline becomes available, publish the HWSP on the freelist */
+ if (!hwsp->free_bitmap)
+ list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
+
+ GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
+ hwsp->free_bitmap |= BIT_ULL(cacheline);
+
+ /* And if no one is left using it, give the page back to the system */
+ if (hwsp->free_bitmap == ~0ull) {
+ i915_vma_put(hwsp->vma);
+ list_del(&hwsp->free_link);
+ kfree(hwsp);
+ }
+
+ spin_unlock_irqrestore(&gt->hwsp_lock, flags);
+}
+
+static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
+{
+ GEM_BUG_ON(!i915_active_is_idle(&cl->active));
+
+ i915_gem_object_unpin_map(cl->hwsp->vma->obj);
+ i915_vma_put(cl->hwsp->vma);
+ __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
+
+ i915_active_fini(&cl->active);
+ kfree(cl);
+}
+
+static void __cacheline_retire(struct i915_active *active)
+{
+ struct intel_timeline_cacheline *cl =
+ container_of(active, typeof(*cl), active);
+
+ i915_vma_unpin(cl->hwsp->vma);
+ if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
+ __idle_cacheline_free(cl);
+}
+
+static struct intel_timeline_cacheline *
+cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
+{
+ struct intel_timeline_cacheline *cl;
+ void *vaddr;
+
+ GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));
+
+ cl = kmalloc(sizeof(*cl), GFP_KERNEL);
+ if (!cl)
+ return ERR_PTR(-ENOMEM);
+
+ vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ kfree(cl);
+ return ERR_CAST(vaddr);
+ }
+
+ i915_vma_get(hwsp->vma);
+ cl->hwsp = hwsp;
+ cl->vaddr = page_pack_bits(vaddr, cacheline);
+
+ i915_active_init(hwsp->gt->i915, &cl->active, __cacheline_retire);
+
+ return cl;
+}
+
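+/*
+ * While a cacheline has active users, keep its HWSP vma pinned: the first
+ * acquire takes the pin and __cacheline_retire drops it again (and frees the
+ * cacheline if the owning timeline has already marked it CACHELINE_FREE).
+ */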
+static void cacheline_acquire(struct intel_timeline_cacheline *cl)
+{
+ if (cl && i915_active_acquire(&cl->active))
+ __i915_vma_pin(cl->hwsp->vma);
+}
+
+static void cacheline_release(struct intel_timeline_cacheline *cl)
+{
+ if (cl)
+ i915_active_release(&cl->active);
+}
+
+static void cacheline_free(struct intel_timeline_cacheline *cl)
+{
+ GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
+ cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
+
+ if (i915_active_is_idle(&cl->active))
+ __idle_cacheline_free(cl);
+}
+
+int intel_timeline_init(struct intel_timeline *timeline,
+ struct intel_gt *gt,
+ struct i915_vma *hwsp)
+{
+ void *vaddr;
+
+ /*
+ * Ideally we want a set of engines on a single leaf as we expect
+ * to mostly be tracking synchronisation between engines. It is not
+ * a huge issue if this is not the case, but we may want to mitigate
+ * any page crossing penalties if they become an issue.
+ *
+ * Called during early_init before we know how many engines there are.
+ */
+ BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
+
+ timeline->gt = gt;
+ timeline->pin_count = 0;
+ timeline->has_initial_breadcrumb = !hwsp;
+ timeline->hwsp_cacheline = NULL;
+
+ if (!hwsp) {
+ struct intel_timeline_cacheline *cl;
+ unsigned int cacheline;
+
+ hwsp = hwsp_alloc(timeline, &cacheline);
+ if (IS_ERR(hwsp))
+ return PTR_ERR(hwsp);
+
+ cl = cacheline_alloc(hwsp->private, cacheline);
+ if (IS_ERR(cl)) {
+ __idle_hwsp_free(hwsp->private, cacheline);
+ return PTR_ERR(cl);
+ }
+
+ timeline->hwsp_cacheline = cl;
+ timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
+
+ vaddr = page_mask_bits(cl->vaddr);
+ } else {
+ timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
+
+ vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+ }
+
+ timeline->hwsp_seqno =
+ memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
+
+ timeline->hwsp_ggtt = i915_vma_get(hwsp);
+ GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
+
+ timeline->fence_context = dma_fence_context_alloc(1);
+
+ mutex_init(&timeline->mutex);
+
+ INIT_ACTIVE_REQUEST(&timeline->last_request);
+ INIT_LIST_HEAD(&timeline->requests);
+
+ i915_syncmap_init(&timeline->sync);
+
+ return 0;
+}
+
+static void timelines_init(struct intel_gt *gt)
+{
+ struct i915_gt_timelines *timelines = &gt->timelines;
+
+ mutex_init(&timelines->mutex);
+ INIT_LIST_HEAD(&timelines->active_list);
+
+ spin_lock_init(&timelines->hwsp_lock);
+ INIT_LIST_HEAD(&timelines->hwsp_free_list);
+
+ /* via i915_gem_wait_for_idle() */
+ i915_gem_shrinker_taints_mutex(gt->i915, &timelines->mutex);
+}
+
+void intel_timelines_init(struct drm_i915_private *i915)
+{
+ timelines_init(&i915->gt);
+}
+
+static void timeline_add_to_active(struct intel_timeline *tl)
+{
+ struct i915_gt_timelines *gt = &tl->gt->timelines;
+
+ mutex_lock(&gt->mutex);
+ list_add(&tl->link, &gt->active_list);
+ mutex_unlock(&gt->mutex);
+}
+
+static void timeline_remove_from_active(struct intel_timeline *tl)
+{
+ struct i915_gt_timelines *gt = &tl->gt->timelines;
+
+ mutex_lock(&gt->mutex);
+ list_del(&tl->link);
+ mutex_unlock(&gt->mutex);
+}
+
+static void timelines_park(struct intel_gt *gt)
+{
+ struct i915_gt_timelines *timelines = &gt->timelines;
+ struct intel_timeline *timeline;
+
+ mutex_lock(&timelines->mutex);
+ list_for_each_entry(timeline, &timelines->active_list, link) {
+ /*
+ * All known fences are completed so we can scrap
+ * the current sync point tracking and start afresh,
+ * any attempt to wait upon a previous sync point
+ * will be skipped as the fence was signaled.
+ */
+ i915_syncmap_free(&timeline->sync);
+ }
+ mutex_unlock(&timelines->mutex);
+}
+
+/**
+ * intel_timelines_park - called when the driver idles
+ * @i915: the drm_i915_private device
+ *
+ * When the driver is completely idle, we know that all of our sync points
+ * have been signaled and our tracking is then entirely redundant. Any request
+ * to wait upon an older sync point will be completed instantly as we know
+ * the fence is signaled and therefore we will not even look them up in the
+ * sync point map.
+ */
+void intel_timelines_park(struct drm_i915_private *i915)
+{
+ timelines_park(&i915->gt);
+}
+
+void intel_timeline_fini(struct intel_timeline *timeline)
+{
+ GEM_BUG_ON(timeline->pin_count);
+ GEM_BUG_ON(!list_empty(&timeline->requests));
+
+ i915_syncmap_free(&timeline->sync);
+
+ if (timeline->hwsp_cacheline)
+ cacheline_free(timeline->hwsp_cacheline);
+ else
+ i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+
+ i915_vma_put(timeline->hwsp_ggtt);
+}
+
+struct intel_timeline *
+intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
+{
+ struct intel_timeline *timeline;
+ int err;
+
+ timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
+ if (!timeline)
+ return ERR_PTR(-ENOMEM);
+
+ err = intel_timeline_init(timeline, gt, global_hwsp);
+ if (err) {
+ kfree(timeline);
+ return ERR_PTR(err);
+ }
+
+ kref_init(&timeline->kref);
+
+ return timeline;
+}
+
+int intel_timeline_pin(struct intel_timeline *tl)
+{
+ int err;
+
+ if (tl->pin_count++)
+ return 0;
+ GEM_BUG_ON(!tl->pin_count);
+
+ err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (err)
+ goto unpin;
+
+ tl->hwsp_offset =
+ i915_ggtt_offset(tl->hwsp_ggtt) +
+ offset_in_page(tl->hwsp_offset);
+
+ cacheline_acquire(tl->hwsp_cacheline);
+ timeline_add_to_active(tl);
+
+ return 0;
+
+unpin:
+ tl->pin_count = 0;
+ return err;
+}
+
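+/*
+ * Each request consumes one seqno, or two when the timeline emits an initial
+ * breadcrumb; in the latter case tl->seqno stays even, which the second
+ * GEM_BUG_ON below asserts.
+ */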
+static u32 timeline_advance(struct intel_timeline *tl)
+{
+ GEM_BUG_ON(!tl->pin_count);
+ GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+ return tl->seqno += 1 + tl->has_initial_breadcrumb;
+}
+
+static void timeline_rollback(struct intel_timeline *tl)
+{
+ tl->seqno -= 1 + tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__intel_timeline_get_seqno(struct intel_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno)
+{
+ struct intel_timeline_cacheline *cl;
+ unsigned int cacheline;
+ struct i915_vma *vma;
+ void *vaddr;
+ int err;
+
+ /*
+ * If there is an outstanding GPU reference to this cacheline,
+ * such as it being sampled by a HW semaphore on another timeline,
+ * we cannot wraparound our seqno value (the HW semaphore does
+ * a strict greater-than-or-equals compare, not i915_seqno_passed).
+ * So if the cacheline is still busy, we must detach ourselves
+ * from it and leave it inflight alongside its users.
+ *
+ * However, if nobody is watching and we can guarantee that nobody
+ * will, we could simply reuse the same cacheline.
+ *
+ * if (i915_active_request_is_signaled(&tl->last_request) &&
+ * i915_active_is_signaled(&tl->hwsp_cacheline->active))
+ * return 0;
+ *
+ * That seems unlikely for a busy timeline that needed to wrap in
+ * the first place, so just replace the cacheline.
+ */
+
+ vma = hwsp_alloc(tl, &cacheline);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_rollback;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (err) {
+ __idle_hwsp_free(vma->private, cacheline);
+ goto err_rollback;
+ }
+
+ cl = cacheline_alloc(vma->private, cacheline);
+ if (IS_ERR(cl)) {
+ err = PTR_ERR(cl);
+ __idle_hwsp_free(vma->private, cacheline);
+ goto err_unpin;
+ }
+ GEM_BUG_ON(cl->hwsp->vma != vma);
+
+ /*
+ * Attach the old cacheline to the current request, so that we only
+ * free it after the current request is retired, which ensures that
+ * all writes into the cacheline from previous requests are complete.
+ */
+ err = i915_active_ref(&tl->hwsp_cacheline->active,
+ tl->fence_context, rq);
+ if (err)
+ goto err_cacheline;
+
+ cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
+ cacheline_free(tl->hwsp_cacheline);
+
+ i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
+ i915_vma_put(tl->hwsp_ggtt);
+
+ tl->hwsp_ggtt = i915_vma_get(vma);
+
+ vaddr = page_mask_bits(cl->vaddr);
+ tl->hwsp_offset = cacheline * CACHELINE_BYTES;
+ tl->hwsp_seqno =
+ memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
+
+ tl->hwsp_offset += i915_ggtt_offset(vma);
+
+ cacheline_acquire(cl);
+ tl->hwsp_cacheline = cl;
+
+ *seqno = timeline_advance(tl);
+ GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
+ return 0;
+
+err_cacheline:
+ cacheline_free(cl);
+err_unpin:
+ i915_vma_unpin(vma);
+err_rollback:
+ timeline_rollback(tl);
+ return err;
+}
+
+int intel_timeline_get_seqno(struct intel_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno)
+{
+ *seqno = timeline_advance(tl);
+
+ /* Replace the HWSP on wraparound for HW semaphores */
+ if (unlikely(!*seqno && tl->hwsp_cacheline))
+ return __intel_timeline_get_seqno(tl, rq, seqno);
+
+ return 0;
+}
+
+static int cacheline_ref(struct intel_timeline_cacheline *cl,
+ struct i915_request *rq)
+{
+ return i915_active_ref(&cl->active, rq->fence.context, rq);
+}
+
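+/*
+ * Report the GGTT offset of the slot holding @from's seqno and keep that
+ * cacheline alive (via an active reference held by @to) until @to is
+ * retired, so e.g. a HW semaphore emitted for @to can keep reading it even
+ * across a seqno wrap of @from's timeline.
+ */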
+int intel_timeline_read_hwsp(struct i915_request *from,
+ struct i915_request *to,
+ u32 *hwsp)
+{
+ struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
+ struct intel_timeline *tl = from->timeline;
+ int err;
+
+ GEM_BUG_ON(to->timeline == tl);
+
+ mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
+ err = i915_request_completed(from);
+ if (!err)
+ err = cacheline_ref(cl, to);
+ if (!err) {
+ if (likely(cl == tl->hwsp_cacheline)) {
+ *hwsp = tl->hwsp_offset;
+ } else { /* across a seqno wrap, recover the original offset */
+ *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
+ ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
+ CACHELINE_BYTES;
+ }
+ }
+ mutex_unlock(&tl->mutex);
+
+ return err;
+}
+
+void intel_timeline_unpin(struct intel_timeline *tl)
+{
+ GEM_BUG_ON(!tl->pin_count);
+ if (--tl->pin_count)
+ return;
+
+ timeline_remove_from_active(tl);
+ cacheline_release(tl->hwsp_cacheline);
+
+ /*
+ * Since this timeline is idle, all barriers upon which we were waiting
+ * must also be complete and so we can discard the last used barriers
+ * without loss of information.
+ */
+ i915_syncmap_free(&tl->sync);
+
+ __i915_vma_unpin(tl->hwsp_ggtt);
+}
+
+void __intel_timeline_free(struct kref *kref)
+{
+ struct intel_timeline *timeline =
+ container_of(kref, typeof(*timeline), kref);
+
+ intel_timeline_fini(timeline);
+ kfree(timeline);
+}
+
+static void timelines_fini(struct intel_gt *gt)
+{
+ struct i915_gt_timelines *timelines = &gt->timelines;
+
+ GEM_BUG_ON(!list_empty(&timelines->active_list));
+ GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
+
+ mutex_destroy(&timelines->mutex);
+}
+
+void intel_timelines_fini(struct drm_i915_private *i915)
+{
+ timelines_fini(&i915->gt);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "gt/selftests/mock_timeline.c"
+#include "gt/selftest_timeline.c"
+#endif
--- /dev/null
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_TIMELINE_H
+#define I915_TIMELINE_H
+
+#include <linux/lockdep.h>
+
+#include "i915_active.h"
+#include "i915_syncmap.h"
+#include "gt/intel_timeline_types.h"
+
+int intel_timeline_init(struct intel_timeline *tl,
+ struct intel_gt *gt,
+ struct i915_vma *hwsp);
+void intel_timeline_fini(struct intel_timeline *tl);
+
+struct intel_timeline *
+intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
+
+static inline struct intel_timeline *
+intel_timeline_get(struct intel_timeline *timeline)
+{
+ kref_get(&timeline->kref);
+ return timeline;
+}
+
+void __intel_timeline_free(struct kref *kref);
+static inline void intel_timeline_put(struct intel_timeline *timeline)
+{
+ kref_put(&timeline->kref, __intel_timeline_free);
+}
+
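+/*
+ * The per-timeline syncmap records the most recent (context, seqno) pair we
+ * have already awaited, so later requests on this timeline can skip emitting
+ * a redundant wait on an older or equal sync point.
+ */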
+static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
+ u64 context, u32 seqno)
+{
+ return i915_syncmap_set(&tl->sync, context, seqno);
+}
+
+static inline int intel_timeline_sync_set(struct intel_timeline *tl,
+ const struct dma_fence *fence)
+{
+ return __intel_timeline_sync_set(tl, fence->context, fence->seqno);
+}
+
+static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl,
+ u64 context, u32 seqno)
+{
+ return i915_syncmap_is_later(&tl->sync, context, seqno);
+}
+
+static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
+ const struct dma_fence *fence)
+{
+ return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
+}
+
+int intel_timeline_pin(struct intel_timeline *tl);
+int intel_timeline_get_seqno(struct intel_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno);
+void intel_timeline_unpin(struct intel_timeline *tl);
+
+int intel_timeline_read_hwsp(struct i915_request *from,
+ struct i915_request *until,
+ u32 *hwsp_offset);
+
+void intel_timelines_init(struct drm_i915_private *i915);
+void intel_timelines_park(struct drm_i915_private *i915);
+void intel_timelines_fini(struct drm_i915_private *i915);
+
+#endif
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_TIMELINE_TYPES_H__
+#define __I915_TIMELINE_TYPES_H__
+
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+#include "i915_active_types.h"
+
+struct drm_i915_private;
+struct i915_vma;
+struct intel_timeline_cacheline;
+struct i915_syncmap;
+
+struct intel_timeline {
+ u64 fence_context;
+ u32 seqno;
+
+ struct mutex mutex; /* protects the flow of requests */
+
+ unsigned int pin_count;
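+
+ /*
+ * The HWSP slot backing this timeline's breadcrumbs: hwsp_ggtt is the
+ * GGTT vma, hwsp_offset the offset of our slot within it (a full GGTT
+ * address once the timeline is pinned) and hwsp_seqno the CPU pointer
+ * to the last seqno the GPU wrote there.
+ */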
+ const u32 *hwsp_seqno;
+ struct i915_vma *hwsp_ggtt;
+ u32 hwsp_offset;
+
+ struct intel_timeline_cacheline *hwsp_cacheline;
+
+ bool has_initial_breadcrumb;
+
+ /**
+ * List of breadcrumbs associated with GPU requests currently
+ * outstanding.
+ */
+ struct list_head requests;
+
+ /* Contains an RCU guarded pointer to the last request. No reference is
+ * held to the request, users must carefully acquire a reference to
+ * the request using i915_active_request_get_request_rcu(), or hold the
+ * struct_mutex.
+ */
+ struct i915_active_request last_request;
+
+ /**
+ * We track the most recent seqno that we wait on in every context so
+ * that we only have to emit a new await and dependency on a more
+ * recent sync point. As the contexts may be executed out-of-order, we
+ * have to track each individually and can not rely on an absolute
+ * global_seqno. When we know that all tracked fences are completed
+ * (i.e. when the driver is idle), we know that the syncmap is
+ * redundant and we can discard it without loss of generality.
+ */
+ struct i915_syncmap *sync;
+
+ struct list_head link;
+ struct intel_gt *gt;
+
+ struct kref kref;
+};
+
+#endif /* __I915_TIMELINE_TYPES_H__ */
struct mock_ring {
struct intel_ring base;
- struct i915_timeline timeline;
+ struct intel_timeline timeline;
};
-static void mock_timeline_pin(struct i915_timeline *tl)
+static void mock_timeline_pin(struct intel_timeline *tl)
{
tl->pin_count++;
}
-static void mock_timeline_unpin(struct i915_timeline *tl)
+static void mock_timeline_unpin(struct intel_timeline *tl)
{
GEM_BUG_ON(!tl->pin_count);
tl->pin_count--;
if (!ring)
return NULL;
- if (i915_timeline_init(&ring->timeline, engine->gt, NULL)) {
+ if (intel_timeline_init(&ring->timeline, engine->gt, NULL)) {
kfree(ring);
return NULL;
}
{
struct mock_ring *ring = container_of(base, typeof(*ring), base);
- i915_timeline_fini(&ring->timeline);
+ intel_timeline_fini(&ring->timeline);
kfree(ring);
}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017-2018 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "gem/i915_gem_pm.h"
+
+#include "../selftests/i915_random.h"
+#include "../i915_selftest.h"
+
+#include "../selftests/igt_flush_test.h"
+#include "../selftests/mock_gem_device.h"
+#include "selftests/mock_timeline.h"
+
+static struct page *hwsp_page(struct intel_timeline *tl)
+{
+ struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ return sg_page(obj->mm.pages->sgl);
+}
+
+static unsigned long hwsp_cacheline(struct intel_timeline *tl)
+{
+ unsigned long address = (unsigned long)page_address(hwsp_page(tl));
+
+ return (address + tl->hwsp_offset) / CACHELINE_BYTES;
+}
+
+#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
+
+struct mock_hwsp_freelist {
+ struct drm_i915_private *i915;
+ struct radix_tree_root cachelines;
+ struct intel_timeline **history;
+ unsigned long count, max;
+ struct rnd_state prng;
+};
+
+enum {
+ SHUFFLE = BIT(0),
+};
+
+static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
+ unsigned int idx,
+ struct intel_timeline *tl)
+{
+ tl = xchg(&state->history[idx], tl);
+ if (tl) {
+ radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
+ intel_timeline_put(tl);
+ }
+}
+
+static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
+ unsigned int count,
+ unsigned int flags)
+{
+ struct intel_timeline *tl;
+ unsigned int idx;
+
+ while (count--) {
+ unsigned long cacheline;
+ int err;
+
+ tl = intel_timeline_create(&state->i915->gt, NULL);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ cacheline = hwsp_cacheline(tl);
+ err = radix_tree_insert(&state->cachelines, cacheline, tl);
+ if (err) {
+ if (err == -EEXIST) {
+ pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
+ cacheline);
+ }
+ intel_timeline_put(tl);
+ return err;
+ }
+
+ idx = state->count++ % state->max;
+ __mock_hwsp_record(state, idx, tl);
+ }
+
+ if (flags & SHUFFLE)
+ i915_prandom_shuffle(state->history,
+ sizeof(*state->history),
+ min(state->count, state->max),
+ &state->prng);
+
+ count = i915_prandom_u32_max_state(min(state->count, state->max),
+ &state->prng);
+ while (count--) {
+ idx = --state->count % state->max;
+ __mock_hwsp_record(state, idx, NULL);
+ }
+
+ return 0;
+}
+
+static int mock_hwsp_freelist(void *arg)
+{
+ struct mock_hwsp_freelist state;
+ const struct {
+ const char *name;
+ unsigned int flags;
+ } phases[] = {
+ { "linear", 0 },
+ { "shuffled", SHUFFLE },
+ { },
+ }, *p;
+ unsigned int na;
+ int err = 0;
+
+ INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
+ state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
+
+ state.i915 = mock_gem_device();
+ if (!state.i915)
+ return -ENOMEM;
+
+ /*
+ * Create a bunch of timelines and check that their HWSP do not overlap.
+ * Free some, and try again.
+ */
+
+ state.max = PAGE_SIZE / sizeof(*state.history);
+ state.count = 0;
+ state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
+ if (!state.history) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
+ mutex_lock(&state.i915->drm.struct_mutex);
+ for (p = phases; p->name; p++) {
+ pr_debug("%s(%s)\n", __func__, p->name);
+ for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
+ err = __mock_hwsp_timeline(&state, na, p->flags);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ for (na = 0; na < state.max; na++)
+ __mock_hwsp_record(&state, na, NULL);
+ mutex_unlock(&state.i915->drm.struct_mutex);
+ kfree(state.history);
+err_put:
+ drm_dev_put(&state.i915->drm);
+ return err;
+}
+
+struct __igt_sync {
+ const char *name;
+ u32 seqno;
+ bool expected;
+ bool set;
+};
+
+static int __igt_sync(struct intel_timeline *tl,
+ u64 ctx,
+ const struct __igt_sync *p,
+ const char *name)
+{
+ int ret;
+
+ if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
+ pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
+ name, p->name, ctx, p->seqno, yesno(p->expected));
+ return -EINVAL;
+ }
+
+ if (p->set) {
+ ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int igt_sync(void *arg)
+{
+ const struct __igt_sync pass[] = {
+ { "unset", 0, false, false },
+ { "new", 0, false, true },
+ { "0a", 0, true, true },
+ { "1a", 1, false, true },
+ { "1b", 1, true, true },
+ { "0b", 0, true, false },
+ { "2a", 2, false, true },
+ { "4", 4, false, true },
+ { "INT_MAX", INT_MAX, false, true },
+ { "INT_MAX-1", INT_MAX-1, true, false },
+ { "INT_MAX+1", (u32)INT_MAX+1, false, true },
+ { "INT_MAX", INT_MAX, true, false },
+ { "UINT_MAX", UINT_MAX, false, true },
+ { "wrap", 0, false, true },
+ { "unwrap", UINT_MAX, true, false },
+ {},
+ }, *p;
+ struct intel_timeline tl;
+ int order, offset;
+ int ret = -ENODEV;
+
+ mock_timeline_init(&tl, 0);
+ for (p = pass; p->name; p++) {
+ for (order = 1; order < 64; order++) {
+ for (offset = -1; offset <= (order > 1); offset++) {
+ u64 ctx = BIT_ULL(order) + offset;
+
+ ret = __igt_sync(&tl, ctx, p, "1");
+ if (ret)
+ goto out;
+ }
+ }
+ }
+ mock_timeline_fini(&tl);
+
+ mock_timeline_init(&tl, 0);
+ for (order = 1; order < 64; order++) {
+ for (offset = -1; offset <= (order > 1); offset++) {
+ u64 ctx = BIT_ULL(order) + offset;
+
+ for (p = pass; p->name; p++) {
+ ret = __igt_sync(&tl, ctx, p, "2");
+ if (ret)
+ goto out;
+ }
+ }
+ }
+
+out:
+ mock_timeline_fini(&tl);
+ return ret;
+}
+
+static unsigned int random_engine(struct rnd_state *rnd)
+{
+ return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
+}
+
+static int bench_sync(void *arg)
+{
+ struct rnd_state prng;
+ struct intel_timeline tl;
+ unsigned long end_time, count;
+ u64 prng32_1M;
+ ktime_t kt;
+ int order, last_order;
+
+ mock_timeline_init(&tl, 0);
+
+ /* Lookups from cache are very fast and so the random number generation
+ * and the loop itself become a significant factor in the per-iteration
+ * timings. We try to compensate the results by measuring the overhead
+ * of the prng and subtracting it from the reported results.
+ */
+ prandom_seed_state(&prng, i915_selftest.random_seed);
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ u32 x;
+
+ /* Make sure the compiler doesn't optimise away the prng call */
+ WRITE_ONCE(x, prandom_u32_state(&prng));
+
+ count++;
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ pr_debug("%s: %lu random evaluations, %lluns/prng\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+ prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
+
+ /* Benchmark (only) setting random context ids */
+ prandom_seed_state(&prng, i915_selftest.random_seed);
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ u64 id = i915_prandom_u64_state(&prng);
+
+ __intel_timeline_sync_set(&tl, id, 0);
+ count++;
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+ pr_info("%s: %lu random insertions, %lluns/insert\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+ /* Benchmark looking up the exact same context ids as we just set */
+ prandom_seed_state(&prng, i915_selftest.random_seed);
+ end_time = count;
+ kt = ktime_get();
+ while (end_time--) {
+ u64 id = i915_prandom_u64_state(&prng);
+
+ if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
+ mock_timeline_fini(&tl);
+ pr_err("Lookup of %llu failed\n", id);
+ return -EINVAL;
+ }
+ }
+ kt = ktime_sub(ktime_get(), kt);
+ kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+ pr_info("%s: %lu random lookups, %lluns/lookup\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+ mock_timeline_fini(&tl);
+ cond_resched();
+
+ mock_timeline_init(&tl, 0);
+
+ /* Benchmark setting the first N (in order) contexts */
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ __intel_timeline_sync_set(&tl, count++, 0);
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ pr_info("%s: %lu in-order insertions, %lluns/insert\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+ /* Benchmark looking up the exact same context ids as we just set */
+ end_time = count;
+ kt = ktime_get();
+ while (end_time--) {
+ if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
+ pr_err("Lookup of %lu failed\n", end_time);
+ mock_timeline_fini(&tl);
+ return -EINVAL;
+ }
+ }
+ kt = ktime_sub(ktime_get(), kt);
+ pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+ mock_timeline_fini(&tl);
+ cond_resched();
+
+ mock_timeline_init(&tl, 0);
+
+ /* Benchmark searching for a random context id and maybe changing it */
+ prandom_seed_state(&prng, i915_selftest.random_seed);
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ u32 id = random_engine(&prng);
+ u32 seqno = prandom_u32_state(&prng);
+
+ if (!__intel_timeline_sync_is_later(&tl, id, seqno))
+ __intel_timeline_sync_set(&tl, id, seqno);
+
+ count++;
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+ pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+ mock_timeline_fini(&tl);
+ cond_resched();
+
+ /* Benchmark searching for a known context id and changing the seqno */
+ for (last_order = 1, order = 1; order < 32;
+ ({ int tmp = last_order; last_order = order; order += tmp; })) {
+ unsigned int mask = BIT(order) - 1;
+
+ mock_timeline_init(&tl, 0);
+
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ /* Without assuming too many details of the underlying
+ * implementation, try to identify its phase-changes
+ * (if any)!
+ */
+ u64 id = (u64)(count & mask) << order;
+
+ __intel_timeline_sync_is_later(&tl, id, 0);
+ __intel_timeline_sync_set(&tl, id, 0);
+
+ count++;
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
+ __func__, count, order,
+ (long long)div64_ul(ktime_to_ns(kt), count));
+ mock_timeline_fini(&tl);
+ cond_resched();
+ }
+
+ return 0;
+}
+
+int intel_timeline_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(mock_hwsp_freelist),
+ SUBTEST(igt_sync),
+ SUBTEST(bench_sync),
+ };
+
+ return i915_subtests(tests, NULL);
+}
+
+static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
+{
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ if (INTEL_GEN(rq->i915) >= 8) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = addr;
+ *cs++ = 0;
+ *cs++ = value;
+ } else if (INTEL_GEN(rq->i915) >= 4) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = 0;
+ *cs++ = addr;
+ *cs++ = value;
+ } else {
+ *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *cs++ = addr;
+ *cs++ = value;
+ *cs++ = MI_NOOP;
+ }
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static struct i915_request *
+tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
+{
+ struct i915_request *rq;
+ int err;
+
+ lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */
+
+ err = intel_timeline_pin(tl);
+ if (err) {
+ rq = ERR_PTR(err);
+ goto out;
+ }
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq))
+ goto out_unpin;
+
+ err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
+ i915_request_add(rq);
+ if (err)
+ rq = ERR_PTR(err);
+
+out_unpin:
+ intel_timeline_unpin(tl);
+out:
+ if (IS_ERR(rq))
+ pr_err("Failed to write to timeline!\n");
+ return rq;
+}
+
+static struct intel_timeline *
+checked_intel_timeline_create(struct drm_i915_private *i915)
+{
+ struct intel_timeline *tl;
+
+ tl = intel_timeline_create(&i915->gt, NULL);
+ if (IS_ERR(tl))
+ return tl;
+
+ if (*tl->hwsp_seqno != tl->seqno) {
+ pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
+ *tl->hwsp_seqno, tl->seqno);
+ intel_timeline_put(tl);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return tl;
+}
+
+static int live_hwsp_engine(void *arg)
+{
+#define NUM_TIMELINES 4096
+ struct drm_i915_private *i915 = arg;
+ struct intel_timeline **timelines;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ unsigned long count, n;
+ int err = 0;
+
+ /*
+ * Create a bunch of timelines and check we can write
+ * independently to each of their breadcrumb slots.
+ */
+
+ timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
+ sizeof(*timelines),
+ GFP_KERNEL);
+ if (!timelines)
+ return -ENOMEM;
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ count = 0;
+ for_each_engine(engine, i915, id) {
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ for (n = 0; n < NUM_TIMELINES; n++) {
+ struct intel_timeline *tl;
+ struct i915_request *rq;
+
+ tl = checked_intel_timeline_create(i915);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ goto out;
+ }
+
+ rq = tl_write(tl, engine, count);
+ if (IS_ERR(rq)) {
+ intel_timeline_put(tl);
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ timelines[count++] = tl;
+ }
+ }
+
+out:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ for (n = 0; n < count; n++) {
+ struct intel_timeline *tl = timelines[n];
+
+ if (!err && *tl->hwsp_seqno != n) {
+ pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+ n, *tl->hwsp_seqno);
+ err = -EINVAL;
+ }
+ intel_timeline_put(tl);
+ }
+
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ kvfree(timelines);
+
+ return err;
+#undef NUM_TIMELINES
+}
+
+static int live_hwsp_alternate(void *arg)
+{
+#define NUM_TIMELINES 4096
+ struct drm_i915_private *i915 = arg;
+ struct intel_timeline **timelines;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ unsigned long count, n;
+ int err = 0;
+
+ /*
+ * Create a bunch of timelines and check we can write
+ * independently to each of their breadcrumb slots with adjacent
+ * engines.
+ */
+
+ timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
+ sizeof(*timelines),
+ GFP_KERNEL);
+ if (!timelines)
+ return -ENOMEM;
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ count = 0;
+ for (n = 0; n < NUM_TIMELINES; n++) {
+ for_each_engine(engine, i915, id) {
+ struct intel_timeline *tl;
+ struct i915_request *rq;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ tl = checked_intel_timeline_create(i915);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ goto out;
+ }
+
+ rq = tl_write(tl, engine, count);
+ if (IS_ERR(rq)) {
+ intel_timeline_put(tl);
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ timelines[count++] = tl;
+ }
+ }
+
+out:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ for (n = 0; n < count; n++) {
+ struct intel_timeline *tl = timelines[n];
+
+ if (!err && *tl->hwsp_seqno != n) {
+ pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+ n, *tl->hwsp_seqno);
+ err = -EINVAL;
+ }
+ intel_timeline_put(tl);
+ }
+
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ kvfree(timelines);
+
+ return err;
+#undef NUM_TIMELINES
+}
+
+static int live_hwsp_wrap(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ struct intel_timeline *tl;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ int err = 0;
+
+ /*
+ * Across a seqno wrap, we need to keep the old cacheline alive for
+ * foreign GPU references.
+ */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ tl = intel_timeline_create(&i915->gt, NULL);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ goto out_rpm;
+ }
+ if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
+ goto out_free;
+
+ err = intel_timeline_pin(tl);
+ if (err)
+ goto out_free;
+
+ for_each_engine(engine, i915, id) {
+ const u32 *hwsp_seqno[2];
+ struct i915_request *rq;
+ u32 seqno[2];
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ tl->seqno = -4u;
+
+ err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
+ seqno[0], tl->hwsp_offset);
+
+ err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ hwsp_seqno[0] = tl->hwsp_seqno;
+
+ err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
+ seqno[1], tl->hwsp_offset);
+
+ err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ hwsp_seqno[1] = tl->hwsp_seqno;
+
+ /* With wrap should come a new hwsp */
+ GEM_BUG_ON(seqno[1] >= seqno[0]);
+ GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
+
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ pr_err("Wait for timeline writes timed out!\n");
+ err = -EIO;
+ goto out;
+ }
+
+ if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
+ pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
+ *hwsp_seqno[0], *hwsp_seqno[1],
+ seqno[0], seqno[1]);
+ err = -EINVAL;
+ goto out;
+ }
+
+ i915_retire_requests(i915); /* recycle HWSP */
+ }
+
+out:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ intel_timeline_unpin(tl);
+out_free:
+ intel_timeline_put(tl);
+out_rpm:
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ return err;
+}
+
+static int live_hwsp_recycle(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ unsigned long count;
+ int err = 0;
+
+ /*
+ * Check seqno writes into one timeline at a time. We expect to
+ * recycle the breadcrumb slot between iterations and neither
+ * want to confuse ourselves or the GPU.
+ */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ count = 0;
+ for_each_engine(engine, i915, id) {
+ IGT_TIMEOUT(end_time);
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ do {
+ struct intel_timeline *tl;
+ struct i915_request *rq;
+
+ tl = checked_intel_timeline_create(i915);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ goto out;
+ }
+
+ rq = tl_write(tl, engine, count);
+ if (IS_ERR(rq)) {
+ intel_timeline_put(tl);
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ pr_err("Wait for timeline writes timed out!\n");
+ intel_timeline_put(tl);
+ err = -EIO;
+ goto out;
+ }
+
+ if (*tl->hwsp_seqno != count) {
+ pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+ count, *tl->hwsp_seqno);
+ err = -EINVAL;
+ }
+
+ intel_timeline_put(tl);
+ count++;
+
+ if (err)
+ goto out;
+
+ intel_timelines_park(i915); /* Encourage recycling! */
+ } while (!__igt_timeout(end_time, NULL));
+ }
+
+out:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ return err;
+}
+
+int intel_timeline_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_hwsp_recycle),
+ SUBTEST(live_hwsp_engine),
+ SUBTEST(live_hwsp_alternate),
+ SUBTEST(live_hwsp_wrap),
+ };
+
+ if (i915_terminally_wedged(i915))
+ return 0;
+
+ return i915_subtests(tests, i915);
+}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017-2018 Intel Corporation
+ */
+
+#include "../intel_timeline.h"
+
+#include "mock_timeline.h"
+
+void mock_timeline_init(struct intel_timeline *timeline, u64 context)
+{
+ timeline->gt = NULL;
+ timeline->fence_context = context;
+
+ mutex_init(&timeline->mutex);
+
+ INIT_ACTIVE_REQUEST(&timeline->last_request);
+ INIT_LIST_HEAD(&timeline->requests);
+
+ i915_syncmap_init(&timeline->sync);
+
+ INIT_LIST_HEAD(&timeline->link);
+}
+
+void mock_timeline_fini(struct intel_timeline *timeline)
+{
+ i915_syncmap_free(&timeline->sync);
+}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017-2018 Intel Corporation
+ */
+
+#ifndef __MOCK_TIMELINE__
+#define __MOCK_TIMELINE__
+
+struct intel_timeline;
+
+void mock_timeline_init(struct intel_timeline *timeline, u64 context);
+void mock_timeline_fini(struct intel_timeline *timeline);
+
+#endif /* !__MOCK_TIMELINE__ */
#include "i915_gpu_error.h"
#include "i915_request.h"
#include "i915_scheduler.h"
-#include "i915_timeline.h"
+#include "gt/intel_timeline.h"
#include "i915_vma.h"
#include "intel_gvt.h"
unsigned int flags, long timeout)
{
struct i915_gt_timelines *gt = &i915->gt.timelines;
- struct i915_timeline *tl;
+ struct intel_timeline *tl;
mutex_lock(&gt->mutex);
list_for_each_entry(tl, &gt->active_list, link) {
dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
- i915_timelines_init(dev_priv);
+ intel_timelines_init(dev_priv);
ret = i915_gem_init_userptr(dev_priv);
if (ret)
if (ret != -EIO) {
i915_gem_cleanup_userptr(dev_priv);
- i915_timelines_fini(dev_priv);
+ intel_timelines_fini(dev_priv);
}
if (ret == -EIO) {
intel_uc_fini_misc(dev_priv);
i915_gem_cleanup_userptr(dev_priv);
- i915_timelines_fini(dev_priv);
+ intel_timelines_fini(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
#include "i915_request.h"
#include "i915_scatterlist.h"
#include "i915_selftest.h"
-#include "i915_timeline.h"
+#include "gt/intel_timeline.h"
#define I915_GTT_PAGE_SIZE_4K BIT_ULL(12)
#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16)
struct i915_request *
__i915_request_create(struct intel_context *ce, gfp_t gfp)
{
- struct i915_timeline *tl = ce->ring->timeline;
+ struct intel_timeline *tl = ce->ring->timeline;
struct i915_request *rq;
u32 seqno;
int ret;
}
}
- ret = i915_timeline_get_seqno(tl, rq, &seqno);
+ ret = intel_timeline_get_seqno(tl, rq, &seqno);
if (ret)
goto err_free;
return 0;
signal = list_prev_entry(signal, ring_link);
- if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
+ if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
return 0;
return i915_sw_fence_await_dma_fence(&rq->submit,
return err;
/* We need to pin the signaler's HWSP until we are finished reading. */
- err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
+ err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
if (err)
return err;
/* Squash repeated waits to the same timelines */
if (fence->context != rq->i915->mm.unordered_timeline &&
- i915_timeline_sync_is_later(rq->timeline, fence))
+ intel_timeline_sync_is_later(rq->timeline, fence))
continue;
if (dma_fence_is_i915(fence))
/* Record the latest fence used against each timeline */
if (fence->context != rq->i915->mm.unordered_timeline)
- i915_timeline_sync_set(rq->timeline, fence);
+ intel_timeline_sync_set(rq->timeline, fence);
} while (--nchild);
return 0;
static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
- struct i915_timeline *timeline = rq->timeline;
+ struct intel_timeline *timeline = rq->timeline;
struct i915_request *prev;
/*
struct drm_file;
struct drm_i915_gem_object;
struct i915_request;
-struct i915_timeline;
-struct i915_timeline_cacheline;
+struct intel_timeline;
+struct intel_timeline_cacheline;
struct i915_capture_list {
struct i915_capture_list *next;
struct intel_engine_cs *engine;
struct intel_context *hw_context;
struct intel_ring *ring;
- struct i915_timeline *timeline;
+ struct intel_timeline *timeline;
struct list_head signal_link;
/*
* inside the timeline's HWSP vma, but it is only valid while this
* request has not completed and guarded by the timeline mutex.
*/
- struct i915_timeline_cacheline *hwsp_cacheline;
+ struct intel_timeline_cacheline *hwsp_cacheline;
/** Position in the ring of the start of the request */
u32 head;
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2016-2018 Intel Corporation
- */
-
-#include "gt/intel_gt_types.h"
-
-#include "i915_drv.h"
-
-#include "i915_active.h"
-#include "i915_syncmap.h"
-#include "i915_timeline.h"
-
-#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
-#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
-
-struct i915_timeline_hwsp {
- struct intel_gt *gt;
- struct i915_gt_timelines *gt_timelines;
- struct list_head free_link;
- struct i915_vma *vma;
- u64 free_bitmap;
-};
-
-struct i915_timeline_cacheline {
- struct i915_active active;
- struct i915_timeline_hwsp *hwsp;
- void *vaddr;
-#define CACHELINE_BITS 6
-#define CACHELINE_FREE CACHELINE_BITS
-};
-
-static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
-{
- struct drm_i915_private *i915 = gt->i915;
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
-
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
-
- vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
- if (IS_ERR(vma))
- i915_gem_object_put(obj);
-
- return vma;
-}
-
-static struct i915_vma *
-hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
-{
- struct i915_gt_timelines *gt = &timeline->gt->timelines;
- struct i915_timeline_hwsp *hwsp;
-
- BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
-
- spin_lock_irq(&gt->hwsp_lock);
-
- /* hwsp_free_list only contains HWSP that have available cachelines */
- hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
- typeof(*hwsp), free_link);
- if (!hwsp) {
- struct i915_vma *vma;
-
- spin_unlock_irq(&gt->hwsp_lock);
-
- hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
- if (!hwsp)
- return ERR_PTR(-ENOMEM);
-
- vma = __hwsp_alloc(timeline->gt);
- if (IS_ERR(vma)) {
- kfree(hwsp);
- return vma;
- }
-
- vma->private = hwsp;
- hwsp->gt = timeline->gt;
- hwsp->vma = vma;
- hwsp->free_bitmap = ~0ull;
- hwsp->gt_timelines = gt;
-
- spin_lock_irq(&gt->hwsp_lock);
- list_add(&hwsp->free_link, &gt->hwsp_free_list);
- }
-
- GEM_BUG_ON(!hwsp->free_bitmap);
- *cacheline = __ffs64(hwsp->free_bitmap);
- hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
- if (!hwsp->free_bitmap)
- list_del(&hwsp->free_link);
-
- spin_unlock_irq(&gt->hwsp_lock);
-
- GEM_BUG_ON(hwsp->vma->private != hwsp);
- return hwsp->vma;
-}
-
-static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
-{
- struct i915_gt_timelines *gt = hwsp->gt_timelines;
- unsigned long flags;
-
- spin_lock_irqsave(&gt->hwsp_lock, flags);
-
- /* As a cacheline becomes available, publish the HWSP on the freelist */
- if (!hwsp->free_bitmap)
- list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
-
- GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
- hwsp->free_bitmap |= BIT_ULL(cacheline);
-
- /* And if no one is left using it, give the page back to the system */
- if (hwsp->free_bitmap == ~0ull) {
- i915_vma_put(hwsp->vma);
- list_del(&hwsp->free_link);
- kfree(hwsp);
- }
-
- spin_unlock_irqrestore(&gt->hwsp_lock, flags);
-}
-
-static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
-{
- GEM_BUG_ON(!i915_active_is_idle(&cl->active));
-
- i915_gem_object_unpin_map(cl->hwsp->vma->obj);
- i915_vma_put(cl->hwsp->vma);
- __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
-
- i915_active_fini(&cl->active);
- kfree(cl);
-}
-
-static void __cacheline_retire(struct i915_active *active)
-{
- struct i915_timeline_cacheline *cl =
- container_of(active, typeof(*cl), active);
-
- i915_vma_unpin(cl->hwsp->vma);
- if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
- __idle_cacheline_free(cl);
-}
-
-static struct i915_timeline_cacheline *
-cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline)
-{
- struct i915_timeline_cacheline *cl;
- void *vaddr;
-
- GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));
-
- cl = kmalloc(sizeof(*cl), GFP_KERNEL);
- if (!cl)
- return ERR_PTR(-ENOMEM);
-
- vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- kfree(cl);
- return ERR_CAST(vaddr);
- }
-
- i915_vma_get(hwsp->vma);
- cl->hwsp = hwsp;
- cl->vaddr = page_pack_bits(vaddr, cacheline);
-
- i915_active_init(hwsp->gt->i915, &cl->active, __cacheline_retire);
-
- return cl;
-}
-
-static void cacheline_acquire(struct i915_timeline_cacheline *cl)
-{
- if (cl && i915_active_acquire(&cl->active))
- __i915_vma_pin(cl->hwsp->vma);
-}
-
-static void cacheline_release(struct i915_timeline_cacheline *cl)
-{
- if (cl)
- i915_active_release(&cl->active);
-}
-
-static void cacheline_free(struct i915_timeline_cacheline *cl)
-{
- GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
- cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
-
- if (i915_active_is_idle(&cl->active))
- __idle_cacheline_free(cl);
-}
-
-int i915_timeline_init(struct i915_timeline *timeline,
- struct intel_gt *gt,
- struct i915_vma *hwsp)
-{
- void *vaddr;
-
- /*
- * Ideally we want a set of engines on a single leaf as we expect
- * to mostly be tracking synchronisation between engines. It is not
- * a huge issue if this is not the case, but we may want to mitigate
- * any page crossing penalties if they become an issue.
- *
- * Called during early_init before we know how many engines there are.
- */
- BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
-
- timeline->gt = gt;
- timeline->pin_count = 0;
- timeline->has_initial_breadcrumb = !hwsp;
- timeline->hwsp_cacheline = NULL;
-
- if (!hwsp) {
- struct i915_timeline_cacheline *cl;
- unsigned int cacheline;
-
- hwsp = hwsp_alloc(timeline, &cacheline);
- if (IS_ERR(hwsp))
- return PTR_ERR(hwsp);
-
- cl = cacheline_alloc(hwsp->private, cacheline);
- if (IS_ERR(cl)) {
- __idle_hwsp_free(hwsp->private, cacheline);
- return PTR_ERR(cl);
- }
-
- timeline->hwsp_cacheline = cl;
- timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
-
- vaddr = page_mask_bits(cl->vaddr);
- } else {
- timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
-
- vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
- if (IS_ERR(vaddr))
- return PTR_ERR(vaddr);
- }
-
- timeline->hwsp_seqno =
- memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
-
- timeline->hwsp_ggtt = i915_vma_get(hwsp);
- GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
-
- timeline->fence_context = dma_fence_context_alloc(1);
-
- mutex_init(&timeline->mutex);
-
- INIT_ACTIVE_REQUEST(&timeline->last_request);
- INIT_LIST_HEAD(&timeline->requests);
-
- i915_syncmap_init(&timeline->sync);
-
- return 0;
-}
-
-static void timelines_init(struct intel_gt *gt)
-{
- struct i915_gt_timelines *timelines = &gt->timelines;
-
- mutex_init(&timelines->mutex);
- INIT_LIST_HEAD(&timelines->active_list);
-
- spin_lock_init(&timelines->hwsp_lock);
- INIT_LIST_HEAD(&timelines->hwsp_free_list);
-
- /* via i915_gem_wait_for_idle() */
- i915_gem_shrinker_taints_mutex(gt->i915, &timelines->mutex);
-}
-
-void i915_timelines_init(struct drm_i915_private *i915)
-{
- timelines_init(&i915->gt);
-}
-
-static void timeline_add_to_active(struct i915_timeline *tl)
-{
- struct i915_gt_timelines *gt = &tl->gt->timelines;
-
- mutex_lock(&gt->mutex);
- list_add(&tl->link, &gt->active_list);
- mutex_unlock(&gt->mutex);
-}
-
-static void timeline_remove_from_active(struct i915_timeline *tl)
-{
- struct i915_gt_timelines *gt = &tl->gt->timelines;
-
- mutex_lock(&gt->mutex);
- list_del(&tl->link);
- mutex_unlock(&gt->mutex);
-}
-
-static void timelines_park(struct intel_gt *gt)
-{
- struct i915_gt_timelines *timelines = &gt->timelines;
- struct i915_timeline *timeline;
-
- mutex_lock(&timelines->mutex);
- list_for_each_entry(timeline, &timelines->active_list, link) {
- /*
- * All known fences are completed so we can scrap
- * the current sync point tracking and start afresh;
- * any attempt to wait upon a previous sync point
- * will be skipped as the fence was signaled.
- */
- i915_syncmap_free(&timeline->sync);
- }
- mutex_unlock(&timelines->mutex);
-}
-
-/**
- * i915_timelines_park - called when the driver idles
- * @i915: the drm_i915_private device
- *
- * When the driver is completely idle, we know that all of our sync points
- * have been signaled and our tracking is then entirely redundant. Any request
- * to wait upon an older sync point will be completed instantly as we know
- * the fence is signaled and therefore we will not even look them up in the
- * sync point map.
- */
-void i915_timelines_park(struct drm_i915_private *i915)
-{
- timelines_park(&i915->gt);
-}
-
-void i915_timeline_fini(struct i915_timeline *timeline)
-{
- GEM_BUG_ON(timeline->pin_count);
- GEM_BUG_ON(!list_empty(&timeline->requests));
-
- i915_syncmap_free(&timeline->sync);
-
- if (timeline->hwsp_cacheline)
- cacheline_free(timeline->hwsp_cacheline);
- else
- i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
-
- i915_vma_put(timeline->hwsp_ggtt);
-}
-
-struct i915_timeline *
-i915_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
-{
- struct i915_timeline *timeline;
- int err;
-
- timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
- if (!timeline)
- return ERR_PTR(-ENOMEM);
-
- err = i915_timeline_init(timeline, gt, global_hwsp);
- if (err) {
- kfree(timeline);
- return ERR_PTR(err);
- }
-
- kref_init(&timeline->kref);
-
- return timeline;
-}
-
-int i915_timeline_pin(struct i915_timeline *tl)
-{
- int err;
-
- if (tl->pin_count++)
- return 0;
- GEM_BUG_ON(!tl->pin_count);
-
- err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
- if (err)
- goto unpin;
-
- tl->hwsp_offset =
- i915_ggtt_offset(tl->hwsp_ggtt) +
- offset_in_page(tl->hwsp_offset);
-
- cacheline_acquire(tl->hwsp_cacheline);
- timeline_add_to_active(tl);
-
- return 0;
-
-unpin:
- tl->pin_count = 0;
- return err;
-}
-
-static u32 timeline_advance(struct i915_timeline *tl)
-{
- GEM_BUG_ON(!tl->pin_count);
- GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
-
- return tl->seqno += 1 + tl->has_initial_breadcrumb;
-}
-
-static void timeline_rollback(struct i915_timeline *tl)
-{
- tl->seqno -= 1 + tl->has_initial_breadcrumb;
-}
-
-static noinline int
-__i915_timeline_get_seqno(struct i915_timeline *tl,
- struct i915_request *rq,
- u32 *seqno)
-{
- struct i915_timeline_cacheline *cl;
- unsigned int cacheline;
- struct i915_vma *vma;
- void *vaddr;
- int err;
-
- /*
- * If there is an outstanding GPU reference to this cacheline,
- * such as it being sampled by a HW semaphore on another timeline,
- * we cannot wraparound our seqno value (the HW semaphore does
- * a strict greater-than-or-equals compare, not i915_seqno_passed).
- * So if the cacheline is still busy, we must detach ourselves
- * from it and leave it inflight alongside its users.
- *
- * However, if nobody is watching and we can guarantee that nobody
- * will, we could simply reuse the same cacheline.
- *
- * if (i915_active_request_is_signaled(&tl->last_request) &&
- * i915_active_is_signaled(&tl->hwsp_cacheline->active))
- * return 0;
- *
- * That seems unlikely for a busy timeline that needed to wrap in
- * the first place, so just replace the cacheline.
- */
-
- vma = hwsp_alloc(tl, &cacheline);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_rollback;
- }
-
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
- if (err) {
- __idle_hwsp_free(vma->private, cacheline);
- goto err_rollback;
- }
-
- cl = cacheline_alloc(vma->private, cacheline);
- if (IS_ERR(cl)) {
- err = PTR_ERR(cl);
- __idle_hwsp_free(vma->private, cacheline);
- goto err_unpin;
- }
- GEM_BUG_ON(cl->hwsp->vma != vma);
-
- /*
- * Attach the old cacheline to the current request, so that we only
- * free it after the current request is retired, which ensures that
- * all writes into the cacheline from previous requests are complete.
- */
- err = i915_active_ref(&tl->hwsp_cacheline->active,
- tl->fence_context, rq);
- if (err)
- goto err_cacheline;
-
- cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
- cacheline_free(tl->hwsp_cacheline);
-
- i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
- i915_vma_put(tl->hwsp_ggtt);
-
- tl->hwsp_ggtt = i915_vma_get(vma);
-
- vaddr = page_mask_bits(cl->vaddr);
- tl->hwsp_offset = cacheline * CACHELINE_BYTES;
- tl->hwsp_seqno =
- memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
-
- tl->hwsp_offset += i915_ggtt_offset(vma);
-
- cacheline_acquire(cl);
- tl->hwsp_cacheline = cl;
-
- *seqno = timeline_advance(tl);
- GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
- return 0;
-
-err_cacheline:
- cacheline_free(cl);
-err_unpin:
- i915_vma_unpin(vma);
-err_rollback:
- timeline_rollback(tl);
- return err;
-}
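The reasoning in the comment above hinges on the difference between the GPU semaphore's plain unsigned greater-or-equal compare and the kernel's wrap-safe seqno test. A small self-contained illustration (the helper mirrors the usual (s32)(a - b) >= 0 idiom of i915_seqno_passed(); the numbers are made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe comparison, equivalent in spirit to i915_seqno_passed(). */
static bool seqno_passed(uint32_t current, uint32_t target)
{
	return (int32_t)(current - target) >= 0;
}

int main(void)
{
	uint32_t target = 0xfffffffcu;	/* seqno handed out just before the wrap */
	uint32_t current = 2;		/* value the HWSP holds after the wrap */

	/* A HW semaphore doing "current >= target" would wait forever... */
	printf("plain >= : %d\n", current >= target);			/* prints 0 */
	/* ...which is why the old cacheline must stay alive for old waiters. */
	printf("wrap-safe: %d\n", seqno_passed(current, target));	/* prints 1 */
	return 0;
}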
-
-int i915_timeline_get_seqno(struct i915_timeline *tl,
- struct i915_request *rq,
- u32 *seqno)
-{
- *seqno = timeline_advance(tl);
-
- /* Replace the HWSP on wraparound for HW semaphores */
- if (unlikely(!*seqno && tl->hwsp_cacheline))
- return __i915_timeline_get_seqno(tl, rq, seqno);
-
- return 0;
-}
-
-static int cacheline_ref(struct i915_timeline_cacheline *cl,
- struct i915_request *rq)
-{
- return i915_active_ref(&cl->active, rq->fence.context, rq);
-}
-
-int i915_timeline_read_hwsp(struct i915_request *from,
- struct i915_request *to,
- u32 *hwsp)
-{
- struct i915_timeline_cacheline *cl = from->hwsp_cacheline;
- struct i915_timeline *tl = from->timeline;
- int err;
-
- GEM_BUG_ON(to->timeline == tl);
-
- mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
- err = i915_request_completed(from);
- if (!err)
- err = cacheline_ref(cl, to);
- if (!err) {
- if (likely(cl == tl->hwsp_cacheline)) {
- *hwsp = tl->hwsp_offset;
- } else { /* across a seqno wrap, recover the original offset */
- *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
- ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
- CACHELINE_BYTES;
- }
- }
- mutex_unlock(&tl->mutex);
-
- return err;
-}
-
-void i915_timeline_unpin(struct i915_timeline *tl)
-{
- GEM_BUG_ON(!tl->pin_count);
- if (--tl->pin_count)
- return;
-
- timeline_remove_from_active(tl);
- cacheline_release(tl->hwsp_cacheline);
-
- /*
- * Since this timeline is idle, all barriers upon which we were waiting
- * must also be complete and so we can discard the last used barriers
- * without loss of information.
- */
- i915_syncmap_free(&tl->sync);
-
- __i915_vma_unpin(tl->hwsp_ggtt);
-}
-
-void __i915_timeline_free(struct kref *kref)
-{
- struct i915_timeline *timeline =
- container_of(kref, typeof(*timeline), kref);
-
- i915_timeline_fini(timeline);
- kfree(timeline);
-}
-
-static void timelines_fini(struct intel_gt *gt)
-{
- struct i915_gt_timelines *timelines = &gt->timelines;
-
- GEM_BUG_ON(!list_empty(&timelines->active_list));
- GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
-
- mutex_destroy(&timelines->mutex);
-}
-
-void i915_timelines_fini(struct drm_i915_private *i915)
-{
- timelines_fini(&i915->gt);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_timeline.c"
-#include "selftests/i915_timeline.c"
-#endif
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef I915_TIMELINE_H
-#define I915_TIMELINE_H
-
-#include <linux/lockdep.h>
-
-#include "i915_active.h"
-#include "i915_syncmap.h"
-#include "i915_timeline_types.h"
-
-int i915_timeline_init(struct i915_timeline *tl,
- struct intel_gt *gt,
- struct i915_vma *hwsp);
-void i915_timeline_fini(struct i915_timeline *tl);
-
-struct i915_timeline *
-i915_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
-
-static inline struct i915_timeline *
-i915_timeline_get(struct i915_timeline *timeline)
-{
- kref_get(&timeline->kref);
- return timeline;
-}
-
-void __i915_timeline_free(struct kref *kref);
-static inline void i915_timeline_put(struct i915_timeline *timeline)
-{
- kref_put(&timeline->kref, __i915_timeline_free);
-}
-
-static inline int __i915_timeline_sync_set(struct i915_timeline *tl,
- u64 context, u32 seqno)
-{
- return i915_syncmap_set(&tl->sync, context, seqno);
-}
-
-static inline int i915_timeline_sync_set(struct i915_timeline *tl,
- const struct dma_fence *fence)
-{
- return __i915_timeline_sync_set(tl, fence->context, fence->seqno);
-}
-
-static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl,
- u64 context, u32 seqno)
-{
- return i915_syncmap_is_later(&tl->sync, context, seqno);
-}
-
-static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
- const struct dma_fence *fence)
-{
- return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
-}
-
-int i915_timeline_pin(struct i915_timeline *tl);
-int i915_timeline_get_seqno(struct i915_timeline *tl,
- struct i915_request *rq,
- u32 *seqno);
-void i915_timeline_unpin(struct i915_timeline *tl);
-
-int i915_timeline_read_hwsp(struct i915_request *from,
- struct i915_request *until,
- u32 *hwsp_offset);
-
-void i915_timelines_init(struct drm_i915_private *i915);
-void i915_timelines_park(struct drm_i915_private *i915);
-void i915_timelines_fini(struct drm_i915_private *i915);
-
-#endif
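These declarations reappear one-for-one under gt/intel_timeline.h with the intel_ prefix, as the renamed call sites elsewhere in this patch show. A hedged sketch of the typical lifecycle under the new names follows (kernel-internal fragment; it assumes the remaining entry points keep the signatures declared above, and rq stands in for a request being built on this timeline):

#include "gt/intel_timeline.h"

/* Illustrative only: create a timeline, pin its HWSP into the GGTT, reserve
 * the next seqno for a request, then drop the references again.
 */
static int sketch_timeline_lifecycle(struct intel_gt *gt, struct i915_request *rq)
{
	struct intel_timeline *tl;
	u32 seqno;
	int err;

	tl = intel_timeline_create(gt, NULL);	/* NULL: carve a cacheline out of a shared HWSP page */
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	err = intel_timeline_pin(tl);		/* binds tl->hwsp_ggtt, fixes up tl->hwsp_offset */
	if (err)
		goto out_put;

	err = intel_timeline_get_seqno(tl, rq, &seqno);
	if (err)
		goto out_unpin;

	/* ... emit a breadcrumb that writes seqno to tl->hwsp_offset ... */

out_unpin:
	intel_timeline_unpin(tl);
out_put:
	intel_timeline_put(tl);
	return err;
}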
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2016 Intel Corporation
- */
-
-#ifndef __I915_TIMELINE_TYPES_H__
-#define __I915_TIMELINE_TYPES_H__
-
-#include <linux/list.h>
-#include <linux/kref.h>
-#include <linux/mutex.h>
-#include <linux/types.h>
-
-#include "i915_active_types.h"
-
-struct drm_i915_private;
-struct i915_vma;
-struct i915_timeline_cacheline;
-struct i915_syncmap;
-
-struct i915_timeline {
- u64 fence_context;
- u32 seqno;
-
- struct mutex mutex; /* protects the flow of requests */
-
- unsigned int pin_count;
- const u32 *hwsp_seqno;
- struct i915_vma *hwsp_ggtt;
- u32 hwsp_offset;
-
- struct i915_timeline_cacheline *hwsp_cacheline;
-
- bool has_initial_breadcrumb;
-
- /**
- * List of breadcrumbs associated with GPU requests currently
- * outstanding.
- */
- struct list_head requests;
-
- /* Contains an RCU guarded pointer to the last request. No reference is
- * held to the request; users must carefully acquire a reference to
- * the request using i915_active_request_get_request_rcu(), or hold the
- * struct_mutex.
- */
- struct i915_active_request last_request;
-
- /**
- * We track the most recent seqno that we wait on in every context so
- * that we only have to emit a new await and dependency on a more
- * recent sync point. As the contexts may be executed out-of-order, we
- * have to track each individually and can not rely on an absolute
- * global_seqno. When we know that all tracked fences are completed
- * (i.e. when the driver is idle), we know that the syncmap is
- * redundant and we can discard it without loss of generality.
- */
- struct i915_syncmap *sync;
-
- struct list_head link;
- struct intel_gt *gt;
-
- struct kref kref;
-};
-
-#endif /* __I915_TIMELINE_TYPES_H__ */
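The sync member documented above is what lets a timeline drop redundant awaits: before emitting a wait on a foreign fence, the caller asks the syncmap whether an equal-or-later seqno from that fence context has already been waited upon, and records the fence afterwards. A hedged sketch of that pattern, assuming the sync_set/sync_is_later wrappers removed above keep their shape under the intel_timeline_ prefix:

/* Illustrative only: collapse duplicate awaits via the timeline's syncmap. */
static int sketch_await_once(struct intel_timeline *tl, struct dma_fence *fence)
{
	/* Already waited on this context at an equal or later seqno? */
	if (intel_timeline_sync_is_later(tl, fence))
		return 0;

	/* ... emit the actual wait (semaphore or software await) here ... */

	/* Remember it so later requests on this timeline can skip it. */
	return intel_timeline_sync_set(tl, fence);
}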
selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
selftest(uncore, intel_uncore_live_selftests)
selftest(workarounds, intel_workarounds_live_selftests)
-selftest(timelines, i915_timeline_live_selftests)
+selftest(timelines, intel_timeline_live_selftests)
selftest(requests, i915_request_live_selftests)
selftest(active, i915_active_live_selftests)
selftest(objects, i915_gem_object_live_selftests)
selftest(syncmap, i915_syncmap_mock_selftests)
selftest(uncore, intel_uncore_mock_selftests)
selftest(engine, intel_engine_cs_mock_selftests)
-selftest(timelines, i915_timeline_mock_selftests)
+selftest(timelines, intel_timeline_mock_selftests)
selftest(requests, i915_request_mock_selftests)
selftest(objects, i915_gem_object_mock_selftests)
selftest(phys, i915_gem_phys_mock_selftests)
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2017-2018 Intel Corporation
- */
-
-#include <linux/prime_numbers.h>
-
-#include "gem/i915_gem_pm.h"
-
-#include "i915_random.h"
-#include "i915_selftest.h"
-
-#include "igt_flush_test.h"
-#include "mock_gem_device.h"
-#include "mock_timeline.h"
-
-static struct page *hwsp_page(struct i915_timeline *tl)
-{
- struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
-
- GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
- return sg_page(obj->mm.pages->sgl);
-}
-
-static unsigned long hwsp_cacheline(struct i915_timeline *tl)
-{
- unsigned long address = (unsigned long)page_address(hwsp_page(tl));
-
- return (address + tl->hwsp_offset) / CACHELINE_BYTES;
-}
-
-#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
-
-struct mock_hwsp_freelist {
- struct drm_i915_private *i915;
- struct radix_tree_root cachelines;
- struct i915_timeline **history;
- unsigned long count, max;
- struct rnd_state prng;
-};
-
-enum {
- SHUFFLE = BIT(0),
-};
-
-static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
- unsigned int idx,
- struct i915_timeline *tl)
-{
- tl = xchg(&state->history[idx], tl);
- if (tl) {
- radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
- i915_timeline_put(tl);
- }
-}
-
-static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
- unsigned int count,
- unsigned int flags)
-{
- struct i915_timeline *tl;
- unsigned int idx;
-
- while (count--) {
- unsigned long cacheline;
- int err;
-
- tl = i915_timeline_create(&state->i915->gt, NULL);
- if (IS_ERR(tl))
- return PTR_ERR(tl);
-
- cacheline = hwsp_cacheline(tl);
- err = radix_tree_insert(&state->cachelines, cacheline, tl);
- if (err) {
- if (err == -EEXIST) {
- pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
- cacheline);
- }
- i915_timeline_put(tl);
- return err;
- }
-
- idx = state->count++ % state->max;
- __mock_hwsp_record(state, idx, tl);
- }
-
- if (flags & SHUFFLE)
- i915_prandom_shuffle(state->history,
- sizeof(*state->history),
- min(state->count, state->max),
- &state->prng);
-
- count = i915_prandom_u32_max_state(min(state->count, state->max),
- &state->prng);
- while (count--) {
- idx = --state->count % state->max;
- __mock_hwsp_record(state, idx, NULL);
- }
-
- return 0;
-}
-
-static int mock_hwsp_freelist(void *arg)
-{
- struct mock_hwsp_freelist state;
- const struct {
- const char *name;
- unsigned int flags;
- } phases[] = {
- { "linear", 0 },
- { "shuffled", SHUFFLE },
- { },
- }, *p;
- unsigned int na;
- int err = 0;
-
- INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
- state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
-
- state.i915 = mock_gem_device();
- if (!state.i915)
- return -ENOMEM;
-
- /*
- * Create a bunch of timelines and check that their HWSPs do not overlap.
- * Free some, and try again.
- */
-
- state.max = PAGE_SIZE / sizeof(*state.history);
- state.count = 0;
- state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
- if (!state.history) {
- err = -ENOMEM;
- goto err_put;
- }
-
- mutex_lock(&state.i915->drm.struct_mutex);
- for (p = phases; p->name; p++) {
- pr_debug("%s(%s)\n", __func__, p->name);
- for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
- err = __mock_hwsp_timeline(&state, na, p->flags);
- if (err)
- goto out;
- }
- }
-
-out:
- for (na = 0; na < state.max; na++)
- __mock_hwsp_record(&state, na, NULL);
- mutex_unlock(&state.i915->drm.struct_mutex);
- kfree(state.history);
-err_put:
- drm_dev_put(&state.i915->drm);
- return err;
-}
-
-struct __igt_sync {
- const char *name;
- u32 seqno;
- bool expected;
- bool set;
-};
-
-static int __igt_sync(struct i915_timeline *tl,
- u64 ctx,
- const struct __igt_sync *p,
- const char *name)
-{
- int ret;
-
- if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
- pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
- name, p->name, ctx, p->seqno, yesno(p->expected));
- return -EINVAL;
- }
-
- if (p->set) {
- ret = __i915_timeline_sync_set(tl, ctx, p->seqno);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-static int igt_sync(void *arg)
-{
- const struct __igt_sync pass[] = {
- { "unset", 0, false, false },
- { "new", 0, false, true },
- { "0a", 0, true, true },
- { "1a", 1, false, true },
- { "1b", 1, true, true },
- { "0b", 0, true, false },
- { "2a", 2, false, true },
- { "4", 4, false, true },
- { "INT_MAX", INT_MAX, false, true },
- { "INT_MAX-1", INT_MAX-1, true, false },
- { "INT_MAX+1", (u32)INT_MAX+1, false, true },
- { "INT_MAX", INT_MAX, true, false },
- { "UINT_MAX", UINT_MAX, false, true },
- { "wrap", 0, false, true },
- { "unwrap", UINT_MAX, true, false },
- {},
- }, *p;
- struct i915_timeline tl;
- int order, offset;
- int ret = -ENODEV;
-
- mock_timeline_init(&tl, 0);
- for (p = pass; p->name; p++) {
- for (order = 1; order < 64; order++) {
- for (offset = -1; offset <= (order > 1); offset++) {
- u64 ctx = BIT_ULL(order) + offset;
-
- ret = __igt_sync(&tl, ctx, p, "1");
- if (ret)
- goto out;
- }
- }
- }
- mock_timeline_fini(&tl);
-
- mock_timeline_init(&tl, 0);
- for (order = 1; order < 64; order++) {
- for (offset = -1; offset <= (order > 1); offset++) {
- u64 ctx = BIT_ULL(order) + offset;
-
- for (p = pass; p->name; p++) {
- ret = __igt_sync(&tl, ctx, p, "2");
- if (ret)
- goto out;
- }
- }
- }
-
-out:
- mock_timeline_fini(&tl);
- return ret;
-}
-
-static unsigned int random_engine(struct rnd_state *rnd)
-{
- return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
-}
-
-static int bench_sync(void *arg)
-{
- struct rnd_state prng;
- struct i915_timeline tl;
- unsigned long end_time, count;
- u64 prng32_1M;
- ktime_t kt;
- int order, last_order;
-
- mock_timeline_init(&tl, 0);
-
- /* Lookups from cache are very fast and so the random number generation
- * and the loop itself become a significant factor in the per-iteration
- * timings. We try to compensate by measuring the overhead
- * of the prng and subtracting it from the reported results.
- */
- prandom_seed_state(&prng, i915_selftest.random_seed);
- count = 0;
- kt = ktime_get();
- end_time = jiffies + HZ/10;
- do {
- u32 x;
-
- /* Make sure the compiler doesn't optimise away the prng call */
- WRITE_ONCE(x, prandom_u32_state(&prng));
-
- count++;
- } while (!time_after(jiffies, end_time));
- kt = ktime_sub(ktime_get(), kt);
- pr_debug("%s: %lu random evaluations, %lluns/prng\n",
- __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
- prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
-
- /* Benchmark (only) setting random context ids */
- prandom_seed_state(&prng, i915_selftest.random_seed);
- count = 0;
- kt = ktime_get();
- end_time = jiffies + HZ/10;
- do {
- u64 id = i915_prandom_u64_state(&prng);
-
- __i915_timeline_sync_set(&tl, id, 0);
- count++;
- } while (!time_after(jiffies, end_time));
- kt = ktime_sub(ktime_get(), kt);
- kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
- pr_info("%s: %lu random insertions, %lluns/insert\n",
- __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-
- /* Benchmark looking up the exact same context ids as we just set */
- prandom_seed_state(&prng, i915_selftest.random_seed);
- end_time = count;
- kt = ktime_get();
- while (end_time--) {
- u64 id = i915_prandom_u64_state(&prng);
-
- if (!__i915_timeline_sync_is_later(&tl, id, 0)) {
- mock_timeline_fini(&tl);
- pr_err("Lookup of %llu failed\n", id);
- return -EINVAL;
- }
- }
- kt = ktime_sub(ktime_get(), kt);
- kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
- pr_info("%s: %lu random lookups, %lluns/lookup\n",
- __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-
- mock_timeline_fini(&tl);
- cond_resched();
-
- mock_timeline_init(&tl, 0);
-
- /* Benchmark setting the first N (in order) contexts */
- count = 0;
- kt = ktime_get();
- end_time = jiffies + HZ/10;
- do {
- __i915_timeline_sync_set(&tl, count++, 0);
- } while (!time_after(jiffies, end_time));
- kt = ktime_sub(ktime_get(), kt);
- pr_info("%s: %lu in-order insertions, %lluns/insert\n",
- __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-
- /* Benchmark looking up the exact same context ids as we just set */
- end_time = count;
- kt = ktime_get();
- while (end_time--) {
- if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) {
- pr_err("Lookup of %lu failed\n", end_time);
- mock_timeline_fini(&tl);
- return -EINVAL;
- }
- }
- kt = ktime_sub(ktime_get(), kt);
- pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
- __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-
- mock_timeline_fini(&tl);
- cond_resched();
-
- mock_timeline_init(&tl, 0);
-
- /* Benchmark searching for a random context id and maybe changing it */
- prandom_seed_state(&prng, i915_selftest.random_seed);
- count = 0;
- kt = ktime_get();
- end_time = jiffies + HZ/10;
- do {
- u32 id = random_engine(&prng);
- u32 seqno = prandom_u32_state(&prng);
-
- if (!__i915_timeline_sync_is_later(&tl, id, seqno))
- __i915_timeline_sync_set(&tl, id, seqno);
-
- count++;
- } while (!time_after(jiffies, end_time));
- kt = ktime_sub(ktime_get(), kt);
- kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
- pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
- __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
- mock_timeline_fini(&tl);
- cond_resched();
-
- /* Benchmark searching for a known context id and changing the seqno */
- for (last_order = 1, order = 1; order < 32;
- ({ int tmp = last_order; last_order = order; order += tmp; })) {
- unsigned int mask = BIT(order) - 1;
-
- mock_timeline_init(&tl, 0);
-
- count = 0;
- kt = ktime_get();
- end_time = jiffies + HZ/10;
- do {
- /* Without assuming too many details of the underlying
- * implementation, try to identify its phase-changes
- * (if any)!
- */
- u64 id = (u64)(count & mask) << order;
-
- __i915_timeline_sync_is_later(&tl, id, 0);
- __i915_timeline_sync_set(&tl, id, 0);
-
- count++;
- } while (!time_after(jiffies, end_time));
- kt = ktime_sub(ktime_get(), kt);
- pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
- __func__, count, order,
- (long long)div64_ul(ktime_to_ns(kt), count));
- mock_timeline_fini(&tl);
- cond_resched();
- }
-
- return 0;
-}
-
-int i915_timeline_mock_selftests(void)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(mock_hwsp_freelist),
- SUBTEST(igt_sync),
- SUBTEST(bench_sync),
- };
-
- return i915_subtests(tests, NULL);
-}
-
-static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- if (INTEL_GEN(rq->i915) >= 8) {
- *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = addr;
- *cs++ = 0;
- *cs++ = value;
- } else if (INTEL_GEN(rq->i915) >= 4) {
- *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = 0;
- *cs++ = addr;
- *cs++ = value;
- } else {
- *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *cs++ = addr;
- *cs++ = value;
- *cs++ = MI_NOOP;
- }
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static struct i915_request *
-tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
-{
- struct i915_request *rq;
- int err;
-
- lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */
-
- err = i915_timeline_pin(tl);
- if (err) {
- rq = ERR_PTR(err);
- goto out;
- }
-
- rq = i915_request_create(engine->kernel_context);
- if (IS_ERR(rq))
- goto out_unpin;
-
- err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
- i915_request_add(rq);
- if (err)
- rq = ERR_PTR(err);
-
-out_unpin:
- i915_timeline_unpin(tl);
-out:
- if (IS_ERR(rq))
- pr_err("Failed to write to timeline!\n");
- return rq;
-}
-
-static struct i915_timeline *
-checked_i915_timeline_create(struct drm_i915_private *i915)
-{
- struct i915_timeline *tl;
-
- tl = i915_timeline_create(&i915->gt, NULL);
- if (IS_ERR(tl))
- return tl;
-
- if (*tl->hwsp_seqno != tl->seqno) {
- pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
- *tl->hwsp_seqno, tl->seqno);
- i915_timeline_put(tl);
- return ERR_PTR(-EINVAL);
- }
-
- return tl;
-}
-
-static int live_hwsp_engine(void *arg)
-{
-#define NUM_TIMELINES 4096
- struct drm_i915_private *i915 = arg;
- struct i915_timeline **timelines;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- unsigned long count, n;
- int err = 0;
-
- /*
- * Create a bunch of timelines and check we can write
- * independently to each of their breadcrumb slots.
- */
-
- timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
- sizeof(*timelines),
- GFP_KERNEL);
- if (!timelines)
- return -ENOMEM;
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- count = 0;
- for_each_engine(engine, i915, id) {
- if (!intel_engine_can_store_dword(engine))
- continue;
-
- for (n = 0; n < NUM_TIMELINES; n++) {
- struct i915_timeline *tl;
- struct i915_request *rq;
-
- tl = checked_i915_timeline_create(i915);
- if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto out;
- }
-
- rq = tl_write(tl, engine, count);
- if (IS_ERR(rq)) {
- i915_timeline_put(tl);
- err = PTR_ERR(rq);
- goto out;
- }
-
- timelines[count++] = tl;
- }
- }
-
-out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
-
- for (n = 0; n < count; n++) {
- struct i915_timeline *tl = timelines[n];
-
- if (!err && *tl->hwsp_seqno != n) {
- pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
- n, *tl->hwsp_seqno);
- err = -EINVAL;
- }
- i915_timeline_put(tl);
- }
-
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
- kvfree(timelines);
-
- return err;
-#undef NUM_TIMELINES
-}
-
-static int live_hwsp_alternate(void *arg)
-{
-#define NUM_TIMELINES 4096
- struct drm_i915_private *i915 = arg;
- struct i915_timeline **timelines;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- unsigned long count, n;
- int err = 0;
-
- /*
- * Create a bunch of timelines and check we can write
- * independently to each of their breadcrumb slots with adjacent
- * engines.
- */
-
- timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
- sizeof(*timelines),
- GFP_KERNEL);
- if (!timelines)
- return -ENOMEM;
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- count = 0;
- for (n = 0; n < NUM_TIMELINES; n++) {
- for_each_engine(engine, i915, id) {
- struct i915_timeline *tl;
- struct i915_request *rq;
-
- if (!intel_engine_can_store_dword(engine))
- continue;
-
- tl = checked_i915_timeline_create(i915);
- if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto out;
- }
-
- rq = tl_write(tl, engine, count);
- if (IS_ERR(rq)) {
- i915_timeline_put(tl);
- err = PTR_ERR(rq);
- goto out;
- }
-
- timelines[count++] = tl;
- }
- }
-
-out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
-
- for (n = 0; n < count; n++) {
- struct i915_timeline *tl = timelines[n];
-
- if (!err && *tl->hwsp_seqno != n) {
- pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
- n, *tl->hwsp_seqno);
- err = -EINVAL;
- }
- i915_timeline_put(tl);
- }
-
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
- kvfree(timelines);
-
- return err;
-#undef NUM_TIMELINES
-}
-
-static int live_hwsp_wrap(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- struct i915_timeline *tl;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- int err = 0;
-
- /*
- * Across a seqno wrap, we need to keep the old cacheline alive for
- * foreign GPU references.
- */
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- tl = i915_timeline_create(&i915->gt, NULL);
- if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto out_rpm;
- }
- if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
- goto out_free;
-
- err = i915_timeline_pin(tl);
- if (err)
- goto out_free;
-
- for_each_engine(engine, i915, id) {
- const u32 *hwsp_seqno[2];
- struct i915_request *rq;
- u32 seqno[2];
-
- if (!intel_engine_can_store_dword(engine))
- continue;
-
- rq = i915_request_create(engine->kernel_context);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out;
- }
-
- tl->seqno = -4u;
-
- err = i915_timeline_get_seqno(tl, rq, &seqno[0]);
- if (err) {
- i915_request_add(rq);
- goto out;
- }
- pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
- seqno[0], tl->hwsp_offset);
-
- err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
- if (err) {
- i915_request_add(rq);
- goto out;
- }
- hwsp_seqno[0] = tl->hwsp_seqno;
-
- err = i915_timeline_get_seqno(tl, rq, &seqno[1]);
- if (err) {
- i915_request_add(rq);
- goto out;
- }
- pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
- seqno[1], tl->hwsp_offset);
-
- err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
- if (err) {
- i915_request_add(rq);
- goto out;
- }
- hwsp_seqno[1] = tl->hwsp_seqno;
-
- /* With wrap should come a new hwsp */
- GEM_BUG_ON(seqno[1] >= seqno[0]);
- GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
-
- i915_request_add(rq);
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- pr_err("Wait for timeline writes timed out!\n");
- err = -EIO;
- goto out;
- }
-
- if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
- pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
- *hwsp_seqno[0], *hwsp_seqno[1],
- seqno[0], seqno[1]);
- err = -EINVAL;
- goto out;
- }
-
- i915_retire_requests(i915); /* recycle HWSP */
- }
-
-out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
-
- i915_timeline_unpin(tl);
-out_free:
- i915_timeline_put(tl);
-out_rpm:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
-}
-
-static int live_hwsp_recycle(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- unsigned long count;
- int err = 0;
-
- /*
- * Check seqno writes into one timeline at a time. We expect to
- * recycle the breadcrumb slot between iterations and neither
- * want to confuse ourselves or the GPU.
- */
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- count = 0;
- for_each_engine(engine, i915, id) {
- IGT_TIMEOUT(end_time);
-
- if (!intel_engine_can_store_dword(engine))
- continue;
-
- do {
- struct i915_timeline *tl;
- struct i915_request *rq;
-
- tl = checked_i915_timeline_create(i915);
- if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto out;
- }
-
- rq = tl_write(tl, engine, count);
- if (IS_ERR(rq)) {
- i915_timeline_put(tl);
- err = PTR_ERR(rq);
- goto out;
- }
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- pr_err("Wait for timeline writes timed out!\n");
- i915_timeline_put(tl);
- err = -EIO;
- goto out;
- }
-
- if (*tl->hwsp_seqno != count) {
- pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
- count, *tl->hwsp_seqno);
- err = -EINVAL;
- }
-
- i915_timeline_put(tl);
- count++;
-
- if (err)
- goto out;
-
- i915_timelines_park(i915); /* Encourage recycling! */
- } while (!__igt_timeout(end_time, NULL));
- }
-
-out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
-}
-
-int i915_timeline_live_selftests(struct drm_i915_private *i915)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(live_hwsp_recycle),
- SUBTEST(live_hwsp_engine),
- SUBTEST(live_hwsp_alternate),
- SUBTEST(live_hwsp_wrap),
- };
-
- if (i915_terminally_wedged(i915))
- return 0;
-
- return i915_subtests(tests, i915);
-}
i915_gem_contexts_fini(i915);
mutex_unlock(&i915->drm.struct_mutex);
- i915_timelines_fini(i915);
+ intel_timelines_fini(i915);
drain_workqueue(i915->wq);
i915_gem_drain_freed_objects(i915);
i915->gt.awake = true;
- i915_timelines_init(i915);
+ intel_timelines_init(i915);
mutex_lock(&i915->drm.struct_mutex);
mock_engine_free(i915->engine[RCS0]);
err_unlock:
mutex_unlock(&i915->drm.struct_mutex);
- i915_timelines_fini(i915);
+ intel_timelines_fini(i915);
destroy_workqueue(i915->wq);
err_drv:
drm_mode_config_cleanup(&i915->drm);
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2017-2018 Intel Corporation
- */
-
-#include "../i915_timeline.h"
-
-#include "mock_timeline.h"
-
-void mock_timeline_init(struct i915_timeline *timeline, u64 context)
-{
- timeline->gt = NULL;
- timeline->fence_context = context;
-
- mutex_init(&timeline->mutex);
-
- INIT_ACTIVE_REQUEST(&timeline->last_request);
- INIT_LIST_HEAD(&timeline->requests);
-
- i915_syncmap_init(&timeline->sync);
-
- INIT_LIST_HEAD(&timeline->link);
-}
-
-void mock_timeline_fini(struct i915_timeline *timeline)
-{
- i915_syncmap_free(&timeline->sync);
-}
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2017-2018 Intel Corporation
- */
-
-#ifndef __MOCK_TIMELINE__
-#define __MOCK_TIMELINE__
-
-struct i915_timeline;
-
-void mock_timeline_init(struct i915_timeline *timeline, u64 context);
-void mock_timeline_fini(struct i915_timeline *timeline);
-
-#endif /* !__MOCK_TIMELINE__ */