# Extra header tests
include $(src)/Makefile.header-test
+subdir-ccflags-y += -I$(src)
+
# Please keep these build lists sorted!
# core driver code
i915-y += i915_drv.o \
i915_irq.o \
- i915_memcpy.o \
- i915_mm.o \
i915_params.o \
i915_pci.o \
- i915_reset.o \
i915_suspend.o \
- i915_sw_fence.o \
- i915_syncmap.o \
i915_sysfs.o \
- i915_user_extensions.o \
intel_csr.o \
intel_device_info.o \
intel_pm.o \
intel_runtime_pm.o \
- intel_workarounds.o
+ intel_uncore.o
+
+# core library code
+i915-y += \
+ i915_memcpy.o \
+ i915_mm.o \
+ i915_sw_fence.o \
+ i915_syncmap.o \
+ i915_user_extensions.o
i915-$(CONFIG_COMPAT) += i915_ioc32.o
i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
-# GEM code
+# "Graphics Technology" (aka we talk to the gpu)
+obj-y += gt/
+# Objects built from gt/; kept in alphabetical order like the other lists
+gt-y += \
+ gt/intel_breadcrumbs.o \
+ gt/intel_context.o \
+ gt/intel_engine_cs.o \
+ gt/intel_hangcheck.o \
+ gt/intel_lrc.o \
+ gt/intel_mocs.o \
+ gt/intel_reset.o \
+ gt/intel_ringbuffer.o \
+ gt/intel_sseu.o \
+ gt/intel_workarounds.o
+gt-$(CONFIG_DRM_I915_SELFTEST) += \
+ gt/mock_engine.o
+i915-y += $(gt-y)
+
+# GEM (Graphics Execution Management) code
i915-y += \
i915_active.o \
i915_cmd_parser.o \
i915_timeline.o \
i915_trace_points.o \
i915_vma.o \
- intel_breadcrumbs.o \
- intel_context.o \
- intel_engine_cs.o \
- intel_hangcheck.o \
- intel_lrc.o \
- intel_mocs.o \
- intel_ringbuffer.o \
- intel_sseu.o \
- intel_uncore.o \
intel_wopcm.o
# general-purpose microcontroller (GuC) support
intel_cdclk.h \
intel_color.h \
intel_connector.h \
- intel_context_types.h \
intel_crt.h \
intel_csr.h \
intel_ddi.h \
intel_dp.h \
intel_dvo.h \
- intel_engine_types.h \
intel_fbc.h \
intel_fbdev.h \
intel_frontbuffer.h \
intel_psr.h \
intel_sdvo.h \
intel_sprite.h \
- intel_sseu.h \
- intel_tv.h \
- intel_workarounds_types.h
+ intel_tv.h
quiet_cmd_header_test = HDRTEST $@
cmd_header_test = echo "\#include \"$(<F)\"" > $@
--- /dev/null
+# Extra header tests
+include $(src)/Makefile.header-test
--- /dev/null
+# SPDX-License-Identifier: MIT
+# Copyright © 2019 Intel Corporation
+
+# Test the headers are compilable as standalone units
+# header_test holds the basename of every *.h found in this directory.
+header_test := $(notdir $(wildcard $(src)/*.h))
+
+# Generate a one-line C file that does nothing but #include the header.
+quiet_cmd_header_test = HDRTEST $@
+ cmd_header_test = echo "\#include \"$(<F)\"" > $@
+
+# foo.h -> header_test_foo.c
+header_test_%.c: %.h
+ $(call cmd,header_test)
+
+# Only build the generated objects when CONFIG_DRM_I915_WERROR is enabled.
+extra-$(CONFIG_DRM_I915_WERROR) += \
+ $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
+
+# The generated .c files are removed on clean.
+clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kthread.h>
+#include <uapi/linux/sched/types.h>
+
+#include "i915_drv.h"
+
+/*
+ * Call the engine's optional irq_enable hook while holding i915->irq_lock.
+ * Engines that do not provide the hook are left untouched.
+ */
+static void irq_enable(struct intel_engine_cs *engine)
+{
+	if (engine->irq_enable) {
+		/* Caller disables interrupts, so a plain spin_lock is enough */
+		spin_lock(&engine->i915->irq_lock);
+		engine->irq_enable(engine);
+		spin_unlock(&engine->i915->irq_lock);
+	}
+}
+
+/*
+ * Call the engine's optional irq_disable hook while holding i915->irq_lock.
+ * Engines that do not provide the hook are left untouched.
+ */
+static void irq_disable(struct intel_engine_cs *engine)
+{
+	if (engine->irq_disable) {
+		/* Caller disables interrupts, so a plain spin_lock is enough */
+		spin_lock(&engine->i915->irq_lock);
+		engine->irq_disable(engine);
+		spin_unlock(&engine->i915->irq_lock);
+	}
+}
+
+/*
+ * Drop the reference on the interrupt that arming took and mark the
+ * breadcrumbs as no longer armed. The interrupt itself is only switched
+ * off once the irq_enabled count reaches zero. Requires b->irq_lock.
+ */
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
+	struct intel_engine_cs *engine =
+		container_of(b, struct intel_engine_cs, breadcrumbs);
+
+	lockdep_assert_held(&b->irq_lock);
+
+	GEM_BUG_ON(!b->irq_enabled);
+	b->irq_enabled--;
+	if (!b->irq_enabled)
+		irq_disable(engine);
+
+	b->irq_armed = false;
+}
+
+/*
+ * Disarm the breadcrumb interrupt of @engine if it is currently armed.
+ * The armed flag is peeked without the lock first and then checked again
+ * under b->irq_lock before actually disarming.
+ */
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	if (b->irq_armed) {
+		spin_lock_irq(&b->irq_lock);
+		if (b->irq_armed)
+			__intel_breadcrumbs_disarm_irq(b);
+		spin_unlock_irq(&b->irq_lock);
+	}
+}
+
+/* True once the seqno read via __hwsp_seqno() has passed rq->fence.seqno. */
+static inline bool __request_completed(const struct i915_request *rq)
+{
+ return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
+}
+
+/*
+ * intel_engine_breadcrumbs_irq - signal all completed requests on an engine
+ * @engine: the engine whose signalers are inspected
+ *
+ * Under b->irq_lock, walk every context queued on b->signalers and move each
+ * request that __request_completed() reports as done onto a local list,
+ * taking a reference and clearing I915_FENCE_FLAG_SIGNAL as it goes. Once the
+ * lock has been dropped, dma_fence_signal() is called on each collected
+ * request and the reference is released.
+ *
+ * Only a plain spin_lock() is taken here; intel_engine_signal_breadcrumbs()
+ * shows the expected calling convention with local interrupts disabled.
+ */
+void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
+{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ struct intel_context *ce, *cn;
+ struct list_head *pos, *next;
+ LIST_HEAD(signal);
+
+ spin_lock(&b->irq_lock);
+
+ /* Nobody left to signal: give up the interrupt armed on their behalf */
+ if (b->irq_armed && list_empty(&b->signalers))
+ __intel_breadcrumbs_disarm_irq(b);
+
+ list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
+ GEM_BUG_ON(list_empty(&ce->signals));
+
+ list_for_each_safe(pos, next, &ce->signals) {
+ struct i915_request *rq =
+ list_entry(pos, typeof(*rq), signal_link);
+
+ /* Stop at the first request that has not completed yet */
+ if (!__request_completed(rq))
+ break;
+
+ GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
+ &rq->fence.flags));
+
+ /*
+ * Queue for execution after dropping the signaling
+ * spinlock as the callback chain may end up adding
+ * more signalers to the same context or engine.
+ */
+ i915_request_get(rq);
+
+ /*
+ * We may race with direct invocation of
+ * dma_fence_signal(), e.g. i915_request_retire(),
+ * so we need to acquire our reference to the request
+ * before we cancel the breadcrumb.
+ */
+ clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+ list_add_tail(&rq->signal_link, &signal);
+ }
+
+ /*
+ * We process the list deletion in bulk, only using a list_add
+ * (not list_move) above but keeping the status of
+ * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit.
+ */
+ if (!list_is_first(pos, &ce->signals)) {
+ /* Advance the list to the first incomplete request */
+ __list_del_many(&ce->signals, pos);
+ if (&ce->signals == pos) /* now empty */
+ list_del_init(&ce->signal_link);
+ }
+ }
+
+ spin_unlock(&b->irq_lock);
+
+ /* Signal outside the lock and drop the reference taken above */
+ list_for_each_safe(pos, next, &signal) {
+ struct i915_request *rq =
+ list_entry(pos, typeof(*rq), signal_link);
+
+ dma_fence_signal(&rq->fence);
+ i915_request_put(rq);
+ }
+}
+
+/*
+ * Run intel_engine_breadcrumbs_irq() for @engine with local interrupts
+ * disabled. Interrupts are unconditionally re-enabled afterwards.
+ */
+void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
+{
+ local_irq_disable();
+ intel_engine_breadcrumbs_irq(engine);
+ local_irq_enable();
+}
+
+/* irq_work callback: map the work item back to its engine and signal it. */
+static void signal_irq_work(struct irq_work *work)
+{
+	struct intel_engine_cs *engine;
+
+	engine = container_of(work, struct intel_engine_cs,
+			      breadcrumbs.irq_work);
+	intel_engine_breadcrumbs_irq(engine);
+}
+
+/*
+ * Take a reference on the engine interrupt. The interrupt is switched on
+ * when the irq_enabled count leaves zero.
+ */
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	bool first;
+
+	spin_lock_irq(&b->irq_lock);
+
+	first = !b->irq_enabled;
+	b->irq_enabled++;
+	if (first)
+		irq_enable(engine);
+	GEM_BUG_ON(!b->irq_enabled); /* no overflow! */
+
+	spin_unlock_irq(&b->irq_lock);
+}
+
+/*
+ * Release a reference on the engine interrupt. The interrupt is switched
+ * off when the irq_enabled count returns to zero.
+ */
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	spin_lock_irq(&b->irq_lock);
+
+	GEM_BUG_ON(!b->irq_enabled); /* no underflow! */
+	b->irq_enabled--;
+	if (!b->irq_enabled)
+		irq_disable(engine);
+
+	spin_unlock_irq(&b->irq_lock);
+}
+
+/*
+ * Mark the breadcrumbs as armed and take a reference on the engine
+ * interrupt, enabling it if the count was zero. Does nothing if already
+ * armed. Requires b->irq_lock.
+ */
+static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+{
+ struct intel_engine_cs *engine =
+ container_of(b, struct intel_engine_cs, breadcrumbs);
+
+ lockdep_assert_held(&b->irq_lock);
+ if (b->irq_armed)
+ return;
+
+ /*
+ * The breadcrumb irq will be disarmed on the interrupt after the
+ * waiters are signaled. This gives us a single interrupt window in
+ * which we can add a new waiter and avoid the cost of re-enabling
+ * the irq.
+ */
+ b->irq_armed = true;
+
+ /*
+ * Since we are waiting on a request, the GPU should be busy
+ * and should have its own rpm reference. This is tracked
+ * by i915->gt.awake, we can forgo holding our own wakeref
+ * for the interrupt as before i915->gt.awake is released (when
+ * the driver is idle) we disarm the breadcrumbs.
+ */
+
+ if (!b->irq_enabled++)
+ irq_enable(engine);
+}
+
+/*
+ * Prepare the breadcrumb state of @engine: its lock, the (empty) list of
+ * signaling contexts and the irq_work that runs signal_irq_work().
+ */
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
+{
+	spin_lock_init(&engine->breadcrumbs.irq_lock);
+	INIT_LIST_HEAD(&engine->breadcrumbs.signalers);
+
+	init_irq_work(&engine->breadcrumbs.irq_work, signal_irq_work);
+}
+
+/*
+ * Re-apply the interrupt state that the irq_enabled count says we should
+ * have: enabled while references are held, disabled otherwise.
+ */
+void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&b->irq_lock, irqflags);
+
+	if (!b->irq_enabled)
+		irq_disable(engine);
+	else
+		irq_enable(engine);
+
+	spin_unlock_irqrestore(&b->irq_lock, irqflags);
+}
+
+/* Counterpart of intel_engine_init_breadcrumbs(); currently nothing to do. */
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
+{
+}
+
+/*
+ * i915_request_enable_breadcrumb - queue a request for interrupt signaling
+ * @rq: the request to be signaled on completion
+ *
+ * If @rq has I915_FENCE_FLAG_ACTIVE set and has not completed, arm the
+ * engine interrupt, insert @rq into its context's seqno-ordered signals
+ * list and set I915_FENCE_FLAG_SIGNAL. Takes b->irq_lock with a plain
+ * spin_lock().
+ *
+ * Returns true immediately if the request is not flagged active, otherwise
+ * true if the request has still not completed on exit.
+ */
+bool i915_request_enable_breadcrumb(struct i915_request *rq)
+{
+ struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
+
+ GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
+ if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+ return true;
+
+ spin_lock(&b->irq_lock);
+ /* Re-check under the lock; the unlocked test above was only a hint */
+ if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
+ !__request_completed(rq)) {
+ struct intel_context *ce = rq->hw_context;
+ struct list_head *pos;
+
+ __intel_breadcrumbs_arm_irq(b);
+
+ /*
+ * We keep the seqno in retirement order, so we can break
+ * inside intel_engine_breadcrumbs_irq as soon as we've passed
+ * the last completed request (or seen a request that hasn't
+ * even started). We could iterate the timeline->requests list,
+ * but keeping a separate signalers_list has the advantage of
+ * hopefully being much smaller than the full list and so
+ * provides faster iteration and detection when there are no
+ * more interrupts required for this context.
+ *
+ * We typically expect to add new signalers in order, so we
+ * start looking for our insertion point from the tail of
+ * the list.
+ */
+ list_for_each_prev(pos, &ce->signals) {
+ struct i915_request *it =
+ list_entry(pos, typeof(*it), signal_link);
+
+ if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
+ break;
+ }
+ list_add(&rq->signal_link, pos);
+ if (pos == &ce->signals) /* catch transitions from empty list */
+ list_move_tail(&ce->signal_link, &b->signalers);
+
+ set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+ }
+ spin_unlock(&b->irq_lock);
+
+ return !__request_completed(rq);
+}
+
+/*
+ * i915_request_cancel_breadcrumb - remove a request from interrupt signaling
+ * @rq: the request to withdraw
+ *
+ * If I915_FENCE_FLAG_SIGNAL is set, unlink @rq from its context's signals
+ * list (dropping the context from the engine's signalers when that list
+ * becomes empty) and clear the flag. The flag is peeked without the lock
+ * and tested again under b->irq_lock.
+ */
+void i915_request_cancel_breadcrumb(struct i915_request *rq)
+{
+	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
+
+	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
+		spin_lock(&b->irq_lock);
+		if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
+			struct intel_context *owner = rq->hw_context;
+
+			list_del(&rq->signal_link);
+			if (list_empty(&owner->signals))
+				list_del_init(&owner->signal_link);
+
+			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+		}
+		spin_unlock(&b->irq_lock);
+	}
+}
+
+/*
+ * intel_engine_print_breadcrumbs - dump the requests awaiting a breadcrumb
+ * @engine: engine whose signalers are listed
+ * @p: printer receiving the output
+ *
+ * Prints nothing when no context is queued on the engine. Otherwise emits a
+ * "Signals:" header followed by one line per queued request: the fence
+ * context and seqno, a "!" marker if the request has completed or "*" if it
+ * has started, and the time in milliseconds since rq->emitted_jiffies.
+ */
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+				    struct drm_printer *p)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct intel_context *ce;
+	struct i915_request *rq;
+
+	/* Unlocked peek; skip the header entirely when there is nothing queued */
+	if (list_empty(&b->signalers))
+		return;
+
+	drm_printf(p, "Signals:\n");
+
+	spin_lock_irq(&b->irq_lock);
+	list_for_each_entry(ce, &b->signalers, signal_link) {
+		list_for_each_entry(rq, &ce->signals, signal_link) {
+			/* jiffies_to_msecs() returns unsigned int, hence %u */
+			drm_printf(p, "\t[%llx:%llx%s] @ %ums\n",
+				   rq->fence.context, rq->fence.seqno,
+				   i915_request_completed(rq) ? "!" :
+				   i915_request_started(rq) ? "*" :
+				   "",
+				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+		}
+	}
+	spin_unlock_irq(&b->irq_lock);
+}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_context.h"
+#include "i915_globals.h"
+
+#include "intel_context.h"
+#include "intel_engine.h"
+
+/* File-local state: the i915_global hooks plus the intel_context slab cache */
+static struct i915_global_context {
+ struct i915_global base;
+ struct kmem_cache *slab_ce;
+} global;
+
+/* Allocate a zeroed intel_context from the slab (GFP_KERNEL); NULL on failure */
+struct intel_context *intel_context_alloc(void)
+{
+ return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
+}
+
+/* Return @ce to the slab it was allocated from by intel_context_alloc() */
+void intel_context_free(struct intel_context *ce)
+{
+ kmem_cache_free(global.slab_ce, ce);
+}
+
+/*
+ * Search the ctx->hw_contexts rb-tree, which is keyed on the engine pointer,
+ * for the HW context belonging to @engine. The walk is done under
+ * ctx->hw_contexts_lock. Returns the match or NULL if there is none.
+ */
+struct intel_context *
+intel_context_lookup(struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine)
+{
+	struct intel_context *found = NULL;
+	struct rb_node *cursor;
+
+	spin_lock(&ctx->hw_contexts_lock);
+	for (cursor = ctx->hw_contexts.rb_node; cursor; ) {
+		struct intel_context *it =
+			rb_entry(cursor, struct intel_context, node);
+
+		if (it->engine == engine) {
+			GEM_BUG_ON(it->gem_context != ctx);
+			found = it;
+			break;
+		}
+
+		/* Smaller engine pointers live to the left, larger to the right */
+		cursor = it->engine < engine ?
+			cursor->rb_right : cursor->rb_left;
+	}
+	spin_unlock(&ctx->hw_contexts_lock);
+
+	return found;
+}
+
+/*
+ * Insert @ce into the ctx->hw_contexts rb-tree under the key @engine.
+ *
+ * If an entry for @engine is already present, the tree is left unchanged
+ * and that existing entry is returned instead; otherwise @ce is linked in
+ * and returned. Callers compare the result against @ce to detect a lost
+ * race.
+ */
+struct intel_context *
+__intel_context_insert(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine,
+		       struct intel_context *ce)
+{
+	struct rb_node **link, *parent = NULL;
+	bool exists = false;
+
+	spin_lock(&ctx->hw_contexts_lock);
+
+	link = &ctx->hw_contexts.rb_node;
+	while (*link) {
+		struct intel_context *it;
+
+		parent = *link;
+		it = rb_entry(parent, struct intel_context, node);
+
+		if (it->engine == engine) {
+			/* Somebody got there first; hand back their entry */
+			ce = it;
+			exists = true;
+			break;
+		}
+
+		link = it->engine < engine ?
+			&parent->rb_right : &parent->rb_left;
+	}
+
+	if (!exists) {
+		rb_link_node(&ce->node, parent, link);
+		rb_insert_color(&ce->node, &ctx->hw_contexts);
+	}
+
+	spin_unlock(&ctx->hw_contexts_lock);
+
+	return ce;
+}
+
+/* Unlink @ce from its GEM context's hw_contexts rb-tree, under the tree lock */
+void __intel_context_remove(struct intel_context *ce)
+{
+ struct i915_gem_context *ctx = ce->gem_context;
+
+ spin_lock(&ctx->hw_contexts_lock);
+ rb_erase(&ce->node, &ctx->hw_contexts);
+ spin_unlock(&ctx->hw_contexts_lock);
+}
+
+/*
+ * Return the HW context for (@ctx, @engine), creating and inserting a new
+ * one if the lookup finds nothing. Returns ERR_PTR(-ENOMEM) if allocation
+ * fails.
+ */
+static struct intel_context *
+intel_context_instance(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine)
+{
+	struct intel_context *fresh, *winner;
+
+	winner = intel_context_lookup(ctx, engine);
+	if (likely(winner))
+		return winner;
+
+	fresh = intel_context_alloc();
+	if (!fresh)
+		return ERR_PTR(-ENOMEM);
+
+	intel_context_init(fresh, ctx, engine);
+
+	winner = __intel_context_insert(ctx, engine, fresh);
+	if (unlikely(winner != fresh)) /* Beaten! Use their HW context instead */
+		intel_context_free(fresh);
+
+	GEM_BUG_ON(intel_context_lookup(ctx, engine) != winner);
+	return winner;
+}
+
+/*
+ * Find (or create) the HW context for (@ctx, @engine) and take its
+ * pin_mutex. Returns the context with the mutex held, the error from
+ * instantiating it, or ERR_PTR(-EINTR) if the interruptible lock attempt
+ * fails.
+ */
+struct intel_context *
+intel_context_pin_lock(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine)
+	__acquires(ce->pin_mutex)
+{
+	struct intel_context *ce = intel_context_instance(ctx, engine);
+
+	if (!IS_ERR(ce) && mutex_lock_interruptible(&ce->pin_mutex))
+		ce = ERR_PTR(-EINTR);
+
+	return ce;
+}
+
+/*
+ * intel_context_pin - find (or create) the HW context and pin it
+ * @ctx: the parent GEM context
+ * @engine: the target HW engine
+ *
+ * An already-pinned context just has its pin_count bumped. Otherwise, under
+ * ce->pin_mutex, the first pin calls ce->ops->pin(), takes a reference on
+ * both @ctx and the intel_context, and adds the context to
+ * ctx->active_engines before making the pin visible.
+ *
+ * Returns the pinned context, or an ERR_PTR: the error from instantiating
+ * the context, -EINTR if the mutex wait fails, or the error from ops->pin().
+ */
+struct intel_context *
+intel_context_pin(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ int err;
+
+ ce = intel_context_instance(ctx, engine);
+ if (IS_ERR(ce))
+ return ce;
+
+ if (likely(atomic_inc_not_zero(&ce->pin_count))) /* fast path: already pinned */
+ return ce;
+
+ if (mutex_lock_interruptible(&ce->pin_mutex))
+ return ERR_PTR(-EINTR);
+
+ if (likely(!atomic_read(&ce->pin_count))) { /* still unpinned now we hold the mutex */
+ err = ce->ops->pin(ce);
+ if (err)
+ goto err;
+
+ i915_gem_context_get(ctx);
+ GEM_BUG_ON(ce->gem_context != ctx);
+
+ mutex_lock(&ctx->mutex);
+ list_add(&ce->active_link, &ctx->active_engines);
+ mutex_unlock(&ctx->mutex);
+
+ intel_context_get(ce);
+ smp_mb__before_atomic(); /* flush pin before it is visible */
+ }
+
+ atomic_inc(&ce->pin_count);
+ GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
+
+ mutex_unlock(&ce->pin_mutex);
+ return ce;
+
+err:
+ mutex_unlock(&ce->pin_mutex);
+ return ERR_PTR(err);
+}
+
+/*
+ * intel_context_unpin - drop one pin on the HW context
+ * @ce: the context to unpin
+ *
+ * Any pin other than the last is dropped with a lockless decrement. The
+ * final pin is released under ce->pin_mutex: ce->ops->unpin() is called, the
+ * context is removed from its GEM context's active list, and the references
+ * on the GEM context and on @ce taken by the first pin are put.
+ */
+void intel_context_unpin(struct intel_context *ce)
+{
+ if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) /* not the last pin */
+ return;
+
+ /* We may be called from inside intel_context_pin() to evict another */
+ intel_context_get(ce); /* keep ce alive until the mutex is released below */
+ mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
+
+ if (likely(atomic_dec_and_test(&ce->pin_count))) {
+ ce->ops->unpin(ce);
+
+ mutex_lock(&ce->gem_context->mutex);
+ list_del(&ce->active_link);
+ mutex_unlock(&ce->gem_context->mutex);
+
+ i915_gem_context_put(ce->gem_context);
+ intel_context_put(ce);
+ }
+
+ mutex_unlock(&ce->pin_mutex);
+ intel_context_put(ce);
+}
+
+/*
+ * Retire callback installed on ce->active_tracker by intel_context_init():
+ * unpins the context that embeds @active. @rq is not used.
+ */
+static void intel_context_retire(struct i915_active_request *active,
+				 struct i915_request *rq)
+{
+	intel_context_unpin(container_of(active, struct intel_context,
+					 active_tracker));
+}
+
+/*
+ * intel_context_init - initialise a freshly allocated HW context
+ * @ce: the context to set up
+ * @ctx: the parent GEM context
+ * @engine: the engine this context is for
+ *
+ * Sets the initial reference, records the owner and engine, copies the
+ * engine's context ops and sseu configuration, and initialises the
+ * signaling lists, the pin mutex and the active tracker.
+ */
+void
+intel_context_init(struct intel_context *ce,
+ struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ kref_init(&ce->ref);
+
+ ce->gem_context = ctx;
+ ce->engine = engine;
+ ce->ops = engine->cops;
+ ce->sseu = engine->sseu;
+
+ INIT_LIST_HEAD(&ce->signal_link);
+ INIT_LIST_HEAD(&ce->signals);
+
+ mutex_init(&ce->pin_mutex);
+
+ i915_active_request_init(&ce->active_tracker,
+ NULL, intel_context_retire);
+}
+
+/* i915_global .shrink hook: shrink the intel_context slab cache */
+static void i915_global_context_shrink(void)
+{
+ kmem_cache_shrink(global.slab_ce);
+}
+
+/* i915_global .exit hook: destroy the intel_context slab cache */
+static void i915_global_context_exit(void)
+{
+ kmem_cache_destroy(global.slab_ce);
+}
+
+/* Definition of the file-local state; only the base hooks are set here */
+static struct i915_global_context global = { {
+ .shrink = i915_global_context_shrink,
+ .exit = i915_global_context_exit,
+} };
+
+/*
+ * Create the intel_context slab cache and register the shrink/exit hooks.
+ * Returns 0 on success or -ENOMEM if the cache cannot be created.
+ */
+int __init i915_global_context_init(void)
+{
+	global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
+	if (global.slab_ce) {
+		i915_global_register(&global.base);
+		return 0;
+	}
+
+	return -ENOMEM;
+}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_CONTEXT_H__
+#define __INTEL_CONTEXT_H__
+
+#include <linux/lockdep.h>
+
+#include "intel_context_types.h"
+#include "intel_engine_types.h"
+
+struct intel_context *intel_context_alloc(void);
+void intel_context_free(struct intel_context *ce);
+
+void intel_context_init(struct intel_context *ce,
+ struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine);
+
+/**
+ * intel_context_lookup - Find the matching HW context for this (ctx, engine)
+ * @ctx: the parent GEM context
+ * @engine: the target HW engine
+ *
+ * May return NULL if the HW context hasn't been instantiated (i.e. unused).
+ */
+struct intel_context *
+intel_context_lookup(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine);
+
+/**
+ * intel_context_pin_lock - Stabilises the 'pinned' status of the HW context
+ * @ctx: the parent GEM context
+ * @engine: the target HW engine
+ *
+ * Acquire a lock on the pinned status of the HW context, such that the context
+ * can neither be bound to the GPU nor unbound whilst the lock is held, i.e.
+ * intel_context_is_pinned() remains stable.
+ */
+struct intel_context *
+intel_context_pin_lock(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine);
+
+/* True while the context's pin_count is non-zero */
+static inline bool
+intel_context_is_pinned(struct intel_context *ce)
+{
+ return atomic_read(&ce->pin_count);
+}
+
+/* Release the pin_mutex taken by intel_context_pin_lock() */
+static inline void intel_context_pin_unlock(struct intel_context *ce)
+__releases(ce->pin_mutex)
+{
+ mutex_unlock(&ce->pin_mutex);
+}
+
+struct intel_context *
+__intel_context_insert(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ struct intel_context *ce);
+void
+__intel_context_remove(struct intel_context *ce);
+
+struct intel_context *
+intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
+
+/* Take an additional pin on a context that must already be pinned */
+static inline void __intel_context_pin(struct intel_context *ce)
+{
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+ atomic_inc(&ce->pin_count);
+}
+
+void intel_context_unpin(struct intel_context *ce);
+
+/* Take a reference on @ce and return it, to allow call chaining */
+static inline struct intel_context *intel_context_get(struct intel_context *ce)
+{
+ kref_get(&ce->ref);
+ return ce;
+}
+
+/* Drop a reference on @ce; the last put invokes ce->ops->destroy() */
+static inline void intel_context_put(struct intel_context *ce)
+{
+ kref_put(&ce->ref, ce->ops->destroy);
+}
+
+#endif /* __INTEL_CONTEXT_H__ */
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_CONTEXT_TYPES__
+#define __INTEL_CONTEXT_TYPES__
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+#include "i915_active_types.h"
+#include "intel_sseu.h"
+
+struct i915_gem_context;
+struct i915_vma;
+struct intel_context;
+struct intel_ring;
+
+struct intel_context_ops { /* per-engine hooks, copied from engine->cops at init */
+ int (*pin)(struct intel_context *ce); /* called on the first pin; non-zero is an error */
+ void (*unpin)(struct intel_context *ce); /* called when the last pin is dropped */
+
+ void (*reset)(struct intel_context *ce);
+ void (*destroy)(struct kref *kref); /* kref release callback for intel_context.ref */
+};
+
+struct intel_context { /* HW context: one GEM context's state for one engine */
+ struct kref ref; /* released through ops->destroy */
+
+ struct i915_gem_context *gem_context; /* owning GEM context */
+ struct intel_engine_cs *engine; /* engine this context is for; rb-tree key */
+ struct intel_engine_cs *active;
+
+ struct list_head active_link; /* on gem_context->active_engines while pinned */
+ struct list_head signal_link; /* on the engine's breadcrumbs.signalers */
+ struct list_head signals; /* requests awaiting a breadcrumb, via rq->signal_link */
+
+ struct i915_vma *state;
+ struct intel_ring *ring;
+
+ u32 *lrc_reg_state;
+ u64 lrc_desc;
+
+ atomic_t pin_count; /* non-zero while pinned */
+ struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
+
+ /**
+ * active_tracker: Active tracker for the external rq activity
+ * on this intel_context object.
+ */
+ struct i915_active_request active_tracker;
+
+ const struct intel_context_ops *ops;
+ struct rb_node node; /* entry in gem_context->hw_contexts */
+
+ /** sseu: Control eu/slice partitioning */
+ struct intel_sseu sseu;
+};
+
+#endif /* __INTEL_CONTEXT_TYPES__ */
--- /dev/null
+/* SPDX-License-Identifier: MIT */
+#ifndef _INTEL_RINGBUFFER_H_
+#define _INTEL_RINGBUFFER_H_
+
+#include <drm/drm_util.h>
+
+#include <linux/hashtable.h>
+#include <linux/irq_work.h>
+#include <linux/random.h>
+#include <linux/seqlock.h>
+
+#include "i915_gem_batch_pool.h"
+#include "i915_pmu.h"
+#include "i915_reg.h"
+#include "i915_request.h"
+#include "i915_selftest.h"
+#include "i915_timeline.h"
+#include "intel_engine_types.h"
+#include "intel_gpu_commands.h"
+#include "intel_workarounds.h"
+
+struct drm_printer;
+
+/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give some inclination as to some of the magic values used in the various
+ * workarounds!
+ */
+#define CACHELINE_BYTES 64
+#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
+
+/*
+ * The register defines to be used with the following macros need to accept a
+ * base param, e.g:
+ *
+ * REG_FOO(base) _MMIO((base) + <relative offset>)
+ * ENGINE_READ(engine, REG_FOO);
+ *
+ * register arrays are to be defined and accessed as follows:
+ *
+ * REG_BAR(base, i) _MMIO((base) + <relative offset> + (i) * <shift>)
+ * ENGINE_READ_IDX(engine, REG_BAR, i)
+ */
+
+/* Call intel_uncore_<op>() on the engine's uncore with the given arguments */
+#define __ENGINE_REG_OP(op__, engine__, ...) \
+ intel_uncore_##op__((engine__)->uncore, __VA_ARGS__)
+
+/* Read-style op on a register macro instantiated with the engine's mmio_base */
+#define __ENGINE_READ_OP(op__, engine__, reg__) \
+ __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base))
+
+#define ENGINE_READ16(...) __ENGINE_READ_OP(read16, __VA_ARGS__)
+#define ENGINE_READ(...) __ENGINE_READ_OP(read, __VA_ARGS__)
+#define ENGINE_READ_FW(...) __ENGINE_READ_OP(read_fw, __VA_ARGS__)
+#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__)
+
+/* 64-bit read via read64_2x32 on a lower/upper register pair */
+#define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \
+ __ENGINE_REG_OP(read64_2x32, (engine__), \
+ lower_reg__((engine__)->mmio_base), \
+ upper_reg__((engine__)->mmio_base))
+
+/* Read element idx__ of a register array */
+#define ENGINE_READ_IDX(engine__, reg__, idx__) \
+ __ENGINE_REG_OP(read, (engine__), reg__((engine__)->mmio_base, (idx__)))
+
+/* Write-style op: as the read form, with the value appended */
+#define __ENGINE_WRITE_OP(op__, engine__, reg__, val__) \
+ __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base), (val__))
+
+#define ENGINE_WRITE16(...) __ENGINE_WRITE_OP(write16, __VA_ARGS__)
+#define ENGINE_WRITE(...) __ENGINE_WRITE_OP(write, __VA_ARGS__)
+#define ENGINE_WRITE_FW(...) __ENGINE_WRITE_OP(write_fw, __VA_ARGS__)
+
+/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
+ * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
+ */
+/* Hangcheck verdicts for an engine; hangcheck_action_to_str() names each one */
+enum intel_engine_hangcheck_action {
+ ENGINE_IDLE = 0,
+ ENGINE_WAIT,
+ ENGINE_ACTIVE_SEQNO,
+ ENGINE_ACTIVE_HEAD,
+ ENGINE_ACTIVE_SUBUNITS,
+ ENGINE_WAIT_KICK,
+ ENGINE_DEAD,
+};
+
+/*
+ * Translate a hangcheck action into a short human-readable label.
+ * Values outside the enum yield "unknown".
+ */
+static inline const char *
+hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
+{
+	const char *label = "unknown";
+
+	switch (a) {
+	case ENGINE_IDLE:
+		label = "idle";
+		break;
+	case ENGINE_WAIT:
+		label = "wait";
+		break;
+	case ENGINE_ACTIVE_SEQNO:
+		label = "active seqno";
+		break;
+	case ENGINE_ACTIVE_HEAD:
+		label = "active head";
+		break;
+	case ENGINE_ACTIVE_SUBUNITS:
+		label = "active subunits";
+		break;
+	case ENGINE_WAIT_KICK:
+		label = "wait kick";
+		break;
+	case ENGINE_DEAD:
+		label = "dead";
+		break;
+	}
+
+	return label;
+}
+
+void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
+
+/*
+ * Decide whether a request of priority @prio should preempt what is
+ * running at priority @last.
+ */
+static inline bool __execlists_need_preempt(int prio, int last)
+{
+	/*
+	 * Preemption goes low -> normal -> high only. A low priority task
+	 * never preempts another low priority task: for those, latency is
+	 * assumed to matter less than background throughput, so we keep
+	 * it simple.
+	 *
+	 * The natural form would be
+	 *	prio >= max(0, last);
+	 * but that would allow preemption at an equal priority level. The
+	 * task already running must stay running so that the FIFO ordering
+	 * of dependencies is preserved, hence the strict comparison.
+	 */
+	const int threshold = max(I915_PRIORITY_NORMAL - 1, last);
+
+	return prio > threshold;
+}
+
+/* Set @bit in execlists->active using the non-atomic bitop */
+static inline void
+execlists_set_active(struct intel_engine_execlists *execlists,
+		     unsigned int bit)
+{
+	unsigned long *active = (unsigned long *)&execlists->active;
+
+	__set_bit(bit, active);
+}
+
+/*
+ * Set @bit in execlists->active (non-atomic) and report whether this call
+ * was the one that set it: true if the bit was previously clear.
+ */
+static inline bool
+execlists_set_active_once(struct intel_engine_execlists *execlists,
+			  unsigned int bit)
+{
+	unsigned long *active = (unsigned long *)&execlists->active;
+
+	return !__test_and_set_bit(bit, active);
+}
+
+/* Clear @bit in execlists->active using the non-atomic bitop */
+static inline void
+execlists_clear_active(struct intel_engine_execlists *execlists,
+		       unsigned int bit)
+{
+	unsigned long *active = (unsigned long *)&execlists->active;
+
+	__clear_bit(bit, active);
+}
+
+/* Clear every bit of execlists->active in one store */
+static inline void
+execlists_clear_all_active(struct intel_engine_execlists *execlists)
+{
+ execlists->active = 0;
+}
+
+/* Report whether @bit is currently set in execlists->active */
+static inline bool
+execlists_is_active(const struct intel_engine_execlists *execlists,
+		    unsigned int bit)
+{
+	unsigned long *active = (unsigned long *)&execlists->active;
+
+	return test_bit(bit, active);
+}
+
+void execlists_user_begin(struct intel_engine_execlists *execlists,
+ const struct execlist_port *port);
+void execlists_user_end(struct intel_engine_execlists *execlists);
+
+void
+execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
+
+struct i915_request *
+execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
+
+/* Number of execlist ports, derived as port_mask + 1 */
+static inline unsigned int
+execlists_num_ports(const struct intel_engine_execlists * const execlists)
+{
+ return execlists->port_mask + 1;
+}
+
+/*
+ * Retire the first port: shift the following port_mask entries down by one
+ * slot and zero the vacated last slot. @port must be port 0 and
+ * EXECLISTS_ACTIVE_USER must be set. Returns @port.
+ */
+static inline struct execlist_port *
+execlists_port_complete(struct intel_engine_execlists * const execlists,
+			struct execlist_port * const port)
+{
+	const unsigned int last = execlists->port_mask;
+
+	GEM_BUG_ON(port_index(port, execlists) != 0);
+	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
+
+	memmove(&port[0], &port[1], last * sizeof(*port));
+	memset(&port[last], 0, sizeof(*port));
+
+	return port;
+}
+
+/* Read dword @reg of the engine's status page with READ_ONCE() */
+static inline u32
+intel_read_status_page(const struct intel_engine_cs *engine, int reg)
+{
+ /* Ensure that the compiler doesn't optimize away the load. */
+ return READ_ONCE(engine->status_page.addr[reg]);
+}
+
+/*
+ * Write @value to dword @reg of the engine's status page. When the CPU has
+ * CLFLUSH, the store is bracketed by cache-line flushes and full memory
+ * barriers; otherwise a WRITE_ONCE() is used.
+ */
+static inline void
+intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
+{
+ /* Writing into the status page should be done sparingly. Since
+ * we do when we are uncertain of the device state, we take a bit
+ * of extra paranoia to try and ensure that the HWS takes the value
+ * we give and that it doesn't end up trapped inside the CPU!
+ */
+ if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ mb();
+ clflush(&engine->status_page.addr[reg]);
+ engine->status_page.addr[reg] = value;
+ clflush(&engine->status_page.addr[reg]);
+ mb();
+ } else {
+ WRITE_ONCE(engine->status_page.addr[reg], value);
+ }
+}
+
+/*
+ * Reads a dword out of the status page, which is written to from the command
+ * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
+ * MI_STORE_DATA_IMM.
+ *
+ * The following dwords have a reserved meaning:
+ * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
+ * 0x04: ring 0 head pointer
+ * 0x05: ring 1 head pointer (915-class)
+ * 0x06: ring 2 head pointer (915-class)
+ * 0x10-0x1b: Context status DWords (GM45)
+ * 0x1f: Last written status offset. (GM45)
+ * 0x20-0x2f: Reserved (Gen6+)
+ *
+ * The area from dword 0x30 to 0x3ff is available for driver usage.
+ */
+#define I915_GEM_HWS_PREEMPT 0x32
+#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32))
+#define I915_GEM_HWS_HANGCHECK 0x34
+#define I915_GEM_HWS_HANGCHECK_ADDR (I915_GEM_HWS_HANGCHECK * sizeof(u32))
+#define I915_GEM_HWS_SEQNO 0x40
+#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
+#define I915_GEM_HWS_SCRATCH 0x80
+#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32))
+
+#define I915_HWS_CSB_BUF0_INDEX 0x10
+#define I915_HWS_CSB_WRITE_INDEX 0x1f
+#define CNL_HWS_CSB_WRITE_INDEX 0x2f
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine,
+ struct i915_timeline *timeline,
+ int size);
+int intel_ring_pin(struct intel_ring *ring);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+unsigned int intel_ring_update_space(struct intel_ring *ring);
+void intel_ring_unpin(struct intel_ring *ring);
+void intel_ring_free(struct kref *ref);
+
+/* Take a reference on @ring and return it, to allow call chaining */
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+ kref_get(&ring->ref);
+ return ring;
+}
+
+/* Drop a reference on @ring; the last put calls intel_ring_free() */
+static inline void intel_ring_put(struct intel_ring *ring)
+{
+ kref_put(&ring->ref, intel_ring_free);
+}
+
+void intel_engine_stop(struct intel_engine_cs *engine);
+void intel_engine_cleanup(struct intel_engine_cs *engine);
+
+int __must_check intel_ring_cacheline_align(struct i915_request *rq);
+
+u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
+
+/*
+ * Debug-only check that @cs, the write pointer after emitting commands,
+ * matches the ring's vaddr + emit offset. Does not modify any state.
+ */
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
+{
+ /* Dummy function.
+ *
+ * This serves as a placeholder in the code so that the reader
+ * can compare against the preceding intel_ring_begin() and
+ * check that the number of dwords emitted matches the space
+ * reserved for the command packet (i.e. the value passed to
+ * intel_ring_begin()).
+ */
+ GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
+}
+
+/* Wrap @pos into the ring by masking with size - 1 */
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+	const u32 mask = ring->size - 1;
+
+	return pos & mask;
+}
+
+/*
+ * An offset is valid when it lies strictly within the ring and is qword
+ * (8 byte) aligned.
+ */
+static inline bool
+intel_ring_offset_valid(const struct intel_ring *ring,
+			unsigned int pos)
+{
+	const bool inside = !(pos & -ring->size);
+
+	return inside && IS_ALIGNED(pos, 8);
+}
+
+/*
+ * Convert @addr, a pointer into the request's ring, into a byte offset from
+ * the ring's vaddr, wrapped so that an offset equal to the ring size maps
+ * back to 0.
+ */
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
+{
+ /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+ u32 offset = addr - rq->ring->vaddr;
+ GEM_BUG_ON(offset > rq->ring->size);
+ return intel_ring_wrap(rq->ring, offset);
+}
+
+/*
+ * Debug-only checks on a proposed tail: it must be a valid ring offset and
+ * must not sit behind ring->head within the same cacheline.
+ */
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
+
+ /*
+ * "Ring Buffer Use"
+ * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+ * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+ * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+ * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+ * same cacheline, the Head Pointer must not be greater than the Tail
+ * Pointer."
+ *
+ * We use ring->head as the last known location of the actual RING_HEAD,
+ * it may have advanced but in the worst case it is equally the same
+ * as ring->head and so we should never program RING_TAIL to advance
+ * into the same cacheline as ring->head.
+ */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+ GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+ tail < ring->head);
+#undef cacheline
+}
+
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+ /* Whilst writes to the tail are strictly ordered, there is no
+ * serialisation between readers and the writers. The tail may be
+ * read by i915_request_retire() just as it is being updated
+ * by execlists, as although the breadcrumb is complete, the context
+ * switch hasn't been seen.
+ *
+ * The new value is validated with assert_ring_tail_valid(), stored
+ * in ring->tail and handed back to the caller.
+ */
+ assert_ring_tail_valid(ring, tail);
+ ring->tail = tail;
+ return tail;
+}
+
+/*
+ * Space between @tail and @head in a ring of @size, with one cacheline
+ * (CACHELINE_BYTES) always held back. @size must be a power of two, which
+ * is asserted with GEM_BUG_ON().
+ *
+ * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+ * same cacheline, the Head Pointer must not be greater than the Tail
+ * Pointer."
+ */
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+	const unsigned int mask = size - 1;
+
+	GEM_BUG_ON(!is_power_of_2(size));
+
+	return (head - tail - CACHELINE_BYTES) & mask;
+}
+
+int intel_engine_setup_common(struct intel_engine_cs *engine);
+int intel_engine_init_common(struct intel_engine_cs *engine);
+void intel_engine_cleanup_common(struct intel_engine_cs *engine);
+/* The .init_legacy hooks of intel_engine_classes[], used when !HAS_EXECLISTS(). */
+int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
+int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
+int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
+int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
+/* Set, respectively clear, STOP_RING in the engine's RING_MI_MODE register. */
+int intel_engine_stop_cs(struct intel_engine_cs *engine);
+void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
+/* Write @mask to the engine's RING_HWSTAM register. */
+void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
+/* Raw reads of the engine's ACTHD and BBADDR registers. */
+u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
+u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
+
+void intel_engine_get_instdone(struct intel_engine_cs *engine,
+ struct intel_instdone *instdone);
+
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
+
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
+
+void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
+
+static inline void
+intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
+{
+ irq_work_queue(&engine->breadcrumbs.irq_work); /* queue the engine's breadcrumbs irq_work */
+}
+
+void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
+
+/*
+ * intel_engine_fini_breadcrumbs() is not repeated here: it is already
+ * declared next to intel_engine_init_breadcrumbs() above.
+ */
+void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
+
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+				    struct drm_printer *p);
+
+/*
+ * Emit a 6 dword PIPE_CONTROL packet at @batch: the opcode, @flags and
+ * @offset, with the remaining three dwords left at zero. Returns the
+ * position just past the packet.
+ */
+static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+{
+	u32 *cs = batch;
+
+	/* Clear the whole packet first so the unused dwords read as 0. */
+	memset(batch, 0, 6 * sizeof(u32));
+
+	*cs++ = GFX_OP_PIPE_CONTROL(6);
+	*cs++ = flags;
+	*cs++ = offset;
+
+	return batch + 6;
+}
+
+/*
+ * Emit a 6 dword PIPE_CONTROL that performs a qword write of @value to
+ * @gtt_offset in the global GTT, with the caller's extra @flags or'ed in.
+ * Returns the position just past the packet.
+ */
+static inline u32 *
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+{
+	/* We're using qword write, offset should be aligned to 8 bytes. */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	/* w/a for post sync ops following a GPGPU operation we
+	 * need a prior CS_STALL, which is emitted by the flush
+	 * following the batch.
+	 */
+	cs[0] = GFX_OP_PIPE_CONTROL(6);
+	cs[1] = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
+	cs[2] = gtt_offset;
+	cs[3] = 0;
+	cs[4] = value;
+	cs[5] = 0; /* We're thrashing one dword of HWS. */
+
+	return cs + 6;
+}
+
+/*
+ * Emit a 4 dword MI_FLUSH_DW that stores @value at @gtt_offset in the
+ * global GTT, with the caller's extra @flags or'ed into the command
+ * dword. Returns the position just past the packet.
+ */
+static inline u32 *
+gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
+{
+	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+	GEM_BUG_ON(gtt_offset & (1 << 5));
+	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	cs[0] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
+	cs[1] = gtt_offset | MI_FLUSH_DW_USE_GTT;
+	cs[2] = 0;
+	cs[3] = value;
+
+	return cs + 4;
+}
+
+/*
+ * Forward to the engine's reset.reset() hook, passing @stalled through.
+ * Engines that do not provide the hook are left untouched.
+ */
+static inline void intel_engine_reset(struct intel_engine_cs *engine,
+				      bool stalled)
+{
+	if (!engine->reset.reset)
+		return;
+
+	engine->reset.reset(engine, stalled);
+}
+
+void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
+void intel_gt_resume(struct drm_i915_private *i915); /* calls ->ops->reset() on each engine's kernel/preempt context */
+
+bool intel_engine_is_idle(struct intel_engine_cs *engine);
+bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
+
+void intel_engine_lost_context(struct intel_engine_cs *engine);
+
+void intel_engines_park(struct drm_i915_private *i915);
+void intel_engines_unpark(struct drm_i915_private *i915);
+
+void intel_engines_reset_default_submission(struct drm_i915_private *i915);
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
+
+bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
+/* __printf(3, 4): @header is a printf-style format, followed by its arguments. */
+__printf(3, 4)
+void intel_engine_dump(struct intel_engine_cs *engine,
+ struct drm_printer *m,
+ const char *header, ...);
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+/*
+ * Stats hook for a context starting on @engine. When stats are enabled,
+ * stats.active is incremented under stats.lock and, on the 0 -> 1
+ * transition, the current time is recorded in stats.start.
+ */
+static inline void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+	unsigned long irqflags;
+
+	/* Unlocked peek, so the disabled case never touches the lock. */
+	if (!READ_ONCE(engine->stats.enabled))
+		return;
+
+	write_seqlock_irqsave(&engine->stats.lock, irqflags);
+
+	/* Look again now that we hold the lock. */
+	if (engine->stats.enabled > 0) {
+		const bool was_idle = engine->stats.active == 0;
+
+		engine->stats.active++;
+		if (was_idle)
+			engine->stats.start = ktime_get();
+		GEM_BUG_ON(engine->stats.active == 0);
+	}
+
+	write_sequnlock_irqrestore(&engine->stats.lock, irqflags);
+}
+
+/*
+ * Stats hook for a context leaving @engine. When stats are enabled,
+ * stats.active is decremented under stats.lock; once it reaches zero the
+ * time elapsed since stats.start is added to stats.total. If no context
+ * was being tracked, the time since stats.enabled_at is added instead.
+ */
+static inline void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+	unsigned long irqflags;
+
+	/* Unlocked peek, so the disabled case never touches the lock. */
+	if (!READ_ONCE(engine->stats.enabled))
+		return;
+
+	write_seqlock_irqsave(&engine->stats.lock, irqflags);
+
+	if (engine->stats.enabled > 0) {
+		if (engine->stats.active) {
+			/*
+			 * Decrement the active context count and in case GPU
+			 * is now idle add up to the running total.
+			 */
+			if (--engine->stats.active == 0) {
+				ktime_t busy = ktime_sub(ktime_get(),
+							 engine->stats.start);
+
+				engine->stats.total =
+					ktime_add(engine->stats.total, busy);
+			}
+		} else {
+			/*
+			 * After turning on engine stats, context out might be
+			 * the first event in which case we account from the
+			 * time stats gathering was turned on.
+			 */
+			ktime_t busy = ktime_sub(ktime_get(),
+						 engine->stats.enabled_at);
+
+			engine->stats.total =
+				ktime_add(engine->stats.total, busy);
+		}
+	}
+
+	write_sequnlock_irqrestore(&engine->stats.lock, irqflags);
+}
+
+int intel_enable_engine_stats(struct intel_engine_cs *engine);
+void intel_disable_engine_stats(struct intel_engine_cs *engine);
+/* NOTE(review): presumably reports the stats.total kept by intel_engine_context_in/out() - confirm. */
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
+
+struct i915_request *
+intel_engine_find_active_request(struct intel_engine_cs *engine);
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+
+/*
+ * Selftest hook: when preempt_hang.inject_hang is set, signal
+ * preempt_hang.completion and report true; otherwise report false.
+ */
+static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
+{
+	if (execlists->preempt_hang.inject_hang) {
+		complete(&execlists->preempt_hang.completion);
+		return true;
+	}
+
+	return false;
+}
+
+#else
+
+/* Built without CONFIG_DRM_I915_SELFTEST: a hang is never injected. */
+static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
+{
+	return false;
+}
+
+#endif
+
+/*
+ * Step engine->hangcheck.next_seqno through next_pseudo_random32() and
+ * return the freshly stored value.
+ */
+static inline u32
+intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+	engine->hangcheck.next_seqno =
+		next_pseudo_random32(engine->hangcheck.next_seqno);
+
+	return engine->hangcheck.next_seqno;
+}
+
+/* Fetch the I915_GEM_HWS_HANGCHECK slot of the engine's status page. */
+static inline u32
+intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+	const u32 seqno = intel_read_status_page(engine,
+						 I915_GEM_HWS_HANGCHECK);
+
+	return seqno;
+}
+
+#endif /* _INTEL_RINGBUFFER_H_ */
--- /dev/null
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_print.h>
+
+#include "i915_drv.h"
+
+#include "intel_engine.h"
+#include "intel_lrc.h"
+#include "intel_reset.h"
+
+/* Haswell does have the CXT_SIZE register however it does not appear to be
+ * valid. Now, docs explain in dwords what is in the context object. The full
+ * size is 70720 bytes, however, the power context and execlist context will
+ * never be saved (power context is stored elsewhere, and execlists don't work
+ * on HSW) - so the final size, including the extra state required for the
+ * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
+ */
+#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
+/* Render class sizes returned by __intel_engine_context_size() for gen8+. */
+#define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
+#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
+#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE)
+#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE)
+/* Size returned for the video decode, video enhancement and copy classes on gen8+. */
+#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)
+
+struct engine_class_info { /* per engine class data, see intel_engine_classes[] */
+ const char *name; /* prefix of engine->name; the instance number is appended */
+ int (*init_legacy)(struct intel_engine_cs *engine); /* picked by intel_engines_init() when !HAS_EXECLISTS() */
+ int (*init_execlists)(struct intel_engine_cs *engine); /* picked by intel_engines_init() when HAS_EXECLISTS() */
+
+ u8 uabi_class; /* copied into engine->uabi_class by intel_engine_setup() */
+};
+
+static const struct engine_class_info intel_engine_classes[] = { /* indexed by engine class (info->class / engine->class) */
+ [RENDER_CLASS] = {
+ .name = "rcs",
+ .init_execlists = logical_render_ring_init,
+ .init_legacy = intel_init_render_ring_buffer,
+ .uabi_class = I915_ENGINE_CLASS_RENDER,
+ },
+ [COPY_ENGINE_CLASS] = {
+ .name = "bcs",
+ .init_execlists = logical_xcs_ring_init,
+ .init_legacy = intel_init_blt_ring_buffer,
+ .uabi_class = I915_ENGINE_CLASS_COPY,
+ },
+ [VIDEO_DECODE_CLASS] = {
+ .name = "vcs",
+ .init_execlists = logical_xcs_ring_init,
+ .init_legacy = intel_init_bsd_ring_buffer,
+ .uabi_class = I915_ENGINE_CLASS_VIDEO,
+ },
+ [VIDEO_ENHANCEMENT_CLASS] = {
+ .name = "vecs",
+ .init_execlists = logical_xcs_ring_init,
+ .init_legacy = intel_init_vebox_ring_buffer,
+ .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
+ },
+};
+
+#define MAX_MMIO_BASES 3
+struct engine_info { /* static description of one engine, see intel_engines[] */
+ unsigned int hw_id;
+ u8 class;
+ u8 instance;
+ /*
+ * mmio bases table *must* be sorted in reverse gen order:
+ * __engine_mmio_base() walks it from the front and takes the first
+ * entry whose .gen does not exceed the device gen.
+ */
+ struct engine_mmio_base {
+ u32 gen : 8;
+ u32 base : 24;
+ } mmio_bases[MAX_MMIO_BASES];
+};
+
+static const struct engine_info intel_engines[] = { /* indexed by enum intel_engine_id */
+ [RCS0] = {
+ .hw_id = RCS0_HW,
+ .class = RENDER_CLASS,
+ .instance = 0,
+ .mmio_bases = {
+ { .gen = 1, .base = RENDER_RING_BASE }
+ },
+ },
+ [BCS0] = {
+ .hw_id = BCS0_HW,
+ .class = COPY_ENGINE_CLASS,
+ .instance = 0,
+ .mmio_bases = {
+ { .gen = 6, .base = BLT_RING_BASE }
+ },
+ },
+ [VCS0] = {
+ .hw_id = VCS0_HW,
+ .class = VIDEO_DECODE_CLASS,
+ .instance = 0,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD_RING_BASE },
+ { .gen = 6, .base = GEN6_BSD_RING_BASE },
+ { .gen = 4, .base = BSD_RING_BASE }
+ },
+ },
+ [VCS1] = {
+ .hw_id = VCS1_HW,
+ .class = VIDEO_DECODE_CLASS,
+ .instance = 1,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD2_RING_BASE },
+ { .gen = 8, .base = GEN8_BSD2_RING_BASE }
+ },
+ },
+ [VCS2] = {
+ .hw_id = VCS2_HW,
+ .class = VIDEO_DECODE_CLASS,
+ .instance = 2,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD3_RING_BASE }
+ },
+ },
+ [VCS3] = {
+ .hw_id = VCS3_HW,
+ .class = VIDEO_DECODE_CLASS,
+ .instance = 3,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD4_RING_BASE }
+ },
+ },
+ [VECS0] = {
+ .hw_id = VECS0_HW,
+ .class = VIDEO_ENHANCEMENT_CLASS,
+ .instance = 0,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_VEBOX_RING_BASE },
+ { .gen = 7, .base = VEBOX_RING_BASE }
+ },
+ },
+ [VECS1] = {
+ .hw_id = VECS1_HW,
+ .class = VIDEO_ENHANCEMENT_CLASS,
+ .instance = 1,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
+ },
+ },
+};
+
+/**
+ * __intel_engine_context_size() - return the size of the context for an engine
+ * @dev_priv: i915 device private
+ * @class: engine class
+ *
+ * Each engine class may require a different amount of space for a context
+ * image.
+ *
+ * Return: size (in bytes) of an engine class specific context image; zero
+ * for the render class before gen6 and for the other classes before gen8.
+ *
+ * Note: this size includes the HWSP, which is part of the context image
+ * in LRC mode, but does not include the "shared data page" used with
+ * GuC submission. The caller should account for this if using the GuC.
+ */
+static u32
+__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+{
+ u32 cxt_size;
+
+ BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
+
+ switch (class) {
+ case RENDER_CLASS:
+ switch (INTEL_GEN(dev_priv)) {
+ default:
+ MISSING_CASE(INTEL_GEN(dev_priv));
+ return DEFAULT_LR_CONTEXT_RENDER_SIZE;
+ case 11:
+ return GEN11_LR_CONTEXT_RENDER_SIZE;
+ case 10:
+ return GEN10_LR_CONTEXT_RENDER_SIZE;
+ case 9:
+ return GEN9_LR_CONTEXT_RENDER_SIZE;
+ case 8:
+ return GEN8_LR_CONTEXT_RENDER_SIZE;
+ case 7:
+ if (IS_HASWELL(dev_priv))
+ return HSW_CXT_TOTAL_SIZE;
+
+ cxt_size = I915_READ(GEN7_CXT_SIZE);
+ return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
+ PAGE_SIZE);
+ case 6:
+ cxt_size = I915_READ(CXT_SIZE);
+ return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
+ PAGE_SIZE);
+ case 5:
+ case 4:
+ case 3:
+ case 2:
+ /* For the special day when i810 gets merged. */
+ case 1:
+ return 0;
+ }
+ break;
+ default:
+ MISSING_CASE(class);
+ /* fall through */
+ case VIDEO_DECODE_CLASS:
+ case VIDEO_ENHANCEMENT_CLASS:
+ case COPY_ENGINE_CLASS:
+ if (INTEL_GEN(dev_priv) < 8)
+ return 0;
+ return GEN8_LR_CONTEXT_OTHER_SIZE;
+ }
+}
+
+/*
+ * Pick the mmio base for the running device out of @bases: the first
+ * entry whose .gen does not exceed INTEL_GEN(i915). Running off the end
+ * of the table, or landing on an entry with a zero base, trips a
+ * GEM_BUG_ON().
+ */
+static u32 __engine_mmio_base(struct drm_i915_private *i915,
+			      const struct engine_mmio_base *bases)
+{
+	int n = 0;
+
+	/* Skip every entry that is meant for a newer gen than ours. */
+	while (n < MAX_MMIO_BASES && INTEL_GEN(i915) < bases[n].gen)
+		n++;
+
+	GEM_BUG_ON(n == MAX_MMIO_BASES);
+	GEM_BUG_ON(!bases[n].base);
+
+	return bases[n].base;
+}
+
+/*
+ * Format "<class name><instance>" into @name, which provides
+ * INTEL_ENGINE_CS_MAX_NAME bytes. A result that had to be truncated
+ * raises a WARN.
+ */
+static void __sprint_engine_name(char *name, const struct engine_info *info)
+{
+	const struct engine_class_info *class_info =
+		&intel_engine_classes[info->class];
+	int len;
+
+	len = snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
+		       class_info->name, info->instance);
+	WARN_ON(len >= INTEL_ENGINE_CS_MAX_NAME);
+}
+
+/*
+ * Write @mask to the engine's RING_HWSTAM register. Nothing is written
+ * for non-render engines before gen6; before gen3 the register is
+ * written with a 16 bit access.
+ */
+void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	/*
+	 * Though they added more rings on g4x/ilk, they did not add
+	 * per-engine HWSTAM until gen6.
+	 */
+	if (engine->class != RENDER_CLASS && INTEL_GEN(i915) < 6)
+		return;
+
+	if (INTEL_GEN(i915) < 3)
+		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
+	else
+		ENGINE_WRITE(engine, RING_HWSTAM, mask);
+}
+
+static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
+{
+ /*
+ * Mask off all writes into the unknown HWSP, by handing an all-ones
+ * mask to intel_engine_set_hwsp_writemask().
+ */
+ intel_engine_set_hwsp_writemask(engine, ~0u);
+}
+
+static int
+intel_engine_setup(struct drm_i915_private *dev_priv,
+ enum intel_engine_id id)
+{
+ const struct engine_info *info = &intel_engines[id];
+ struct intel_engine_cs *engine;
+ /*
+ * Allocate the intel_engine_cs for @id, fill it in from the static
+ * intel_engines[] / intel_engine_classes[] tables and register it in
+ * dev_priv->engine[] and dev_priv->engine_class[][].
+ *
+ * Returns 0 on success, -EINVAL if the table entry is out of range
+ * or its class/instance slot is already taken, -ENOMEM if the
+ * allocation fails.
+ */
+ GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
+
+ BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
+ BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
+
+ if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
+ return -EINVAL;
+
+ if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
+ return -EINVAL;
+
+ if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
+ return -EINVAL;
+
+ GEM_BUG_ON(dev_priv->engine[id]);
+ engine = kzalloc(sizeof(*engine), GFP_KERNEL);
+ if (!engine)
+ return -ENOMEM;
+
+ BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
+
+ engine->id = id;
+ engine->mask = BIT(id);
+ engine->i915 = dev_priv;
+ engine->uncore = &dev_priv->uncore;
+ __sprint_engine_name(engine->name, info);
+ engine->hw_id = engine->guc_id = info->hw_id;
+ engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
+ engine->class = info->class;
+ engine->instance = info->instance;
+
+ engine->uabi_class = intel_engine_classes[info->class].uabi_class;
+
+ engine->context_size = __intel_engine_context_size(dev_priv,
+ engine->class);
+ if (WARN_ON(engine->context_size > BIT(20))) /* implausibly large: treat as no context */
+ engine->context_size = 0;
+ if (engine->context_size)
+ DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
+
+ /* Nothing to do here, execute in order of dependencies */
+ engine->schedule = NULL;
+
+ seqlock_init(&engine->stats.lock);
+
+ ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
+
+ /* Scrub mmio state on takeover */
+ intel_engine_sanitize_mmio(engine);
+
+ dev_priv->engine_class[info->class][info->instance] = engine;
+ dev_priv->engine[id] = engine;
+ return 0;
+}
+
+/**
+ * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
+ * @dev_priv: i915 device private
+ *
+ * Calls intel_engine_setup() for every entry of intel_engines[] that the
+ * device reports through HAS_ENGINE(). If the set of engines that could be
+ * set up differs from the advertised engine_mask, a warning is raised and
+ * the device info is trimmed to what was actually set up.
+ *
+ * On failure every engine allocated so far is freed again and its slots in
+ * dev_priv->engine[] and dev_priv->engine_class[][] are cleared.
+ *
+ * Return: zero on success, a negative error code if the initialization
+ * failed.
+ */
+int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
+{
+	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
+	const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int mask = 0;
+	unsigned int i;
+	int err;
+
+	WARN_ON(engine_mask == 0);
+	WARN_ON(engine_mask &
+		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
+
+	if (i915_inject_load_failure())
+		return -ENODEV;
+
+	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
+		if (!HAS_ENGINE(dev_priv, i))
+			continue;
+
+		err = intel_engine_setup(dev_priv, i);
+		if (err)
+			goto cleanup;
+
+		mask |= BIT(i);
+	}
+
+	/*
+	 * Catch failures to update intel_engines table when the new engines
+	 * are added to the driver by a warning and disabling the forgotten
+	 * engines.
+	 */
+	if (WARN_ON(mask != engine_mask))
+		device_info->engine_mask = mask;
+
+	/* We always presume we have at least RCS available for later probing */
+	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) {
+		err = -ENODEV;
+		goto cleanup;
+	}
+
+	RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask);
+
+	i915_check_and_clear_faults(dev_priv);
+
+	return 0;
+
+cleanup:
+	for_each_engine(engine, dev_priv, id) {
+		/*
+		 * Unregister before freeing so that neither lookup table is
+		 * left pointing at freed memory, matching the error path of
+		 * intel_engines_init().
+		 */
+		dev_priv->engine_class[engine->class][engine->instance] = NULL;
+		dev_priv->engine[id] = NULL;
+		kfree(engine);
+	}
+	return err;
+}
+
+/**
+ * intel_engines_init() - init the Engine Command Streamers
+ * @dev_priv: i915 device private
+ *
+ * Runs the per-class init hook of every engine: init_execlists when
+ * HAS_EXECLISTS(), init_legacy otherwise. On failure, engines that were
+ * already initialised are handed to dev_priv->gt.cleanup_engine() and the
+ * remaining ones are freed.
+ *
+ * Return: non-zero if the initialization failed.
+ */
+int intel_engines_init(struct drm_i915_private *dev_priv)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id, err_id;
+ int err;
+
+ for_each_engine(engine, dev_priv, id) {
+ const struct engine_class_info *class_info =
+ &intel_engine_classes[engine->class];
+ int (*init)(struct intel_engine_cs *engine);
+
+ if (HAS_EXECLISTS(dev_priv))
+ init = class_info->init_execlists;
+ else
+ init = class_info->init_legacy;
+
+ err = -EINVAL; /* reported if the class has no init hook */
+ err_id = id; /* remembered for the cleanup loop below */
+
+ if (GEM_DEBUG_WARN_ON(!init))
+ goto cleanup;
+
+ err = init(engine);
+ if (err)
+ goto cleanup;
+
+ GEM_BUG_ON(!engine->submit_request);
+ }
+
+ return 0;
+
+cleanup:
+ for_each_engine(engine, dev_priv, id) {
+ if (id >= err_id) { /* the failing engine and those not reached yet */
+ kfree(engine);
+ dev_priv->engine[id] = NULL;
+ } else { /* init hook completed for this one */
+ dev_priv->gt.cleanup_engine(engine);
+ }
+ }
+ return err;
+}
+
+static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
+{
+ i915_gem_batch_pool_init(&engine->batch_pool, engine); /* undone by i915_gem_batch_pool_fini() in intel_engine_cleanup_common() */
+}
+
+/*
+ * Put engine->execlists into its initial state: an empty priority queue
+ * with the priority hint at INT_MIN, and a port_mask of 1.
+ */
+static void intel_engine_init_execlist(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const el = &engine->execlists;
+
+	el->queue = RB_ROOT_CACHED;
+	el->queue_priority_hint = INT_MIN;
+
+	/* The port count derived from the mask has to pass both checks. */
+	el->port_mask = 1;
+	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(el)));
+	GEM_BUG_ON(execlists_num_ports(el) > EXECLIST_MAX_PORTS);
+}
+
+static void cleanup_status_page(struct intel_engine_cs *engine)
+{
+ struct i915_vma *vma;
+
+ /*
+ * Prevent writes into HWSP after returning the page to the system.
+ *
+ * After that, take status_page.vma out of the engine (leaving NULL
+ * behind; nothing more to do if it was never set), drop the GGTT pin
+ * unless the platform uses a physical HWS, and release the CPU
+ * mapping and the object.
+ */
+ intel_engine_set_hwsp_writemask(engine, ~0u);
+
+ vma = fetch_and_zero(&engine->status_page.vma);
+ if (!vma)
+ return;
+
+ if (!HWS_NEEDS_PHYSICAL(engine->i915))
+ i915_vma_unpin(vma);
+
+ i915_gem_object_unpin_map(vma->obj);
+ __i915_gem_object_release_unless_active(vma->obj);
+}
+
+/*
+ * Pin @vma into the global GTT: high up on LLC platforms, inside the
+ * mappable region everywhere else. Returns the result of i915_vma_pin().
+ */
+static int pin_ggtt_status_page(struct intel_engine_cs *engine,
+				struct i915_vma *vma)
+{
+	unsigned int flags = PIN_GLOBAL;
+
+	/*
+	 * On g33, we cannot place HWS above 256MiB, so
+	 * restrict its pinning to the low mappable arena.
+	 * Though this restriction is not documented for
+	 * gen4, gen5, or byt, they also behave similarly
+	 * and hang if the HWS is placed at the top of the
+	 * GTT. To generalise, it appears that all !llc
+	 * platforms have issues with us placing the HWS
+	 * above the mappable region (even though we never
+	 * actually map it).
+	 */
+	if (HAS_LLC(engine->i915))
+		flags |= PIN_HIGH;
+	else
+		flags |= PIN_MAPPABLE;
+
+	return i915_vma_pin(vma, 0, 0, flags);
+}
+
+/*
+ * Allocate one page for the engine's status page (HWSP), map it for the
+ * CPU, zero it and, unless the platform uses a physical HWS, pin it into
+ * the global GTT.
+ *
+ * engine->status_page is only written once every step has succeeded, so a
+ * failure never leaves it pointing at an object that has been released.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int init_status_page(struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	void *vaddr;
+	int ret;
+
+	/*
+	 * Though the HWS register does support 36bit addresses, historically
+	 * we have had hangs and corruption reported due to wild writes if
+	 * the HWS is placed above 4G. We only allow objects to be allocated
+	 * in GFP_DMA32 for i965, and no earlier physical address users had
+	 * access to more than 4G.
+	 */
+	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		DRM_ERROR("Failed to allocate status page\n");
+		return PTR_ERR(obj);
+	}
+
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err;
+	}
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto err;
+	}
+
+	memset(vaddr, 0, PAGE_SIZE);
+
+	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
+		ret = pin_ggtt_status_page(engine, vma);
+		if (ret)
+			goto err_unpin;
+	}
+
+	/* Nothing can fail any more: publish the page to the engine. */
+	engine->status_page.addr = vaddr;
+	engine->status_page.vma = vma;
+
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_map(obj);
+err:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/**
+ * intel_engine_setup_common - setup engine state not requiring hw access
+ * @engine: Engine to setup.
+ *
+ * Initializes @engine structure members shared between legacy and execlists
+ * submission modes which do not require hardware access.
+ *
+ * Typically done early in the submission mode specific engine setup stage.
+ *
+ * Returns zero on success, or the error from setting up the status page or
+ * the engine timeline; in the latter case the status page is released again.
+ */
+int intel_engine_setup_common(struct intel_engine_cs *engine)
+{
+ int err;
+
+ err = init_status_page(engine);
+ if (err)
+ return err;
+
+ err = i915_timeline_init(engine->i915,
+ &engine->timeline,
+ engine->status_page.vma);
+ if (err)
+ goto err_hwsp;
+
+ i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
+
+ intel_engine_init_breadcrumbs(engine);
+ intel_engine_init_execlist(engine);
+ intel_engine_init_hangcheck(engine);
+ intel_engine_init_batch_pool(engine);
+ intel_engine_init_cmd_parser(engine);
+
+ /* Use the whole device by default */
+ engine->sseu =
+ intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
+
+ return 0;
+
+err_hwsp:
+ cleanup_status_page(engine);
+ return err;
+}
+
+void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
+{
+ static const struct {
+ u8 engine;
+ u8 sched;
+ } map[] = {
+#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) }
+ MAP(PREEMPTION, PREEMPTION),
+ MAP(SEMAPHORES, SEMAPHORES),
+#undef MAP
+ };
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 enabled, disabled;
+ /*
+ * Derive i915->caps.scheduler from the engines. A capability bit is
+ * collected in @enabled for each engine that has it and in @disabled
+ * for each engine that lacks it; only bits that no engine lacks
+ * survive (enabled & ~disabled). map[] holds bit numbers translating
+ * engine->flags bits into scheduler cap bits.
+ */
+ enabled = 0;
+ disabled = 0;
+ for_each_engine(engine, i915, id) { /* all engines must agree! */
+ int i;
+
+ if (engine->schedule)
+ enabled |= (I915_SCHEDULER_CAP_ENABLED |
+ I915_SCHEDULER_CAP_PRIORITY);
+ else
+ disabled |= (I915_SCHEDULER_CAP_ENABLED |
+ I915_SCHEDULER_CAP_PRIORITY);
+
+ for (i = 0; i < ARRAY_SIZE(map); i++) {
+ if (engine->flags & BIT(map[i].engine))
+ enabled |= BIT(map[i].sched);
+ else
+ disabled |= BIT(map[i].sched);
+ }
+ }
+
+ i915->caps.scheduler = enabled & ~disabled;
+ if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED)) /* no scheduler: report no caps at all */
+ i915->caps.scheduler = 0;
+}
+
+struct measure_breadcrumb { /* throwaway state used by measure_breadcrumb_dw() */
+ struct i915_request rq;
+ struct i915_timeline timeline;
+ struct intel_ring ring;
+ u32 cs[1024]; /* backing store of @ring; the breadcrumb is emitted here */
+};
+
+static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
+{
+ struct measure_breadcrumb *frame;
+ int dw = -ENOMEM;
+ /*
+ * Measure how many dwords engine->emit_fini_breadcrumb() emits by
+ * running it once against a throwaway request, timeline and ring
+ * whose backing store is frame->cs. Returns that dword count, or a
+ * negative error code.
+ *
+ * NOTE(review): a failure of i915_timeline_init() below is reported
+ * as -ENOMEM, whatever error it actually returned.
+ */
+ GEM_BUG_ON(!engine->i915->gt.scratch);
+
+ frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+ if (!frame)
+ return -ENOMEM;
+
+ if (i915_timeline_init(engine->i915,
+ &frame->timeline,
+ engine->status_page.vma))
+ goto out_frame;
+
+ INIT_LIST_HEAD(&frame->ring.request_list);
+ frame->ring.timeline = &frame->timeline;
+ frame->ring.vaddr = frame->cs;
+ frame->ring.size = sizeof(frame->cs);
+ frame->ring.effective_size = frame->ring.size;
+ intel_ring_update_space(&frame->ring);
+
+ frame->rq.i915 = engine->i915;
+ frame->rq.engine = engine;
+ frame->rq.ring = &frame->ring;
+ frame->rq.timeline = &frame->timeline;
+
+ dw = i915_timeline_pin(&frame->timeline);
+ if (dw < 0)
+ goto out_timeline;
+
+ dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; /* distance advanced, in u32 elements */
+
+ i915_timeline_unpin(&frame->timeline);
+
+out_timeline:
+ i915_timeline_fini(&frame->timeline);
+out_frame:
+ kfree(frame);
+ return dw;
+}
+
+/*
+ * Pin @ctx on @engine via intel_context_pin(). On success the pinned
+ * context is stored in @out and 0 is returned; on failure @out is left
+ * untouched and the error code is returned.
+ */
+static int pin_context(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine,
+		       struct intel_context **out)
+{
+	struct intel_context *pinned = intel_context_pin(ctx, engine);
+
+	if (!IS_ERR(pinned)) {
+		*out = pinned;
+		return 0;
+	}
+
+	return PTR_ERR(pinned);
+}
+
+/**
+ * intel_engine_init_common - initialize engine state which might require hw access
+ * @engine: Engine to initialize.
+ *
+ * Initializes @engine structure members shared between legacy and execlists
+ * submission modes which do require hardware access.
+ *
+ * Typically done at later stages of submission mode specific engine setup.
+ *
+ * Returns zero on success or an error code on failure.
+ */
+int intel_engine_init_common(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *i915 = engine->i915;
+ int ret;
+
+ /* We may need to do things with the shrinker which
+ * require us to immediately switch back to the default
+ * context. This can cause a problem as pinning the
+ * default context also requires GTT space which may not
+ * be available. To avoid this we always pin the default
+ * context.
+ */
+ ret = pin_context(i915->kernel_context, engine,
+ &engine->kernel_context);
+ if (ret)
+ return ret;
+
+ /*
+ * Similarly the preempt context must always be available so that
+ * we can interrupt the engine at any time. However, as preemption
+ * is optional, we allow it to fail. (The return value is ignored on
+ * purpose; on failure engine->preempt_context is not written.)
+ */
+ if (i915->preempt_context)
+ pin_context(i915->preempt_context, engine,
+ &engine->preempt_context);
+
+ ret = measure_breadcrumb_dw(engine);
+ if (ret < 0)
+ goto err_unpin;
+
+ engine->emit_fini_breadcrumb_dw = ret;
+
+ engine->set_default_submission(engine);
+
+ return 0;
+
+err_unpin:
+ if (engine->preempt_context)
+ intel_context_unpin(engine->preempt_context);
+ intel_context_unpin(engine->kernel_context);
+ return ret;
+}
+
+/* Run the context's ->reset() hook, if there is a context at all. */
+static void gt_resume_reset_context(struct intel_context *ce)
+{
+	if (ce)
+		ce->ops->reset(ce);
+}
+
+/*
+ * For every engine, reset the pinned kernel context and then the pinned
+ * preempt context (each only if present).
+ */
+void intel_gt_resume(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * After resume, we may need to poke into the pinned kernel
+	 * contexts to paper over any damage caused by the sudden suspend.
+	 * Only the kernel contexts should remain pinned over suspend,
+	 * allowing us to fixup the user contexts on their first pin.
+	 */
+	for_each_engine(engine, i915, id) {
+		gt_resume_reset_context(engine->kernel_context);
+		gt_resume_reset_context(engine->preempt_context);
+	}
+}
+
+/**
+ * intel_engine_cleanup_common - cleans up the engine state created by
+ * the common initializers.
+ * @engine: Engine to cleanup.
+ *
+ * This cleans up everything created by the common helpers.
+ */
+void intel_engine_cleanup_common(struct intel_engine_cs *engine)
+{
+ cleanup_status_page(engine);
+
+ intel_engine_fini_breadcrumbs(engine);
+ intel_engine_cleanup_cmd_parser(engine);
+ i915_gem_batch_pool_fini(&engine->batch_pool);
+
+ if (engine->default_state)
+ i915_gem_object_put(engine->default_state);
+
+ if (engine->preempt_context) /* optional, see intel_engine_init_common() */
+ intel_context_unpin(engine->preempt_context);
+ intel_context_unpin(engine->kernel_context);
+
+ i915_timeline_fini(&engine->timeline);
+
+ intel_wa_list_free(&engine->ctx_wa_list);
+ intel_wa_list_free(&engine->wa_list);
+ intel_wa_list_free(&engine->whitelist);
+}
+
+/*
+ * Read the engine's active head: a 64 bit read of RING_ACTHD and
+ * RING_ACTHD_UDW on gen8+, a 32 bit read of RING_ACTHD on gen4 to gen7,
+ * and a 32 bit read of ACTHD before that.
+ */
+u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (INTEL_GEN(i915) >= 8)
+		return ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
+
+	if (INTEL_GEN(i915) >= 4)
+		return ENGINE_READ(engine, RING_ACTHD);
+
+	return ENGINE_READ(engine, ACTHD);
+}
+
+/*
+ * Read the engine's RING_BBADDR: a 64 bit read together with
+ * RING_BBADDR_UDW on gen8+, a 32 bit read otherwise.
+ */
+u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
+{
+	if (INTEL_GEN(engine->i915) >= 8)
+		return ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
+
+	return ENGINE_READ(engine, RING_BBADDR);
+}
+
+int intel_engine_stop_cs(struct intel_engine_cs *engine)
+{
+ struct intel_uncore *uncore = engine->uncore;
+ const u32 base = engine->mmio_base;
+ const i915_reg_t mode = RING_MI_MODE(base);
+ int err;
+ /*
+ * Set STOP_RING in the engine's RING_MI_MODE register and wait for
+ * MODE_IDLE to be reported there.
+ *
+ * Returns 0 on success, -ENODEV before gen3 (nothing is written)
+ * and -ETIMEDOUT if MODE_IDLE is not seen in time.
+ */
+ if (INTEL_GEN(engine->i915) < 3)
+ return -ENODEV;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
+
+ err = 0;
+ if (__intel_wait_for_register_fw(uncore,
+ mode, MODE_IDLE, MODE_IDLE,
+ 1000, 0,
+ NULL)) {
+ GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
+ err = -ETIMEDOUT;
+ }
+
+ /* A final mmio read to let GPU writes be hopefully flushed to memory */
+ intel_uncore_posting_read_fw(uncore, mode);
+
+ return err;
+}
+
+void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
+{
+ GEM_TRACE("%s\n", engine->name);
+ /* Clear the STOP_RING bit that intel_engine_stop_cs() sets in RING_MI_MODE. */
+ ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+}
+
+/*
+ * Human readable suffix (with a leading space) for the cache level @type.
+ * I915_CACHE_LLC reads " LLC" on LLC platforms and " snooped" elsewhere;
+ * unknown values give an empty string.
+ */
+const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
+{
+	switch (type) {
+	case I915_CACHE_NONE:
+		return " uncached";
+	case I915_CACHE_LLC:
+		if (HAS_LLC(i915))
+			return " LLC";
+		return " snooped";
+	case I915_CACHE_L3_LLC:
+		return " L3+LLC";
+	case I915_CACHE_WT:
+		return " WT";
+	default:
+		return "";
+	}
+}
+
+/*
+ * Compute the default slice/subslice selection for GEN8_MCR_SELECTOR: the
+ * highest slice present in sseu->slice_mask and the highest subslice
+ * present in that slice's subslice mask.
+ *
+ * Gen10 encodes the pair with GEN8_MCR_SLICE()/GEN8_MCR_SUBSLICE(), gen11+
+ * with the GEN11_MCR_* variants. All other gens get 0.
+ */
+u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
+{
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
+	u32 mcr_s_ss_select;
+	u32 slice = 0;
+	u32 subslice = 0;
+
+	/*
+	 * fls() is 1-based (fls(0) == 0, fls(1) == 1), whereas both the
+	 * subslice_mask[] index and the values programmed into the MCR
+	 * fields are 0-based. Using fls() unadjusted looks up the subslice
+	 * mask of the slice *after* the last one present. An empty mask
+	 * falls back to index 0 rather than underflowing.
+	 */
+	if (sseu->slice_mask) {
+		slice = fls(sseu->slice_mask) - 1;
+		if (sseu->subslice_mask[slice])
+			subslice = fls(sseu->subslice_mask[slice]) - 1;
+	}
+
+	if (IS_GEN(dev_priv, 10))
+		mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
+				  GEN8_MCR_SUBSLICE(subslice);
+	else if (INTEL_GEN(dev_priv) >= 11)
+		mcr_s_ss_select = GEN11_MCR_SLICE(slice) |
+				  GEN11_MCR_SUBSLICE(subslice);
+	else
+		mcr_s_ss_select = 0;
+
+	return mcr_s_ss_select;
+}
+
+static inline u32
+read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
+ int subslice, i915_reg_t reg)
+{
+ struct intel_uncore *uncore = &dev_priv->uncore;
+ u32 mcr_slice_subslice_mask;
+ u32 mcr_slice_subslice_select;
+ u32 default_mcr_s_ss_select;
+ u32 mcr;
+ u32 ret;
+ enum forcewake_domains fw_domains;
+ /*
+ * Read @reg for one specific @slice/@subslice: point the slice and
+ * subslice fields of GEN8_MCR_SELECTOR at the requested pair, read
+ * @reg, then put the default selection computed by
+ * intel_calculate_mcr_s_ss_select() back.
+ *
+ * Gen11+ uses the GEN11_MCR_* field layout, everything else the
+ * GEN8_MCR_* one.
+ */
+ if (INTEL_GEN(dev_priv) >= 11) {
+ mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
+ GEN11_MCR_SUBSLICE_MASK;
+ mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
+ GEN11_MCR_SUBSLICE(subslice);
+ } else {
+ mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
+ GEN8_MCR_SUBSLICE_MASK;
+ mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) |
+ GEN8_MCR_SUBSLICE(subslice);
+ }
+
+ default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);
+
+ fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
+ FW_REG_READ);
+ fw_domains |= intel_uncore_forcewake_for_reg(uncore,
+ GEN8_MCR_SELECTOR,
+ FW_REG_READ | FW_REG_WRITE);
+ /* Steering, read and restore all happen under uncore->lock with irqs off. */
+ spin_lock_irq(&uncore->lock);
+ intel_uncore_forcewake_get__locked(uncore, fw_domains);
+
+ mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+ /* The selector is expected to still hold the default selection. */
+ WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) !=
+ default_mcr_s_ss_select);
+
+ mcr &= ~mcr_slice_subslice_mask;
+ mcr |= mcr_slice_subslice_select;
+ intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+
+ ret = intel_uncore_read_fw(uncore, reg);
+
+ mcr &= ~mcr_slice_subslice_mask;
+ mcr |= default_mcr_s_ss_select;
+
+ intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+
+ intel_uncore_forcewake_put__locked(uncore, fw_domains);
+ spin_unlock_irq(&uncore->lock);
+
+ return ret;
+}
+
+/*
+ * Snapshot the INSTDONE registers of @engine into @instdone.
+ *
+ * NB: please notice the memset - @instdone is zeroed first, so every field
+ * that is not read for this engine/generation is reported as 0.
+ */
+void intel_engine_get_instdone(struct intel_engine_cs *engine,
+			       struct intel_instdone *instdone)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct intel_uncore *uncore = engine->uncore;
+	const u32 mmio_base = engine->mmio_base;
+	const int gen = INTEL_GEN(dev_priv);
+	int slice;
+	int subslice;
+
+	memset(instdone, 0, sizeof(*instdone));
+
+	/* gen2/3 only read the single GEN2_INSTDONE register */
+	if (gen == 2 || gen == 3) {
+		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
+		return;
+	}
+
+	/* Every other generation reads the per-ring INSTDONE */
+	instdone->instdone =
+		intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
+
+	/* The remaining registers are only read for the render engine */
+	if (engine->id != RCS0)
+		return;
+
+	if (gen >= 4 && gen <= 6) {
+		/* HACK: Using the wrong struct member */
+		instdone->slice_common =
+			intel_uncore_read(uncore, GEN4_INSTDONE1);
+		return;
+	}
+
+	instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE);
+
+	if (gen == 7) {
+		/* A single sampler/row register pair, read directly */
+		instdone->sampler[0][0] =
+			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
+		instdone->row[0][0] =
+			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
+		return;
+	}
+
+	/* Otherwise read one sampler/row pair per slice/subslice */
+	for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
+		instdone->sampler[slice][subslice] =
+			read_subslice_reg(dev_priv, slice, subslice,
+					  GEN7_SAMPLER_INSTDONE);
+		instdone->row[slice][subslice] =
+			read_subslice_reg(dev_priv, slice, subslice,
+					  GEN7_ROW_INSTDONE);
+	}
+}
+
+/*
+ * Check the ring registers of @engine: the ring is idle when HEAD == TAIL
+ * and (on anything newer than gen2) RING_MI_MODE reports MODE_IDLE.
+ * Returns true without touching the hardware if no runtime-pm wakeref can
+ * be taken, or (selftests only) if the engine has no mmio_base.
+ */
+static bool ring_is_idle(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	intel_wakeref_t wakeref;
+	u32 head, tail;
+	bool idle;
+
+	if (I915_SELFTEST_ONLY(!engine->mmio_base))
+		return true;
+
+	/* If the whole device is asleep, the engine must be idle */
+	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	if (!wakeref)
+		return true;
+
+	/* First check that no commands are left in the ring */
+	head = ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR;
+	tail = ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR;
+	idle = head == tail;
+
+	/* No bit for gen2, so assume the CS parser is idle */
+	if (INTEL_GEN(dev_priv) > 2) {
+		if (!(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
+			idle = false;
+	}
+
+	intel_runtime_pm_put(dev_priv, wakeref);
+
+	return idle;
+}
+
+/**
+ * intel_engine_is_idle() - Report if the engine has finished processing all work
+ * @engine: the intel_engine_cs
+ *
+ * Return true if there are no requests pending, nothing left to be submitted
+ * to hardware, and that the engine is idle.
+ */
+bool intel_engine_is_idle(struct intel_engine_cs *engine)
+{
+ /* More white lies, if wedged, hw state is inconsistent */
+ if (i915_reset_failed(engine->i915))
+ return true;
+
+ /* Waiting to drain ELSP? */
+ if (READ_ONCE(engine->execlists.active)) {
+ struct tasklet_struct *t = &engine->execlists.tasklet;
+
+ /*
+ * Try to run the submission tasklet by hand, with bottom
+ * halves disabled, so that any pending events are processed
+ * before we sample execlists.active again.
+ */
+ local_bh_disable();
+ if (tasklet_trylock(t)) {
+ /* Must wait for any GPU reset in progress. */
+ if (__tasklet_is_enabled(t))
+ t->func(t->data);
+ tasklet_unlock(t);
+ }
+ local_bh_enable();
+
+ /* Otherwise flush the tasklet if it was on another cpu */
+ tasklet_unlock_wait(t);
+
+ /* Still active after the flush: not idle */
+ if (READ_ONCE(engine->execlists.active))
+ return false;
+ }
+
+ /* ELSP is empty, but there are ready requests? E.g. after reset */
+ if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
+ return false;
+
+ /* Ring stopped? */
+ return ring_is_idle(engine);
+}
+
+/*
+ * Report whether every engine of @i915 is idle.
+ *
+ * Returns true immediately if the device is wedged or the GT is not marked
+ * awake; otherwise returns false as soon as one engine fails
+ * intel_engine_is_idle().
+ */
+bool intel_engines_are_idle(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * If the driver is wedged, HW state may be very inconsistent and
+	 * report that it is still busy, even though we have stopped using it.
+	 *
+	 * Likewise, if we are already parked (and so passed an idleness
+	 * test), we must still be idle.
+	 */
+	if (i915_reset_failed(i915) || !READ_ONCE(i915->gt.awake))
+		return true;
+
+	for_each_engine(engine, i915, id) {
+		bool idle = intel_engine_is_idle(engine);
+
+		if (!idle)
+			return false;
+	}
+
+	return true;
+}
+
+/* Invoke the set_default_submission() hook of every engine of @i915. */
+void intel_engines_reset_default_submission(struct drm_i915_private *i915)
+{
+	enum intel_engine_id id;
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, i915, id) {
+		engine->set_default_submission(engine);
+	}
+}
+
+/*
+ * Reset all engines, unless a GPU reset on this platform would clobber the
+ * display. Returns true only if intel_gpu_reset() was attempted and
+ * returned 0.
+ */
+static bool reset_engines(struct drm_i915_private *i915)
+{
+	if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
+		return intel_gpu_reset(i915, ALL_ENGINES) == 0;
+
+	return false;
+}
+
+/**
+ * intel_engines_sanitize: called after the GPU has lost power
+ * @i915: the i915 device
+ * @force: ignore a failed reset and sanitize engine state anyway
+ *
+ * Whenever the GPU is reset, be it by an explicit GPU reset or by a PCI
+ * power cycle, it loses its state and our state tracking has to be reset to
+ * match. Note that calling intel_engines_sanitize() if the GPU has not
+ * been reset results in much confusion!
+ */
+void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	bool reset_ok;
+
+	GEM_TRACE("\n");
+
+	/* The reset is always attempted; @force only overrides its failure */
+	reset_ok = reset_engines(i915);
+	if (reset_ok || force) {
+		for_each_engine(engine, i915, id)
+			intel_engine_reset(engine, false);
+	}
+}
+
+/**
+ * intel_engines_park: called when the GT is transitioning from busy->idle
+ * @i915: the i915 device
+ *
+ * The GT is now idle and about to go to sleep (maybe never to wake again?).
+ * Time for us to tidy and put away our toys (release resources back to the
+ * system).
+ */
+void intel_engines_park(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, i915, id) {
+ /* Flush the residual irq tasklets first. */
+ intel_engine_disarm_breadcrumbs(engine);
+ tasklet_kill(&engine->execlists.tasklet);
+
+ /*
+ * We are committed now to parking the engines, make sure there
+ * will be no more interrupts arriving later and the engines
+ * are truly idle.
+ */
+ if (wait_for(intel_engine_is_idle(engine), 10)) {
+ struct drm_printer p = drm_debug_printer(__func__);
+
+ /* Report the problem and dump the engine, but park anyway */
+ dev_err(i915->drm.dev,
+ "%s is not idle before parking\n",
+ engine->name);
+ intel_engine_dump(engine, &p, NULL);
+ }
+
+ /* Must be reset upon idling, or we may miss the busy wakeup. */
+ GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
+
+ /* Optional per-engine park hook */
+ if (engine->park)
+ engine->park(engine);
+
+ /* Drop the default-state mapping pinned by intel_engines_unpark() */
+ if (engine->pinned_default_state) {
+ i915_gem_object_unpin_map(engine->default_state);
+ engine->pinned_default_state = NULL;
+ }
+
+ i915_gem_batch_pool_fini(&engine->batch_pool);
+ engine->execlists.no_priolist = false;
+ }
+
+ i915->gt.active_engines = 0;
+}
+
+/**
+ * intel_engines_unpark: called when the GT is transitioning from idle->busy
+ * @i915: the i915 device
+ *
+ * The GT was idle and is now about to start executing new user requests.
+ * For each engine: map the default state (if any), call the optional
+ * unpark hook and reinitialise hangcheck.
+ */
+void intel_engines_unpark(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		/* Pin the default state for fast resets from atomic context. */
+		if (engine->default_state) {
+			void *vaddr;
+
+			vaddr = i915_gem_object_pin_map(engine->default_state,
+							I915_MAP_WB);
+			/* A failed mapping is silently ignored */
+			if (!IS_ERR_OR_NULL(vaddr))
+				engine->pinned_default_state = vaddr;
+		}
+
+		if (engine->unpark)
+			engine->unpark(engine);
+
+		intel_engine_init_hangcheck(engine);
+	}
+}
+
+/**
+ * intel_engine_lost_context: called when the GPU is reset into unknown state
+ * @engine: the engine
+ *
+ * Either the GPU has been reset, or we are about to lose track of its
+ * current logical state some other way (e.g. suspend). The next use must
+ * therefore presume nothing about the current state and load from scratch.
+ *
+ * Clears engine->last_retired_context and, if it was set, unpins it.
+ * The caller must hold struct_mutex.
+ */
+void intel_engine_lost_context(struct intel_engine_cs *engine)
+{
+	struct intel_context *last;
+
+	lockdep_assert_held(&engine->i915->drm.struct_mutex);
+
+	last = fetch_and_zero(&engine->last_retired_context);
+	if (!last)
+		return;
+
+	intel_context_unpin(last);
+}
+
+/*
+ * Report whether @engine can be used to store a dword: never on gen2, not
+ * on I915G/I915GM among gen3, and not on the video decode class on gen6.
+ */
+bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
+{
+	const int gen = INTEL_GEN(engine->i915);
+
+	/* uses physical not virtual addresses */
+	if (gen == 2)
+		return false;
+
+	/* maybe only uses physical not virtual addresses */
+	if (gen == 3)
+		return !IS_I915G(engine->i915) && !IS_I915GM(engine->i915);
+
+	/* b0rked */
+	if (gen == 6)
+		return engine->class != VIDEO_DECODE_CLASS;
+
+	return true;
+}
+
+/*
+ * Returns a bitmask with BIT(uabi_class) set for every engine of @i915
+ * that has a default_state object.
+ */
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int classes = 0;
+
+	for_each_engine(engine, i915, id) {
+		if (!engine->default_state)
+			continue;
+
+		classes |= BIT(engine->uabi_class);
+	}
+
+	return classes;
+}
+
+/*
+ * Append " prio=<n>" to @buf at offset @x (buffer size @len), unless the
+ * priority is I915_PRIORITY_INVALID. Returns the new offset, i.e. @x plus
+ * whatever snprintf() returned. @i915 is unused.
+ */
+static int print_sched_attr(struct drm_i915_private *i915,
+			    const struct i915_sched_attr *attr,
+			    char *buf, int x, int len)
+{
+	if (attr->priority != I915_PRIORITY_INVALID)
+		x += snprintf(buf + x, len - x,
+			      " prio=%d", attr->priority);
+
+	return x;
+}
+
+/*
+ * Print a one-line summary of @rq to @m, preceded by @prefix:
+ * fence context:seqno, a "!" (completed) or "*" (started) marker, a "+" if
+ * signaling has been enabled on the fence, the scheduling attributes, the
+ * time since the request was emitted and the timeline name.
+ */
+static void print_request(struct drm_printer *m,
+			  struct i915_request *rq,
+			  const char *prefix)
+{
+	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
+	const char *state = "";
+	const char *signal = "";
+	char buf[80] = "";
+
+	print_sched_attr(rq->i915, &rq->sched.attr, buf, 0, sizeof(buf));
+
+	if (i915_request_completed(rq))
+		state = "!";
+	else if (i915_request_started(rq))
+		state = "*";
+
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
+		signal = "+";
+
+	drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
+		   prefix,
+		   rq->fence.context, rq->fence.seqno,
+		   state,
+		   signal,
+		   buf,
+		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
+		   name);
+}
+
+/*
+ * Hexdump @len bytes of @buf to @m, eight u32 per row, each row prefixed
+ * with its byte offset. A run of rows identical to the last printed row is
+ * collapsed into a single "*" line.
+ */
+static void hexdump(struct drm_printer *m, const void *buf, size_t len)
+{
+	const size_t rowsize = 8 * sizeof(u32);
+	const void *last = NULL; /* last row that was actually printed */
+	bool elided = false;
+	size_t off = 0;
+
+	while (off < len) {
+		const void *row = buf + off;
+		char line[128];
+
+		if (last && memcmp(last, row, rowsize) == 0) {
+			/* Same as the previous row: emit one marker per run */
+			if (!elided) {
+				drm_printf(m, "*\n");
+				elided = true;
+			}
+		} else {
+			WARN_ON_ONCE(hex_dump_to_buffer(row, len - off,
+							rowsize, sizeof(u32),
+							line, sizeof(line),
+							false) >= sizeof(line));
+			drm_printf(m, "[%04zx] %s\n", off, line);
+
+			last = row;
+			elided = false;
+		}
+
+		off += rowsize;
+	}
+}
+
+/*
+ * Print a snapshot of the mmio registers of @engine to @m, followed by the
+ * execlists state (CSB entries and ELSP ports) when HAS_EXECLISTS(), or the
+ * PP_DIR registers on other gen7+ platforms.
+ *
+ * This reads hardware registers; the visible caller (intel_engine_dump())
+ * only calls it while holding a runtime-pm wakeref.
+ */
+static void intel_engine_print_registers(const struct intel_engine_cs *engine,
+ struct drm_printer *m)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ const struct intel_engine_execlists * const execlists =
+ &engine->execlists;
+ u64 addr;
+
+ if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7))
+ drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+ drm_printf(m, "\tRING_START: 0x%08x\n",
+ ENGINE_READ(engine, RING_START));
+ drm_printf(m, "\tRING_HEAD: 0x%08x\n",
+ ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
+ drm_printf(m, "\tRING_TAIL: 0x%08x\n",
+ ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
+ drm_printf(m, "\tRING_CTL: 0x%08x%s\n",
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
+ /* RING_MI_MODE is only dumped on gen3+ */
+ if (INTEL_GEN(engine->i915) > 2) {
+ drm_printf(m, "\tRING_MODE: 0x%08x%s\n",
+ ENGINE_READ(engine, RING_MI_MODE),
+ ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
+ }
+
+ if (INTEL_GEN(dev_priv) >= 6) {
+ drm_printf(m, "\tRING_IMR: %08x\n",
+ ENGINE_READ(engine, RING_IMR));
+ }
+
+ addr = intel_engine_get_active_head(engine);
+ drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
+ upper_32_bits(addr), lower_32_bits(addr));
+ addr = intel_engine_get_last_batch_head(engine);
+ drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
+ upper_32_bits(addr), lower_32_bits(addr));
+ /* The DMA fetch address register differs per generation */
+ if (INTEL_GEN(dev_priv) >= 8)
+ addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
+ else if (INTEL_GEN(dev_priv) >= 4)
+ addr = ENGINE_READ(engine, RING_DMA_FADD);
+ else
+ addr = ENGINE_READ(engine, DMA_FADD_I8XX);
+ drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
+ upper_32_bits(addr), lower_32_bits(addr));
+ if (INTEL_GEN(dev_priv) >= 4) {
+ drm_printf(m, "\tIPEIR: 0x%08x\n",
+ ENGINE_READ(engine, RING_IPEIR));
+ drm_printf(m, "\tIPEHR: 0x%08x\n",
+ ENGINE_READ(engine, RING_IPEHR));
+ } else {
+ drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
+ drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
+ }
+
+ if (HAS_EXECLISTS(dev_priv)) {
+ /* CSB entries are read from the status page, two dwords each */
+ const u32 *hws =
+ &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
+ const u8 num_entries = execlists->csb_size;
+ unsigned int idx;
+ u8 read, write;
+
+ drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
+ num_entries);
+
+ read = execlists->csb_head;
+ write = READ_ONCE(*execlists->csb_write);
+
+ drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
+ read, write,
+ yesno(test_bit(TASKLET_STATE_SCHED,
+ &engine->execlists.tasklet.state)),
+ enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
+ /*
+ * Clamp out-of-range pointers to 0 and unwrap write past read,
+ * so the pending entries can be walked with an increasing
+ * index taken modulo num_entries.
+ */
+ if (read >= num_entries)
+ read = 0;
+ if (write >= num_entries)
+ write = 0;
+ if (read > write)
+ write += num_entries;
+ while (read < write) {
+ idx = ++read % num_entries;
+ drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
+ idx, hws[idx * 2], hws[idx * 2 + 1]);
+ }
+
+ /* The port requests are inspected under the RCU read lock */
+ rcu_read_lock();
+ for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
+ struct i915_request *rq;
+ unsigned int count;
+
+ rq = port_unpack(&execlists->port[idx], &count);
+ if (rq) {
+ char hdr[80];
+
+ snprintf(hdr, sizeof(hdr),
+ "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+ idx, count,
+ i915_ggtt_offset(rq->ring->vma),
+ rq->timeline->hwsp_offset,
+ hwsp_seqno(rq));
+ print_request(m, rq, hdr);
+ } else {
+ drm_printf(m, "\t\tELSP[%d] idle\n", idx);
+ }
+ }
+ drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
+ rcu_read_unlock();
+ } else if (INTEL_GEN(dev_priv) > 6) {
+ drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
+ ENGINE_READ(engine, RING_PP_DIR_BASE));
+ drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
+ ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
+ drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
+ ENGINE_READ(engine, RING_PP_DIR_DCLV));
+ }
+}
+
+/*
+ * Print the head/postfix/tail offsets and batch address of @rq, then
+ * hexdump the ring contents between rq->head and rq->tail. The contents
+ * are first linearised into a temporary GFP_ATOMIC buffer, so a request
+ * that wraps around the end of the ring is dumped contiguously. If the
+ * allocation fails only the header line is printed.
+ */
+static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+{
+	const void *vaddr;
+	unsigned int head;
+	unsigned int len;
+	void *ring;
+	int size;
+
+	drm_printf(m,
+		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
+		   rq->head, rq->postfix, rq->tail,
+		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
+		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+
+	/* Number of bytes between head and tail, allowing for wraparound */
+	size = rq->tail - rq->head;
+	if (rq->tail < rq->head)
+		size += rq->ring->size;
+
+	ring = kmalloc(size, GFP_ATOMIC);
+	if (!ring)
+		return;
+
+	vaddr = rq->ring->vaddr;
+	head = rq->head;
+	len = 0;
+
+	if (rq->tail < head) {
+		/* Wrapped: copy from head up to the end of the ring first */
+		len = rq->ring->size - head;
+		memcpy(ring, vaddr + head, len);
+		head = 0;
+	}
+	memcpy(ring + len, vaddr + head, size - len);
+
+	hexdump(m, ring, size);
+	kfree(ring);
+}
+
+/*
+ * intel_engine_dump() - print a debug snapshot of an engine
+ * @engine: the engine to dump
+ * @m: where to print
+ * @header: optional printf-style format (followed by its arguments) printed
+ *          first; may be NULL
+ *
+ * Prints the hangcheck and reset counters, the first/last/active requests
+ * on the engine timeline, the registers (only if a runtime-pm wakeref can
+ * be taken without waking the device), the queued requests, a hexdump of
+ * the status page, the idle state and the breadcrumbs.
+ */
+void intel_engine_dump(struct intel_engine_cs *engine,
+ struct drm_printer *m,
+ const char *header, ...)
+{
+ struct i915_gpu_error * const error = &engine->i915->gpu_error;
+ struct i915_request *rq;
+ intel_wakeref_t wakeref;
+
+ if (header) {
+ va_list ap;
+
+ va_start(ap, header);
+ drm_vprintf(m, header, &ap);
+ va_end(ap);
+ }
+
+ if (i915_reset_failed(engine->i915))
+ drm_printf(m, "*** WEDGED ***\n");
+
+ drm_printf(m, "\tHangcheck %x:%x [%d ms]\n",
+ engine->hangcheck.last_seqno,
+ engine->hangcheck.next_seqno,
+ jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
+ drm_printf(m, "\tReset count: %d (global %d)\n",
+ i915_reset_engine_count(error, engine),
+ i915_reset_count(error));
+
+ rcu_read_lock();
+
+ drm_printf(m, "\tRequests:\n");
+
+ /*
+ * list_first_entry()/list_last_entry() do not check for an empty
+ * list, hence the comparison of the link against the list head.
+ */
+ rq = list_first_entry(&engine->timeline.requests,
+ struct i915_request, link);
+ if (&rq->link != &engine->timeline.requests)
+ print_request(m, rq, "\t\tfirst ");
+
+ rq = list_last_entry(&engine->timeline.requests,
+ struct i915_request, link);
+ if (&rq->link != &engine->timeline.requests)
+ print_request(m, rq, "\t\tlast ");
+
+ rq = intel_engine_find_active_request(engine);
+ if (rq) {
+ print_request(m, rq, "\t\tactive ");
+
+ drm_printf(m, "\t\tring->start: 0x%08x\n",
+ i915_ggtt_offset(rq->ring->vma));
+ drm_printf(m, "\t\tring->head: 0x%08x\n",
+ rq->ring->head);
+ drm_printf(m, "\t\tring->tail: 0x%08x\n",
+ rq->ring->tail);
+ drm_printf(m, "\t\tring->emit: 0x%08x\n",
+ rq->ring->emit);
+ drm_printf(m, "\t\tring->space: 0x%08x\n",
+ rq->ring->space);
+ drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
+ rq->timeline->hwsp_offset);
+
+ print_request_ring(m, rq);
+ }
+
+ rcu_read_unlock();
+
+ /* Only touch the registers if the device is already awake */
+ wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
+ if (wakeref) {
+ intel_engine_print_registers(engine, m);
+ intel_runtime_pm_put(engine->i915, wakeref);
+ } else {
+ drm_printf(m, "\tDevice is asleep; skipping register dump\n");
+ }
+
+ intel_execlists_show_requests(engine, m, print_request, 8);
+
+ drm_printf(m, "HWSP:\n");
+ hexdump(m, engine->status_page.addr, PAGE_SIZE);
+
+ drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
+
+ intel_engine_print_breadcrumbs(engine, m);
+}
+
+/*
+ * Translation from the uABI engine class (I915_ENGINE_CLASS_*) to the
+ * driver's internal engine class. Read-only lookup table.
+ */
+static const u8 user_class_map[] = {
+	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
+	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
+	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
+	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
+};
+
+/**
+ * intel_engine_lookup_user() - look up an engine by uABI class and instance
+ * @i915: the i915 device
+ * @class: uABI engine class, used as an index into user_class_map
+ * @instance: engine instance within the class
+ *
+ * Returns the i915->engine_class[][] entry for the translated class and
+ * @instance, or NULL if @class is outside user_class_map or @instance is
+ * greater than MAX_ENGINE_INSTANCE.
+ */
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
+{
+	if (class >= ARRAY_SIZE(user_class_map))
+		return NULL;
+
+	/* From here on @class is the internal engine class */
+	class = user_class_map[class];
+
+	GEM_BUG_ON(class > MAX_ENGINE_CLASS);
+
+	if (instance > MAX_ENGINE_INSTANCE)
+		return NULL;
+
+	return i915->engine_class[class][instance];
+}
+
+/**
+ * intel_enable_engine_stats() - Enable engine busy tracking on engine
+ * @engine: engine to enable stats collection
+ *
+ * Start collecting the engine busyness data for @engine. Enabling is
+ * reference counted; the first enable seeds the active count from the
+ * execlists ports that are currently set.
+ *
+ * Returns 0 on success or a negative error code: -ENODEV if the engine does
+ * not support stats, -EBUSY if the enable count is saturated.
+ */
+int intel_enable_engine_stats(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+	unsigned long flags;
+	int err = 0;
+
+	if (!intel_engine_supports_stats(engine))
+		return -ENODEV;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	write_seqlock(&engine->stats.lock);
+
+	if (unlikely(engine->stats.enabled == ~0)) {
+		/* The enable count would overflow */
+		err = -EBUSY;
+	} else if (engine->stats.enabled++ == 0) {
+		const unsigned int num_ports = execlists_num_ports(execlists);
+		unsigned int n;
+
+		engine->stats.enabled_at = ktime_get();
+
+		/* XXX submission method oblivious? */
+		for (n = 0;
+		     n < num_ports && port_isset(&execlists->port[n]);
+		     n++)
+			engine->stats.active++;
+
+		if (engine->stats.active)
+			engine->stats.start = engine->stats.enabled_at;
+	}
+
+	write_sequnlock(&engine->stats.lock);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+	return err;
+}
+
+/*
+ * Return the accumulated busy time of @engine: stats.total, plus the time
+ * elapsed since stats.start if stats.active is non-zero. Takes no lock
+ * itself; the visible callers invoke it inside the stats.lock seqlock.
+ */
+static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
+{
+	ktime_t total = engine->stats.total;
+
+	if (!engine->stats.active)
+		return total;
+
+	/*
+	 * The engine is executing something right now, so include the
+	 * time spent on it so far.
+	 */
+	return ktime_add(total,
+			 ktime_sub(ktime_get(), engine->stats.start));
+}
+
+/**
+ * intel_engine_get_busy_time() - Return current accumulated engine busyness
+ * @engine: engine to report on
+ *
+ * Returns accumulated time @engine was busy since engine stats were enabled.
+ * The value is sampled under the stats seqlock and the read is retried
+ * until it did not race with a writer.
+ */
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
+{
+	ktime_t total;
+
+	for (;;) {
+		unsigned int seq = read_seqbegin(&engine->stats.lock);
+
+		total = __intel_engine_get_busy_time(engine);
+		if (!read_seqretry(&engine->stats.lock, seq))
+			break;
+	}
+
+	return total;
+}
+
+/**
+ * intel_disable_engine_stats() - Disable engine busy tracking on engine
+ * @engine: engine to disable stats collection
+ *
+ * Stops collecting the engine busyness data for @engine. Drops one enable
+ * reference; when the last one goes, the busy time so far is folded into
+ * stats.total and the active count is cleared. Does nothing if the engine
+ * does not support stats.
+ */
+void intel_disable_engine_stats(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (!intel_engine_supports_stats(engine))
+		return;
+
+	write_seqlock_irqsave(&engine->stats.lock, flags);
+
+	/* Unbalanced disable */
+	WARN_ON_ONCE(engine->stats.enabled == 0);
+
+	engine->stats.enabled--;
+	if (engine->stats.enabled == 0) {
+		engine->stats.total = __intel_engine_get_busy_time(engine);
+		engine->stats.active = 0;
+	}
+
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
+}
+
+/*
+ * Returns true if the RING_START register of the request's engine currently
+ * holds the GGTT offset of the request's ring.
+ */
+static bool match_ring(struct i915_request *rq)
+{
+	const u32 expected = i915_ggtt_offset(rq->ring->vma);
+
+	return ENGINE_READ(rq->engine, RING_START) == expected;
+}
+
+/*
+ * Find the request currently executing on @engine: the first request on the
+ * engine timeline that has not completed, provided it has started and its
+ * ring matches RING_START. Returns NULL otherwise.
+ */
+struct i915_request *
+intel_engine_find_active_request(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq, *active = NULL;
+	unsigned long flags;
+
+	/*
+	 * Our callers are the error capture, reset, and the engine state
+	 * dump, all of which run at random points in time. In particular,
+	 * note that neither is crucially ordered with an interrupt. After a
+	 * hang, the GPU is dead and we assume that no more writes can happen
+	 * (we waited long enough for all writes that were in transaction to
+	 * be flushed) - adding an extra delay for a recent interrupt is
+	 * pointless. Hence, we do not need an engine->irq_seqno_barrier()
+	 * before the seqno reads. At all other times, we must assume the GPU
+	 * is still running, but we only care about the snapshot of this
+	 * moment.
+	 */
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	list_for_each_entry(rq, &engine->timeline.requests, link) {
+		if (i915_request_completed(rq))
+			continue;
+
+		/*
+		 * Only the first incomplete request is considered.
+		 * More than one preemptible request may match!
+		 */
+		if (i915_request_started(rq) && match_ring(rq))
+			active = rq;
+
+		break;
+	}
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+	return active;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_engine_cs.c"
+#endif
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_ENGINE_TYPES__
+#define __INTEL_ENGINE_TYPES__
+
+#include <linux/hashtable.h>
+#include <linux/irq_work.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/types.h>
+
+#include "i915_gem.h"
+#include "i915_gem_batch_pool.h"
+#include "i915_pmu.h"
+#include "i915_priolist_types.h"
+#include "i915_selftest.h"
+#include "i915_timeline_types.h"
+#include "intel_sseu.h"
+#include "intel_workarounds_types.h"
+
+#define I915_MAX_SLICES 3
+#define I915_MAX_SUBSLICES 8
+
+#define I915_CMD_HASH_ORDER 9
+
+struct dma_fence;
+struct drm_i915_reg_table;
+struct i915_gem_context;
+struct i915_request;
+struct i915_sched_attr;
+struct intel_uncore;
+
+typedef u8 intel_engine_mask_t;
+#define ALL_ENGINES ((intel_engine_mask_t)~0ul)
+
+/*
+ * Per-engine hardware status page.
+ * NOTE(review): field roles below are inferred from the names — confirm.
+ */
+struct intel_hw_status_page {
+ struct i915_vma *vma; /* presumably the vma backing the page */
+ u32 *addr; /* presumably a CPU pointer to the page, indexed in dwords */
+};
+
+/* Snapshot of an engine's INSTDONE registers. */
+struct intel_instdone {
+ u32 instdone; /* the engine's main INSTDONE register */
+ /* The following exist only in the RCS engine */
+ u32 slice_common;
+ /* One value per [slice][subslice] */
+ u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+ u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+};
+
+/*
+ * Per-engine hangcheck bookkeeping.
+ * NOTE(review): field meanings are inferred from names — confirm against
+ * the hangcheck implementation.
+ */
+struct intel_engine_hangcheck {
+ u64 acthd; /* presumably the last sampled active head */
+ u32 last_seqno;
+ u32 next_seqno;
+ unsigned long action_timestamp; /* in jiffies */
+ struct intel_instdone instdone; /* presumably the last INSTDONE snapshot */
+};
+
+/*
+ * A ring buffer used to hold commands.
+ * NOTE(review): the per-field notes are inferred from names and from the
+ * debug dump helpers — confirm against the ring implementation.
+ */
+struct intel_ring {
+ struct kref ref; /* reference count */
+ struct i915_vma *vma; /* presumably the vma backing the ring */
+ void *vaddr; /* CPU mapping of the ring contents */
+
+ struct i915_timeline *timeline;
+ struct list_head request_list;
+ struct list_head active_link;
+
+ /* Offsets into the ring, in bytes — TODO confirm units */
+ u32 head;
+ u32 tail;
+ u32 emit;
+
+ u32 space;
+ u32 size; /* total size of the ring */
+ u32 effective_size;
+};
+
+/*
+ * We use a single page to load ctx workarounds, so all of these
+ * values are expressed in terms of dwords.
+ *
+ * struct i915_wa_ctx_bb:
+ * offset: specifies the batch starting position; also helpful if we
+ * want to have multiple batches at different offsets based on
+ * some criteria. It is not a requirement at the moment but provides
+ * an option for future use.
+ * size: size of the batch in DWORDS
+ */
+struct i915_ctx_workarounds {
+ /* One descriptor each for the indirect-ctx and per-ctx batches */
+ struct i915_wa_ctx_bb {
+ u32 offset;
+ u32 size;
+ } indirect_ctx, per_ctx;
+ struct i915_vma *vma; /* presumably the vma of the workaround page */
+};
+
+#define I915_MAX_VCS 4
+#define I915_MAX_VECS 2
+
+/*
+ * Engine IDs definitions.
+ * Keep instances of the same type engine together.
+ */
+enum intel_engine_id {
+ RCS0 = 0,
+ BCS0,
+ VCS0,
+ VCS1,
+ VCS2,
+ VCS3,
+/* _VCS(n): id of the n-th VCS instance, relies on VCS0..VCS3 being contiguous */
+#define _VCS(n) (VCS0 + (n))
+ VECS0,
+ VECS1,
+/* _VECS(n): id of the n-th VECS instance, relies on VECS0..VECS1 being contiguous */
+#define _VECS(n) (VECS0 + (n))
+ /* Number of engine ids; must remain the last enumerator */
+ I915_NUM_ENGINES
+};
+
+/*
+ * State for the preempt-hang selftest; embedded in struct
+ * intel_engine_execlists via I915_SELFTEST_DECLARE().
+ * NOTE(review): field meanings are inferred from names — confirm.
+ */
+struct st_preempt_hang {
+ struct completion completion;
+ unsigned int count;
+ bool inject_hang; /* presumably requests that a hang be injected */
+};
+
+/**
+ * struct intel_engine_execlists - execlist submission queue and port state
+ *
+ * The struct intel_engine_execlists represents the combined logical state of
+ * driver and the hardware state for execlist mode of submission.
+ */
+struct intel_engine_execlists {
+ /**
+ * @tasklet: softirq tasklet for bottom handler
+ */
+ struct tasklet_struct tasklet;
+
+ /**
+ * @default_priolist: priority list for I915_PRIORITY_NORMAL
+ */
+ struct i915_priolist default_priolist;
+
+ /**
+ * @no_priolist: priority lists disabled
+ */
+ bool no_priolist;
+
+ /**
+ * @submit_reg: gen-specific execlist submission register
+ * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
+ * the ExecList Submission Queue Contents register array for Gen11+
+ */
+ u32 __iomem *submit_reg;
+
+ /**
+ * @ctrl_reg: the enhanced execlists control register, used to load the
+ * submit queue on the HW and to request preemptions to idle
+ */
+ u32 __iomem *ctrl_reg;
+
+ /**
+ * @port: execlist port states
+ *
+ * For each hardware ELSP (ExecList Submission Port) we keep
+ * track of the last request and the number of times we submitted
+ * that port to hw. We then count the number of times the hw reports
+ * a context completion or preemption. As only one context can
+ * be active on hw, we limit resubmission of context to port[0]. This
+ * is called Lite Restore, of the context.
+ */
+ struct execlist_port {
+ /**
+ * @request_count: combined request and submission count
+ */
+ struct i915_request *request_count;
+/*
+ * Accessors for @request_count. The count shares the pointer with the
+ * request and is EXECLIST_COUNT_BITS wide (presumably stored in the low
+ * bits of the pointer — see the ptr_*_bits() helpers).
+ */
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
+#define port_index(p, execlists) ((p) - (execlists)->port)
+
+ /**
+ * @context_id: context ID for port
+ */
+ GEM_DEBUG_DECL(u32 context_id);
+
+#define EXECLIST_MAX_PORTS 2
+ } port[EXECLIST_MAX_PORTS];
+
+ /**
+ * @active: is the HW active? We consider the HW as active after
+ * submitting any context for execution and until we have seen the
+ * last context completion event. After that, we do not expect any
+ * more events until we submit, and so can park the HW.
+ *
+ * As we have a small number of different sources from which we feed
+ * the HW, we track the state of each inside a single bitfield.
+ */
+ unsigned int active;
+#define EXECLISTS_ACTIVE_USER 0
+#define EXECLISTS_ACTIVE_PREEMPT 1
+#define EXECLISTS_ACTIVE_HWACK 2
+
+ /**
+ * @port_mask: number of execlist ports - 1
+ */
+ unsigned int port_mask;
+
+ /**
+ * @queue_priority_hint: Highest pending priority.
+ *
+ * When we add requests into the queue, or adjust the priority of
+ * executing requests, we compute the maximum priority of those
+ * pending requests. We can then use this value to determine if
+ * we need to preempt the executing requests to service the queue.
+ * However, since we may have recorded the priority of an inflight
+ * request we wanted to preempt but which has since completed, at the
+ * time of dequeuing the priority hint may no longer match the highest
+ * available request priority.
+ */
+ int queue_priority_hint;
+
+ /**
+ * @queue: queue of requests, in priority lists
+ */
+ struct rb_root_cached queue;
+
+ /**
+ * @csb_write: control register for Context Switch buffer
+ *
+ * Note this register may be either mmio or HWSP shadow.
+ */
+ u32 *csb_write;
+
+ /**
+ * @csb_status: status array for Context Switch buffer
+ *
+ * Note these registers may be either mmio or HWSP shadow.
+ */
+ u32 *csb_status;
+
+ /**
+ * @preempt_complete_status: expected CSB upon completing preemption
+ */
+ u32 preempt_complete_status;
+
+ /**
+ * @csb_size: context status buffer FIFO size
+ */
+ u8 csb_size;
+
+ /**
+ * @csb_head: context status buffer head
+ */
+ u8 csb_head;
+
+ /* Selftest-only state, declared through I915_SELFTEST_DECLARE() */
+ I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
+};
+
+#define INTEL_ENGINE_CS_MAX_NAME 8
+
+struct intel_engine_cs {
+ struct drm_i915_private *i915;
+ struct intel_uncore *uncore;
+ char name[INTEL_ENGINE_CS_MAX_NAME];
+
+ enum intel_engine_id id;
+ unsigned int hw_id;
+ unsigned int guc_id;
+ intel_engine_mask_t mask;
+
+ u8 uabi_class;
+
+ u8 class;
+ u8 instance;
+ u32 context_size;
+ u32 mmio_base;
+
+ struct intel_sseu sseu;
+
+ struct intel_ring *buffer;
+
+ struct i915_timeline timeline;
+
+ struct intel_context *kernel_context; /* pinned */
+ struct intel_context *preempt_context; /* pinned; optional */
+
+ struct drm_i915_gem_object *default_state;
+ void *pinned_default_state;
+
+ /* Rather than have every client wait upon all user interrupts,
+ * with the herd waking after every interrupt and each doing the
+ * heavyweight seqno dance, we delegate the task (of being the
+ * bottom-half of the user interrupt) to the first client. After
+ * every interrupt, we wake up one client, who does the heavyweight
+ * coherent seqno read and either goes back to sleep (if incomplete),
+ * or wakes up all the completed clients in parallel, before then
+ * transferring the bottom-half status to the next client in the queue.
+ *
+ * Compared to walking the entire list of waiters in a single dedicated
+ * bottom-half, we reduce the latency of the first waiter by avoiding
+ * a context switch, but incur additional coherent seqno reads when
+ * following the chain of request breadcrumbs. Since it is most likely
+ * that we have a single client waiting on each seqno, then reducing
+ * the overhead of waking that client is much preferred.
+ */
+ struct intel_breadcrumbs {
+ spinlock_t irq_lock;
+ struct list_head signalers;
+
+ struct irq_work irq_work; /* for use from inside irq_lock */
+
+ unsigned int irq_enabled;
+
+ bool irq_armed;
+ } breadcrumbs;
+
+ struct intel_engine_pmu {
+ /**
+ * @enable: Bitmask of enable sample events on this engine.
+ *
+ * Bits correspond to sample event types, for instance
+ * I915_SAMPLE_QUEUED is bit 0 etc.
+ */
+ u32 enable;
+ /**
+ * @enable_count: Reference count for the enabled samplers.
+ *
+ * Index number corresponds to @enum drm_i915_pmu_engine_sample.
+ */
+ unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
+ /**
+ * @sample: Counter values for sampling events.
+ *
+ * Our internal timer stores the current counters in this field.
+ *
+ * Index number corresponds to @enum drm_i915_pmu_engine_sample.
+ */
+ struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
+ } pmu;
+
+ /*
+ * A pool of objects to use as shadow copies of client batch buffers
+ * when the command parser is enabled. Prevents the client from
+ * modifying the batch contents after software parsing.
+ */
+ struct i915_gem_batch_pool batch_pool;
+
+ struct intel_hw_status_page status_page;
+ struct i915_ctx_workarounds wa_ctx;
+ struct i915_wa_list ctx_wa_list;
+ struct i915_wa_list wa_list;
+ struct i915_wa_list whitelist;
+
+ u32 irq_keep_mask; /* always keep these interrupts */
+ u32 irq_enable_mask; /* bitmask to enable ring interrupt */
+ void (*irq_enable)(struct intel_engine_cs *engine);
+ void (*irq_disable)(struct intel_engine_cs *engine);
+
+ int (*init_hw)(struct intel_engine_cs *engine);
+
+ struct {
+ void (*prepare)(struct intel_engine_cs *engine);
+ void (*reset)(struct intel_engine_cs *engine, bool stalled);
+ void (*finish)(struct intel_engine_cs *engine);
+ } reset;
+
+ void (*park)(struct intel_engine_cs *engine);
+ void (*unpark)(struct intel_engine_cs *engine);
+
+ void (*set_default_submission)(struct intel_engine_cs *engine);
+
+ const struct intel_context_ops *cops;
+
+ int (*request_alloc)(struct i915_request *rq);
+ int (*init_context)(struct i915_request *rq);
+
+ int (*emit_flush)(struct i915_request *request, u32 mode);
+#define EMIT_INVALIDATE BIT(0)
+#define EMIT_FLUSH BIT(1)
+#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
+ int (*emit_bb_start)(struct i915_request *rq,
+ u64 offset, u32 length,
+ unsigned int dispatch_flags);
+#define I915_DISPATCH_SECURE BIT(0)
+#define I915_DISPATCH_PINNED BIT(1)
+ int (*emit_init_breadcrumb)(struct i915_request *rq);
+ u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
+ u32 *cs);
+ unsigned int emit_fini_breadcrumb_dw;
+
+ /* Pass the request to the hardware queue (e.g. directly into
+ * the legacy ringbuffer or to the end of an execlist).
+ *
+ * This is called from an atomic context with irqs disabled; must
+ * be irq safe.
+ */
+ void (*submit_request)(struct i915_request *rq);
+
+ /*
+ * Call when the priority on a request has changed and it and its
+ * dependencies may need rescheduling. Note the request itself may
+ * not be ready to run!
+ */
+ void (*schedule)(struct i915_request *request,
+ const struct i915_sched_attr *attr);
+
+ /*
+ * Cancel all requests on the hardware, or queued for execution.
+ * This should only cancel the ready requests that have been
+ * submitted to the engine (via the engine->submit_request callback).
+ * This is called when marking the device as wedged.
+ */
+ void (*cancel_requests)(struct intel_engine_cs *engine);
+
+ void (*cleanup)(struct intel_engine_cs *engine);
+
+ struct intel_engine_execlists execlists;
+
+ /* Contexts are pinned whilst they are active on the GPU. The last
+ * context executed remains active whilst the GPU is idle - the
+ * switch away and write to the context object only occurs on the
+ * next execution. Contexts are only unpinned on retirement of the
+ * following request ensuring that we can always write to the object
+ * on the context switch even after idling. Across suspend, we switch
+ * to the kernel context and trash it as the save may not happen
+ * before the hardware is powered down.
+ */
+ struct intel_context *last_retired_context;
+
+ /* status_notifier: list of callbacks for context-switch changes */
+ struct atomic_notifier_head context_status_notifier;
+
+ struct intel_engine_hangcheck hangcheck;
+
+#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_SUPPORTS_STATS BIT(1)
+#define I915_ENGINE_HAS_PREEMPTION BIT(2)
+#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
+#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
+ unsigned int flags;
+
+ /*
+ * Table of commands the command parser needs to know about
+ * for this engine.
+ */
+ DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);
+
+ /*
+ * Table of registers allowed in commands that read/write registers.
+ */
+ const struct drm_i915_reg_table *reg_tables;
+ int reg_table_count;
+
+ /*
+ * Returns the bitmask for the length field of the specified command.
+ * Return 0 for an unrecognized/invalid command.
+ *
+ * If the command parser finds an entry for a command in the engine's
+ * cmd_tables, it gets the command's length based on the table entry.
+ * If not, it calls this function to determine the per-engine length
+ * field encoding for the command (i.e. different opcode ranges use
+ * certain bits to encode the command length in the header).
+ */
+ u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+ struct {
+ /**
+ * @lock: Lock protecting the below fields.
+ */
+ seqlock_t lock;
+ /**
+ * @enabled: Reference count indicating number of listeners.
+ */
+ unsigned int enabled;
+ /**
+ * @active: Number of contexts currently scheduled in.
+ */
+ unsigned int active;
+ /**
+ * @enabled_at: Timestamp when busy stats were enabled.
+ */
+ ktime_t enabled_at;
+ /**
+ * @start: Timestamp of the last idle to active transition.
+ *
+ * Idle is defined as active == 0, active is active > 0.
+ */
+ ktime_t start;
+ /**
+ * @total: Total time this engine was busy.
+ *
+ * Accumulated time not counting the most recent block in cases
+ * where engine is currently busy (active > 0).
+ */
+ ktime_t total;
+ } stats;
+};
+
+/* Report whether I915_ENGINE_NEEDS_CMD_PARSER is set in @engine->flags. */
+static inline bool
+intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+{
+	return (engine->flags & I915_ENGINE_NEEDS_CMD_PARSER) != 0;
+}
+
+/* Report whether I915_ENGINE_SUPPORTS_STATS is set in @engine->flags. */
+static inline bool
+intel_engine_supports_stats(const struct intel_engine_cs *engine)
+{
+	return (engine->flags & I915_ENGINE_SUPPORTS_STATS) != 0;
+}
+
+/* Report whether I915_ENGINE_HAS_PREEMPTION is set in @engine->flags. */
+static inline bool
+intel_engine_has_preemption(const struct intel_engine_cs *engine)
+{
+	return (engine->flags & I915_ENGINE_HAS_PREEMPTION) != 0;
+}
+
+/* Report whether I915_ENGINE_HAS_SEMAPHORES is set in @engine->flags. */
+static inline bool
+intel_engine_has_semaphores(const struct intel_engine_cs *engine)
+{
+	return (engine->flags & I915_ENGINE_HAS_SEMAPHORES) != 0;
+}
+
+/* Report whether I915_ENGINE_NEEDS_BREADCRUMB_TASKLET is set in @engine->flags. */
+static inline bool
+intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
+{
+	return (engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET) != 0;
+}
+
+/*
+ * Slice mask to use when walking per-slice INSTDONE state: the constant 1
+ * when IS_GEN(dev_priv__, 7) holds, otherwise sseu.slice_mask taken from
+ * RUNTIME_INFO(dev_priv__). The argument may be evaluated more than once.
+ */
+#define instdone_slice_mask(dev_priv__) \
+ (IS_GEN(dev_priv__, 7) ? \
+ 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
+
+/*
+ * Subslice mask to use when walking per-subslice INSTDONE state: the
+ * constant 1 when IS_GEN(dev_priv__, 7) holds, otherwise entry 0 of
+ * sseu.subslice_mask[] taken from RUNTIME_INFO(dev_priv__). The argument
+ * may be evaluated more than once.
+ */
+#define instdone_subslice_mask(dev_priv__) \
+ (IS_GEN(dev_priv__, 7) ? \
+ 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
+
+/*
+ * Iterate over (slice, subslice) pairs, running the loop body only for
+ * pairs whose bits are set in both instdone_slice_mask() and
+ * instdone_subslice_mask().
+ *
+ * subslice__ is the fastest-varying index: it counts from 0 up to
+ * I915_MAX_SUBSLICES - 1 and then wraps to 0, at which point slice__ is
+ * incremented. The walk ends once slice__ reaches I915_MAX_SLICES.
+ *
+ * slice__ and subslice__ must be assignable lvalues; every argument is
+ * evaluated multiple times.
+ */
+#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
+ for ((slice__) = 0, (subslice__) = 0; \
+ (slice__) < I915_MAX_SLICES; \
+ (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
+ (slice__) += ((subslice__) == 0)) \
+ for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
+ (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+
+#endif /* __INTEL_ENGINE_TYPES_H__ */
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2003-2018 Intel Corporation
+ */
+
+#ifndef _INTEL_GPU_COMMANDS_H_
+#define _INTEL_GPU_COMMANDS_H_
+
+/*
+ * Instruction field definitions used by the command parser
+ *
+ * Each *_SHIFT gives the bit position of a field in the instruction header.
+ * INSTR_SUBCLIENT_MASK (0x18000000) equals 0x3 << INSTR_SUBCLIENT_SHIFT and
+ * INSTR_26_TO_24_MASK (0x7000000) equals 0x7 << INSTR_26_TO_24_SHIFT.
+ */
+#define INSTR_CLIENT_SHIFT 29
+#define INSTR_MI_CLIENT 0x0
+#define INSTR_BC_CLIENT 0x2
+#define INSTR_RC_CLIENT 0x3
+#define INSTR_SUBCLIENT_SHIFT 27
+#define INSTR_SUBCLIENT_MASK 0x18000000
+#define INSTR_MEDIA_SUBCLIENT 0x2
+#define INSTR_26_TO_24_MASK 0x7000000
+#define INSTR_26_TO_24_SHIFT 24
+
+/*
+ * Memory interface instructions used by the kernel
+ *
+ * MI_INSTR() places @opcode at bit 23 and above and ORs @flags into the
+ * result; neither argument is masked or range-checked.
+ */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */
+#define MI_GLOBAL_GTT (1<<22)
+
+#define MI_NOOP MI_INSTR(0, 0)
+#define MI_USER_INTERRUPT MI_INSTR(0x02, 0)
+#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0)
+#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16)
+#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
+#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
+#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1)
+#define MI_FLUSH MI_INSTR(0x04, 0)
+#define MI_READ_FLUSH (1 << 0)
+#define MI_EXE_FLUSH (1 << 1)
+#define MI_NO_WRITE_FLUSH (1 << 2)
+#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */
+#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */
+#define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */
+#define MI_REPORT_HEAD MI_INSTR(0x07, 0)
+#define MI_ARB_ON_OFF MI_INSTR(0x08, 0)
+#define MI_ARB_ENABLE (1<<0)
+#define MI_ARB_DISABLE (0<<0)
+#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0)
+#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0)
+#define MI_SUSPEND_FLUSH_EN (1<<0)
+#define MI_SET_APPID MI_INSTR(0x0e, 0)
+#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0)
+#define MI_OVERLAY_CONTINUE (0x0<<21)
+#define MI_OVERLAY_ON (0x1<<21)
+#define MI_OVERLAY_OFF (0x2<<21)
+#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0)
+#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2)
+#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1)
+#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20)
+/* IVB has funny definitions for which plane to flip. */
+#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19)
+#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19)
+#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19)
+#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
+#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19)
+#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
+/* SKL ones */
+#define MI_DISPLAY_FLIP_SKL_PLANE_1_A (0 << 8)
+#define MI_DISPLAY_FLIP_SKL_PLANE_1_B (1 << 8)
+#define MI_DISPLAY_FLIP_SKL_PLANE_1_C (2 << 8)
+#define MI_DISPLAY_FLIP_SKL_PLANE_2_A (4 << 8)
+#define MI_DISPLAY_FLIP_SKL_PLANE_2_B (5 << 8)
+#define MI_DISPLAY_FLIP_SKL_PLANE_2_C (6 << 8)
+#define MI_DISPLAY_FLIP_SKL_PLANE_3_A (7 << 8)
+#define MI_DISPLAY_FLIP_SKL_PLANE_3_B (8 << 8)
+#define MI_DISPLAY_FLIP_SKL_PLANE_3_C (9 << 8)
+#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */
+#define MI_SEMAPHORE_GLOBAL_GTT (1<<22)
+#define MI_SEMAPHORE_UPDATE (1<<21)
+#define MI_SEMAPHORE_COMPARE (1<<20)
+#define MI_SEMAPHORE_REGISTER (1<<18)
+#define MI_SEMAPHORE_SYNC_VR (0<<16) /* RCS wait for VCS (RVSYNC) */
+#define MI_SEMAPHORE_SYNC_VER (1<<16) /* RCS wait for VECS (RVESYNC) */
+#define MI_SEMAPHORE_SYNC_BR (2<<16) /* RCS wait for BCS (RBSYNC) */
+#define MI_SEMAPHORE_SYNC_BV (0<<16) /* VCS wait for BCS (VBSYNC) */
+#define MI_SEMAPHORE_SYNC_VEV (1<<16) /* VCS wait for VECS (VVESYNC) */
+#define MI_SEMAPHORE_SYNC_RV (2<<16) /* VCS wait for RCS (VRSYNC) */
+#define MI_SEMAPHORE_SYNC_RB (0<<16) /* BCS wait for RCS (BRSYNC) */
+#define MI_SEMAPHORE_SYNC_VEB (1<<16) /* BCS wait for VECS (BVESYNC) */
+#define MI_SEMAPHORE_SYNC_VB (2<<16) /* BCS wait for VCS (BVSYNC) */
+#define MI_SEMAPHORE_SYNC_BVE (0<<16) /* VECS wait for BCS (VEBSYNC) */
+#define MI_SEMAPHORE_SYNC_VVE (1<<16) /* VECS wait for VCS (VEVSYNC) */
+#define MI_SEMAPHORE_SYNC_RVE (2<<16) /* VECS wait for RCS (VERSYNC) */
+#define MI_SEMAPHORE_SYNC_INVALID (3<<16)
+#define MI_SEMAPHORE_SYNC_MASK (3<<16)
+#define MI_SET_CONTEXT MI_INSTR(0x18, 0)
+#define MI_MM_SPACE_GTT (1<<8)
+#define MI_MM_SPACE_PHYSICAL (0<<8)
+#define MI_SAVE_EXT_STATE_EN (1<<3)
+#define MI_RESTORE_EXT_STATE_EN (1<<2)
+#define MI_FORCE_RESTORE (1<<1)
+#define MI_RESTORE_INHIBIT (1<<0)
+#define HSW_MI_RS_SAVE_STATE_EN (1<<3)
+#define HSW_MI_RS_RESTORE_STATE_EN (1<<2)
+#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */
+#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
+#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_POLL (1 << 15)
+#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
+#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
+#define MI_SEMAPHORE_SAD_LT_SDD (2 << 12)
+#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
+#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
+#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
+#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
+#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
+#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
+#define MI_USE_GGTT (1 << 22) /* g4x+ */
+#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1)
+/*
+ * Official Intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
+ * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
+ * simply ignores the register load under certain conditions.
+ * - One can actually load arbitrarily many arbitrary registers: Simply issue x
+ * address/value pairs. Don't overdo it, though, x <= 2^4 must hold!
+ */
+#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
+#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
+#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
+#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
+#define MI_FLUSH_DW_STORE_INDEX (1<<21)
+#define MI_INVALIDATE_TLB (1<<18)
+#define MI_FLUSH_DW_OP_STOREDW (1<<14)
+#define MI_FLUSH_DW_OP_MASK (3<<14)
+#define MI_FLUSH_DW_NOTIFY (1<<8)
+#define MI_INVALIDATE_BSD (1<<7)
+#define MI_FLUSH_DW_USE_GTT (1<<2)
+#define MI_FLUSH_DW_USE_PPGTT (0<<2)
+#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
+#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
+#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
+#define MI_BATCH_NON_SECURE (1)
+/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
+#define MI_BATCH_NON_SECURE_I965 (1<<8)
+#define MI_BATCH_PPGTT_HSW (1<<8)
+#define MI_BATCH_NON_SECURE_HSW (1<<13)
+#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0)
+#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
+#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
+#define MI_BATCH_RESOURCE_STREAMER (1<<10)
+
+/*
+ * 3D instructions used by the kernel
+ *
+ * GFX_INSTR() sets bits 30:29 (0x3 << 29), places @opcode at bit 24 and
+ * above and ORs @flags into the result; neither argument is masked.
+ */
+#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags))
+
+#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4)
+/*
+ * Bit 31. Written with an unsigned constant: shifting 1 into the sign bit
+ * of an int is undefined in ISO C, and the resulting negative value would
+ * sign-extend if it were ever widened to 64 bits.
+ */
+#define GEN9_MEDIA_POOL_ENABLE (1u << 31)
+#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24))
+#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19))
+#define SC_UPDATE_SCISSOR (0x1<<1)
+#define SC_ENABLE_MASK (0x1<<0)
+#define SC_ENABLE (0x1<<0)
+#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16))
+#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1))
+/*
+ * The Y masks cover bits 31:16. They use an unsigned constant because
+ * 0xffff << 16 does not fit in an int, which makes the signed shift
+ * undefined in ISO C.
+ */
+#define SCI_YMIN_MASK (0xffffu<<16)
+#define SCI_XMIN_MASK (0xffff<<0)
+#define SCI_YMAX_MASK (0xffffu<<16)
+#define SCI_XMAX_MASK (0xffff<<0)
+#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19))
+#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1)
+#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0)
+#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4)
+#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0)
+#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
+#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
+#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)
+
+#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2))
+#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4)
+#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
+#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5)
+#define BLT_WRITE_A (2<<20)
+#define BLT_WRITE_RGB (1<<20)
+#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A)
+#define BLT_DEPTH_8 (0<<24)
+#define BLT_DEPTH_16_565 (1<<24)
+#define BLT_DEPTH_16_1555 (2<<24)
+#define BLT_DEPTH_32 (3<<24)
+#define BLT_ROP_SRC_COPY (0xcc<<16)
+#define BLT_ROP_COLOR_COPY (0xf0<<16)
+#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */
+#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */
+#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2)
+#define ASYNC_FLIP (1<<22)
+#define DISPLAY_PLANE_A (0<<20)
+#define DISPLAY_PLANE_B (1<<20)
+#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define PIPE_CONTROL_FLUSH_L3 (1<<27)
+#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */
+#define PIPE_CONTROL_MMIO_WRITE (1<<23)
+#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21)
+#define PIPE_CONTROL_CS_STALL (1<<20)
+#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
+#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
+#define PIPE_CONTROL_QW_WRITE (1<<14)
+#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
+#define PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
+#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
+#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
+#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
+#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
+#define PIPE_CONTROL_NOTIFY (1<<8)
+#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
+#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
+#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4)
+#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3)
+#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2)
+#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1)
+#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0)
+#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+
+/*
+ * Commands used only by the command parser
+ */
+#define MI_SET_PREDICATE MI_INSTR(0x01, 0)
+#define MI_ARB_CHECK MI_INSTR(0x05, 0)
+#define MI_RS_CONTROL MI_INSTR(0x06, 0)
+#define MI_URB_ATOMIC_ALLOC MI_INSTR(0x09, 0)
+#define MI_PREDICATE MI_INSTR(0x0C, 0)
+#define MI_RS_CONTEXT MI_INSTR(0x0F, 0)
+#define MI_TOPOLOGY_FILTER MI_INSTR(0x0D, 0)
+#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0)
+#define MI_URB_CLEAR MI_INSTR(0x19, 0)
+#define MI_UPDATE_GTT MI_INSTR(0x23, 0)
+#define MI_CLFLUSH MI_INSTR(0x27, 0)
+#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0)
+#define MI_REPORT_PERF_COUNT_GGTT (1<<0)
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0)
+#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
+#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
+#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
+#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
+
+#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16))
+#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16))
+#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16))
+#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
+#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16))
+#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16))
+#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
+ ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16))
+#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \
+ ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16))
+#define GFX_OP_3DSTATE_SO_DECL_LIST \
+ ((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16))
+
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \
+ ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \
+ ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \
+ ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \
+ ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
+ ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
+
+#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16))
+
+#define COLOR_BLT ((0x2<<29)|(0x40<<22))
+#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22))
+
+#endif /* _INTEL_GPU_COMMANDS_H_ */
--- /dev/null
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "intel_reset.h"
+#include "i915_drv.h"
+
+/*
+ * Per-engine sample built on the stack for one pass of
+ * i915_hangcheck_elapsed().
+ *
+ * NOTE(review): action_timestamp, deadlock and instdone are not touched by
+ * the functions in this file; selftest_hangcheck.c (included below) was not
+ * checked for users.
+ */
+struct hangcheck {
+ u64 acthd; /* value of intel_engine_get_active_head() */
+ u32 seqno; /* value of intel_engine_get_hangcheck_seqno() */
+ enum intel_engine_hangcheck_action action; /* verdict of hangcheck_get_action() */
+ unsigned long action_timestamp;
+ int deadlock;
+ struct intel_instdone instdone;
+ bool wedged:1; /* jiffies is past action_timestamp + I915_ENGINE_WEDGED_TIMEOUT */
+ bool stalled:1; /* jiffies is past action_timestamp + the per-action timeout */
+};
+
+/*
+ * Fold @current_instdone into the accumulator at @old_instdone.
+ *
+ * Returns true when @current_instdone carries no bit that the accumulator
+ * did not already hold, i.e. the accumulated value is left unchanged.
+ */
+static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
+{
+	const u32 before = *old_instdone;
+	const u32 after = before | current_instdone;
+
+	*old_instdone = after;
+
+	return after == before;
+}
+
+/*
+ * Compare a fresh INSTDONE snapshot against the bits accumulated in
+ * engine->hangcheck.instdone and fold the snapshot into the accumulator.
+ *
+ * Returns true when none of the sampled registers contributed a new bit.
+ * Engines other than RCS0 are not sampled and always report true.
+ */
+static bool subunits_stuck(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_instdone *seen = &engine->hangcheck.instdone;
+	struct intel_instdone now;
+	bool stuck = true;
+	int slice, subslice;
+
+	if (engine->id != RCS0)
+		return true;
+
+	intel_engine_get_instdone(engine, &now);
+
+	/*
+	 * There might be unstable subunit states even when the actual head
+	 * is not moving. Filter out the unstable ones by accumulating the
+	 * undone -> done transitions and only consider those as progress.
+	 *
+	 * Every comparison below must run, since each one also updates the
+	 * accumulator; do not short-circuit them.
+	 */
+	if (!instdone_unchanged(now.instdone, &seen->instdone))
+		stuck = false;
+	if (!instdone_unchanged(now.slice_common, &seen->slice_common))
+		stuck = false;
+
+	for_each_instdone_slice_subslice(i915, slice, subslice) {
+		if (!instdone_unchanged(now.sampler[slice][subslice],
+					&seen->sampler[slice][subslice]))
+			stuck = false;
+		if (!instdone_unchanged(now.row[slice][subslice],
+					&seen->row[slice][subslice]))
+			stuck = false;
+	}
+
+	return stuck;
+}
+
+/*
+ * Classify the engine from its active head pointer.
+ *
+ * Returns ENGINE_ACTIVE_HEAD if @acthd differs from the stored sample (the
+ * accumulated subunit state is cleared in that case),
+ * ENGINE_ACTIVE_SUBUNITS if the head is unchanged but subunits_stuck()
+ * reports progress, and ENGINE_DEAD otherwise.
+ */
+static enum intel_engine_hangcheck_action
+head_stuck(struct intel_engine_cs *engine, u64 acthd)
+{
+	struct intel_engine_hangcheck *state = &engine->hangcheck;
+
+	if (acthd == state->acthd)
+		return subunits_stuck(engine) ?
+			ENGINE_DEAD : ENGINE_ACTIVE_SUBUNITS;
+
+	/* Clear subunit states on head movement */
+	memset(&state->instdone, 0, sizeof(state->instdone));
+
+	return ENGINE_ACTIVE_HEAD;
+}
+
+/*
+ * Classify an engine whose seqno has not advanced.
+ *
+ * Any verdict other than ENGINE_DEAD from head_stuck() is returned as is.
+ * Otherwise, except on gen2, RING_CTL is inspected: if RING_WAIT is set the
+ * error is reported, the register value is written back to kick the ring
+ * and ENGINE_WAIT_KICK is returned. In every remaining case the engine is
+ * ENGINE_DEAD.
+ */
+static enum intel_engine_hangcheck_action
+engine_stuck(struct intel_engine_cs *engine, u64 acthd)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	enum intel_engine_hangcheck_action verdict;
+	u32 ctl;
+
+	verdict = head_stuck(engine, acthd);
+	if (verdict != ENGINE_DEAD)
+		return verdict;
+
+	if (IS_GEN(i915, 2))
+		return ENGINE_DEAD;
+
+	/*
+	 * Is the chip hanging on a WAIT_FOR_EVENT? If so we can simply poke
+	 * the RB_WAIT bit and break the hang. This should work on all but
+	 * the second generation chipsets.
+	 */
+	ctl = ENGINE_READ(engine, RING_CTL);
+	if (!(ctl & RING_WAIT))
+		return ENGINE_DEAD;
+
+	i915_handle_error(i915, engine->mask, 0,
+			  "stuck wait on %s", engine->name);
+	ENGINE_WRITE(engine, RING_CTL, ctl);
+
+	return ENGINE_WAIT_KICK;
+}
+
+/*
+ * Fill @hc with the engine's current active head and hangcheck seqno.
+ * No other field of @hc is written.
+ */
+static void hangcheck_load_sample(struct intel_engine_cs *engine,
+				  struct hangcheck *hc)
+{
+	const u64 head = intel_engine_get_active_head(engine);
+	const u32 seqno = intel_engine_get_hangcheck_seqno(engine);
+
+	hc->acthd = head;
+	hc->seqno = seqno;
+}
+
+/*
+ * Record the head and seqno from @hc in engine->hangcheck so that the next
+ * pass can compare against them.
+ */
+static void hangcheck_store_sample(struct intel_engine_cs *engine,
+				   const struct hangcheck *hc)
+{
+	struct intel_engine_hangcheck *state = &engine->hangcheck;
+
+	state->acthd = hc->acthd;
+	state->last_seqno = hc->seqno;
+}
+
+/*
+ * Derive the hangcheck verdict for @engine from the sample in @hc.
+ *
+ * Checks are made in order: a seqno that differs from the stored one yields
+ * ENGINE_ACTIVE_SEQNO, an idle engine yields ENGINE_IDLE, and anything else
+ * is delegated to engine_stuck().
+ */
+static enum intel_engine_hangcheck_action
+hangcheck_get_action(struct intel_engine_cs *engine,
+		     const struct hangcheck *hc)
+{
+	const bool seqno_moved = engine->hangcheck.last_seqno != hc->seqno;
+
+	if (seqno_moved)
+		return ENGINE_ACTIVE_SEQNO;
+
+	return intel_engine_is_idle(engine) ?
+		ENGINE_IDLE : engine_stuck(engine, hc->acthd);
+}
+
+/*
+ * Classify the sample in @hc and decide whether the engine has exceeded its
+ * timeouts.
+ *
+ * Sets hc->action, may restart engine->hangcheck.action_timestamp, and then
+ * sets hc->stalled and hc->wedged by comparing jiffies against that
+ * timestamp plus the per-action timeout and I915_ENGINE_WEDGED_TIMEOUT
+ * respectively.
+ */
+static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
+ struct hangcheck *hc)
+{
+ unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
+
+ hc->action = hangcheck_get_action(engine, hc);
+
+ /*
+ * We keep the hangcheck clock (action_timestamp) running whenever
+ * the engine is busy and still processing the same request, so
+ * that no single request can run indefinitely (such as a chain of
+ * batches). The only time we restart the clock without seqno
+ * progress is when the engine is in a legitimate wait for another
+ * engine: the waiting engine is then a victim, and we want to be
+ * sure we catch the right culprit. Every time we kick the ring we
+ * also treat that as progress, since the seqno may then advance;
+ * if it does not, a later pass will catch the hanging engine.
+ */
+
+ switch (hc->action) {
+ case ENGINE_IDLE:
+ case ENGINE_ACTIVE_SEQNO:
+ /* Clear head and subunit states on idle or seqno movement */
+ hc->acthd = 0;
+
+ memset(&engine->hangcheck.instdone, 0,
+ sizeof(engine->hangcheck.instdone));
+
+ /* Intentional fall through */
+ case ENGINE_WAIT_KICK:
+ case ENGINE_WAIT:
+ /* Restart the clock for this engine */
+ engine->hangcheck.action_timestamp = jiffies;
+ break;
+
+ case ENGINE_ACTIVE_HEAD:
+ case ENGINE_ACTIVE_SUBUNITS:
+ /*
+ * Seqno stuck with still active engine gets leeway,
+ * in hopes that it is just a long shader.
+ */
+ timeout = I915_SEQNO_DEAD_TIMEOUT;
+ break;
+
+ case ENGINE_DEAD:
+ /* Clock keeps running against the default timeout */
+ break;
+
+ default:
+ MISSING_CASE(hc->action);
+ }
+
+ hc->stalled = time_after(jiffies,
+ engine->hangcheck.action_timestamp + timeout);
+ hc->wedged = time_after(jiffies,
+ engine->hangcheck.action_timestamp +
+ I915_ENGINE_WEDGED_TIMEOUT);
+}
+
+/*
+ * Build a one-line synopsis naming the offending engines and pass it to
+ * i915_handle_error() together with I915_ERROR_CAPTURE.
+ *
+ * @i915: device the engines belong to
+ * @hung: mask of engines to report
+ * @stuck: mask of engines that were still busy; when it differs from @hung
+ *         those engines are removed from the report
+ *
+ * The message reads "no progress on ..." when @stuck equals @hung and
+ * "hang on ..." otherwise, followed by a comma-separated list of names.
+ */
+static void hangcheck_declare_hang(struct drm_i915_private *i915,
+				   unsigned int hung,
+				   unsigned int stuck)
+{
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+	char msg[80];
+	int prefix;
+	int len;
+
+	/* If some rings hung but others were still busy, only
+	 * blame the hanging rings in the synopsis.
+	 */
+	if (stuck != hung)
+		hung &= ~stuck;
+
+	prefix = scnprintf(msg, sizeof(msg),
+			   "%s on ", stuck == hung ? "no progress" : "hang");
+	len = prefix;
+	for_each_engine_masked(engine, i915, hung, tmp)
+		len += scnprintf(msg + len, sizeof(msg) - len,
+				 "%s, ", engine->name);
+
+	/*
+	 * Strip the trailing ", " separator, but only if at least one
+	 * engine was appended; otherwise we would eat into the prefix.
+	 */
+	if (len >= prefix + 2)
+		msg[len - 2] = '\0';
+
+	/* Plain call: a void function must not "return" an expression. */
+	i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg);
+}
+
+/*
+ * Delayed-work handler that checks every engine for forward progress.
+ *
+ * For each engine the active head and hangcheck seqno are sampled and
+ * compared with the values stored by the previous pass. Engines whose
+ * timeout has expired are reported through hangcheck_declare_hang(); if any
+ * engine has exceeded the wedged timeout, all in-flight rendering is
+ * cancelled by marking the device wedged. Unless it bails out early
+ * (hangcheck disabled, GT not awake, or device terminally wedged), the
+ * handler re-queues itself via i915_queue_hangcheck().
+ */
+static void i915_hangcheck_elapsed(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915),
+			     gpu_error.hangcheck_work.work);
+	unsigned int hung = 0, stuck = 0, wedged = 0;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (!i915_modparams.enable_hangcheck ||
+	    !READ_ONCE(i915->gt.awake) ||
+	    i915_terminally_wedged(i915))
+		return;
+
+	/* As enabling the GPU requires fairly extensive mmio access,
+	 * periodically arm the mmio checker to see if we are triggering
+	 * any invalid access.
+	 */
+	intel_uncore_arm_unclaimed_mmio_detection(&i915->uncore);
+
+	for_each_engine(engine, i915, id) {
+		struct hangcheck hc;
+
+		intel_engine_signal_breadcrumbs(engine);
+
+		hangcheck_load_sample(engine, &hc);
+		hangcheck_accumulate_sample(engine, &hc);
+		hangcheck_store_sample(engine, &hc);
+
+		if (hc.wedged)
+			wedged |= engine->mask;
+
+		if (!hc.stalled)
+			continue;
+
+		hung |= engine->mask;
+		if (hc.action != ENGINE_DEAD)
+			stuck |= engine->mask;
+	}
+
+	if (GEM_SHOW_DEBUG() && (hung | stuck)) {
+		struct drm_printer p = drm_debug_printer("hangcheck");
+
+		/* Dump every engine that is not idle */
+		for_each_engine(engine, i915, id) {
+			if (!intel_engine_is_idle(engine))
+				intel_engine_dump(engine, &p, "%s\n",
+						  engine->name);
+		}
+	}
+
+	if (wedged) {
+		dev_err(i915->drm.dev,
+			"GPU recovery timed out,"
+			" cancelling all in-flight rendering.\n");
+		GEM_TRACE_DUMP();
+		i915_gem_set_wedged(i915);
+	}
+
+	if (hung)
+		hangcheck_declare_hang(i915, hung, stuck);
+
+	/* Reset timer in case GPU hangs without another request being added */
+	i915_queue_hangcheck(i915);
+}
+
+/*
+ * Reset the engine's hangcheck bookkeeping: zero the whole state and start
+ * its clock from the current jiffies.
+ */
+void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
+{
+	struct intel_engine_hangcheck *state = &engine->hangcheck;
+
+	memset(state, 0, sizeof(*state));
+	state->action_timestamp = jiffies;
+}
+
+/*
+ * Set up the device-wide hangcheck work item, with
+ * i915_hangcheck_elapsed() as its handler. Nothing is queued here.
+ */
+void intel_hangcheck_init(struct drm_i915_private *i915)
+{
+ INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work,
+ i915_hangcheck_elapsed);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_hangcheck.c"
+#endif
--- /dev/null
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Ben Widawsky <ben@bwidawsk.net>
+ * Michel Thierry <michel.thierry@intel.com>
+ * Thomas Daniel <thomas.daniel@intel.com>
+ * Oscar Mateo <oscar.mateo@intel.com>
+ *
+ */
+
+/**
+ * DOC: Logical Rings, Logical Ring Contexts and Execlists
+ *
+ * Motivation:
+ * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
+ * These expanded contexts enable a number of new abilities, especially
+ * "Execlists" (also implemented in this file).
+ *
+ * One of the main differences with the legacy HW contexts is that logical
+ * ring contexts incorporate many more things to the context's state, like
+ * PDPs or ringbuffer control registers:
+ *
+ * The reason why PDPs are included in the context is straightforward: as
+ * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
+ * contained there means you don't need to do a ppgtt->switch_mm yourself;
+ * instead, the GPU will do it for you on the context switch.
+ *
+ * But, what about the ringbuffer control registers (head, tail, etc..)?
+ * shouldn't we just need a set of those per engine command streamer? This is
+ * where the name "Logical Rings" starts to make sense: by virtualizing the
+ * rings, the engine cs shifts to a new "ring buffer" with every context
+ * switch. When you want to submit a workload to the GPU you: A) choose your
+ * context, B) find its appropriate virtualized ring, C) write commands to it
+ * and then, finally, D) tell the GPU to switch to that context.
+ *
+ * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
+ * to a context is via a context execution list, ergo "Execlists".
+ *
+ * LRC implementation:
+ * Regarding the creation of contexts, we have:
+ *
+ * - One global default context.
+ * - One local default context for each opened fd.
+ * - One local extra context for each context create ioctl call.
+ *
+ * Now that ringbuffers belong per-context (and not per-engine, like before)
+ * and that contexts are uniquely tied to a given engine (and not reusable,
+ * like before) we need:
+ *
+ * - One ringbuffer per-engine inside each context.
+ * - One backing object per-engine inside each context.
+ *
+ * The global default context starts its life with these new objects fully
+ * allocated and populated. The local default context for each opened fd is
+ * more complex, because we don't know at creation time which engine is going
+ * to use them. To handle this, we have implemented a deferred creation of LR
+ * contexts:
+ *
+ * The local context starts its life as a hollow or blank holder, that only
+ * gets populated for a given engine once we receive an execbuffer. If later
+ * on we receive another execbuffer ioctl for the same context but a different
+ * engine, we allocate/populate a new ringbuffer and context backing object and
+ * so on.
+ *
+ * Finally, regarding local contexts created using the ioctl call: as they are
+ * only allowed with the render ring, we can allocate & populate them right
+ * away (no need to defer anything, at least for now).
+ *
+ * Execlists implementation:
+ * Execlists are the new method by which, on gen8+ hardware, workloads are
+ * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
+ * This method works as follows:
+ *
+ * When a request is committed, its commands (the BB start and any leading or
+ * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
+ * for the appropriate context. The tail pointer in the hardware context is not
+ * updated at this time, but instead, kept by the driver in the ringbuffer
+ * structure. A structure representing this request is added to a request queue
+ * for the appropriate engine: this structure contains a copy of the context's
+ * tail after the request was written to the ring buffer and a pointer to the
+ * context itself.
+ *
+ * If the engine's request queue was empty before the request was added, the
+ * queue is processed immediately. Otherwise the queue will be processed during
+ * a context switch interrupt. In any case, elements on the queue will get sent
+ * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
+ * globally unique 20-bit submission ID.
+ *
+ * When execution of a request completes, the GPU updates the context status
+ * buffer with a context complete event and generates a context switch interrupt.
+ * During the interrupt handling, the driver examines the events in the buffer:
+ * for each context complete event, if the announced ID matches that on the head
+ * of the request queue, then that request is retired and removed from the queue.
+ *
+ * After processing, if any requests were retired and the queue is not empty
+ * then a new execution list can be submitted. The two requests at the front of
+ * the queue are next to be submitted but since a context may not occur twice in
+ * an execution list, if subsequent requests have the same ID as the first then
+ * the two requests must be combined. This is done simply by discarding requests
+ * at the head of the queue until either only one request is left (in which case
+ * we use a NULL second context) or the first two requests have unique IDs.
+ *
+ * By always executing the first two requests in the queue the driver ensures
+ * that the GPU is kept as busy as possible. In the case where a single context
+ * completes but a second context is still executing, the request for this second
+ * context will be at the head of the queue when we remove the first one. This
+ * request will then be resubmitted along with a new request for a different context,
+ * which will cause the hardware to continue executing the second request and queue
+ * the new request (the GPU detects the condition of a context getting preempted
+ * with the same context and optimizes the context switch flow by not doing
+ * preemption, but just sampling the new tail pointer).
+ *
+ */
+#include <linux/interrupt.h>
+
+#include "i915_drv.h"
+#include "i915_gem_render_state.h"
+#include "i915_vgpu.h"
+#include "intel_lrc_reg.h"
+#include "intel_mocs.h"
+#include "intel_reset.h"
+#include "intel_workarounds.h"
+
+#define RING_EXECLIST_QFULL (1 << 0x2)
+#define RING_EXECLIST1_VALID (1 << 0x3)
+#define RING_EXECLIST0_VALID (1 << 0x4)
+#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE)
+#define RING_EXECLIST1_ACTIVE (1 << 0x11)
+#define RING_EXECLIST0_ACTIVE (1 << 0x12)
+
+#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0)
+#define GEN8_CTX_STATUS_PREEMPTED (1 << 1)
+#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2)
+#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
+#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
+#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+
+#define GEN8_CTX_STATUS_COMPLETED_MASK \
+ (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) /* see process_csb() */
+
+/*
+ * Typical size of the average request (2 pipecontrols and a MI_BB).
+ *
+ * WA_TAIL_DWORDS/WA_TAIL_BYTES: the amount that unwind_wa_tail() subtracts
+ * from rq->wa_tail in order to recompute rq->tail.
+ *
+ * ACTIVE_PRIORITY (below): the priority bits that effective_prio() and
+ * __unwind_incomplete_requests() apply to a request that has already
+ * started executing.
+ */
+#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
+#define WA_TAIL_DWORDS 2
+#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
+
+#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
+
+static int execlists_context_deferred_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine);
+static void execlists_init_reg_state(u32 *reg_state,
+ struct intel_context *ce,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring);
+
+/* Map an rb-tree node back to the i915_priolist that embeds it as ->node. */
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+	struct i915_priolist *pl = rb_entry(rb, struct i915_priolist, node);
+
+	return pl;
+}
+
+/* Read the scheduling priority currently recorded on @rq. */
+static inline int rq_prio(const struct i915_request *rq)
+{
+	const int prio = rq->sched.attr.priority;
+
+	return prio;
+}
+
+/*
+ * Priority of @rq as used when deciding on preemption (see need_preempt()).
+ *
+ * A request that has already started but does not yet carry every
+ * ACTIVE_PRIORITY bit is treated as though it had been unwound and
+ * resubmitted: __unwind_incomplete_requests() would hand it the same small
+ * bump as a freshly submitted request, protecting it from being gazumped
+ * again (though it would be preferable not to let it be gazumped in the
+ * first place!).
+ */
+static int effective_prio(const struct i915_request *rq)
+{
+	int value = rq_prio(rq);
+	const bool boosted = (value & ACTIVE_PRIORITY) == ACTIVE_PRIORITY;
+
+	if (!boosted && __i915_request_has_started(rq)) {
+		/*
+		 * Following preemption the active request is queued at the
+		 * tail of its new priority level, so all things being equal
+		 * it ranks _below_ the preemptee (which is what makes the
+		 * preemption valid). Knock one off so that our comparison
+		 * reflects that.
+		 */
+		value = (value | ACTIVE_PRIORITY) - 1;
+	}
+
+	/* Restrict mere WAIT boosts from triggering preemption */
+	return value | __NO_PREEMPTION;
+}
+
+/*
+ * Priority derived from the first priolist in the execlists queue, or
+ * INT_MIN when nothing is queued.
+ */
+static int queue_prio(const struct intel_engine_execlists *execlists)
+{
+	struct rb_node *first = rb_first_cached(&execlists->queue);
+	struct i915_priolist *pl;
+
+	if (!first)
+		return INT_MIN;
+
+	/*
+	 * The priolist[] slots are stored inverted (the highest priority
+	 * lives in [0]), hence the index has to be flipped back to obtain
+	 * the priority value.
+	 */
+	pl = to_priolist(first);
+	return ((pl->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(pl->used);
+}
+
+static inline bool need_preempt(const struct intel_engine_cs *engine,
+ const struct i915_request *rq)
+{
+ int last_prio;
+
+ if (!engine->preempt_context)
+ return false;
+
+ if (i915_request_completed(rq))
+ return false;
+
+ /*
+ * Check if the current priority hint merits a preemption attempt.
+ *
+ * We record the highest value priority we saw during rescheduling
+ * prior to this dequeue, therefore we know that if it is strictly
+ * less than the current tail of ELSP[0], we do not need to force
+ * a preempt-to-idle cycle.
+ *
+ * However, the priority hint is a mere hint that we may need to
+ * preempt. If that hint is stale or we may be trying to preempt
+ * ourselves, ignore the request.
+ */
+ last_prio = effective_prio(rq);
+ if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
+ last_prio))
+ return false;
+
+ /*
+ * Check against the first request in ELSP[1], it will, thanks to the
+ * power of PI, be the highest priority of that context.
+ */
+ if (!list_is_last(&rq->link, &engine->timeline.requests) &&
+ rq_prio(list_next_entry(rq, link)) > last_prio)
+ return true;
+
+ /*
+ * If the inflight context did not trigger the preemption, then maybe
+ * it was the set of queued requests? Pick the highest priority in
+ * the queue (the first active priolist) and see if it deserves to be
+ * running instead of ELSP[0].
+ *
+ * The highest priority request in the queue can not be either
+ * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
+ * context, its priority would not exceed ELSP[0] aka last_prio.
+ */
+ return queue_prio(&engine->execlists) > last_prio;
+}
+
+/*
+ * Sanity check used from GEM_BUG_ON() in can_merge_rq(): @prev must not be
+ * of lower priority than @next, unless @prev is the request held in the
+ * first execlists port.
+ */
+__maybe_unused static inline bool
+assert_priority_queue(const struct i915_request *prev,
+		      const struct i915_request *next)
+{
+	const struct intel_engine_execlists *el = &prev->engine->execlists;
+
+	/*
+	 * When preemption is unavailable, prev can be the element that is
+	 * still active and that we refuse to let go of. And even when it is
+	 * available, we sometimes judge it better to leave an ostensibly
+	 * lower priority request in flight rather than preempt it. Either
+	 * way, the request in the first port is exempt from the ordering.
+	 */
+	return port_request(el->port) == prev ||
+	       rq_prio(prev) >= rq_prio(next);
+}
+
+/*
+ * The context descriptor encodes various attributes of a context,
+ * including its GTT address and some flags. Because it's fairly
+ * expensive to calculate, we'll just do it once and cache the result,
+ * which remains valid until the context is unpinned.
+ *
+ * This is what a descriptor looks like, from LSB to MSB::
+ *
+ * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
+ * bits 12-31: LRCA, GTT address of (the HWSP of) this context
+ * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
+ * bits 53-54: mbz, reserved for use by hardware
+ * bits 55-63: group ID, currently unused and set to 0
+ *
+ * Starting from Gen11, the upper dword of the descriptor has a new format:
+ *
+ * bits 32-36: reserved
+ * bits 37-47: SW context ID
+ * bits 48-53: engine instance
+ * bit 54: mbz, reserved for use by hardware
+ * bits 55-60: SW counter
+ * bits 61-63: engine class
+ *
+ * engine info, SW context ID and SW counter need to form a unique number
+ * (Context ID) per lrc.
+ *
+ * lrc_descriptor() assembles the value from ctx->desc_template, the GGTT
+ * offset of ce->state (skipping LRC_HEADER_PAGES) and ctx->hw_id, adding
+ * the engine instance and class on Gen11+.
+ */
+static u64
+lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+ struct i915_gem_context *ctx = ce->gem_context;
+ u64 desc;
+
+ BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
+ BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
+
+ desc = ctx->desc_template; /* bits 0-11 */
+ GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
+
+ desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
+ /* bits 12-31 */
+ GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
+
+ /*
+ * The following 32bits are copied into the OA reports (dword 2).
+ * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
+ * anything below.
+ */
+ if (INTEL_GEN(engine->i915) >= 11) {
+ GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
+ desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
+ /* bits 37-47 */
+
+ desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
+ /* bits 48-53 */
+
+ /* TODO: decide what to do with SW counter (bits 55-60) */
+
+ desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
+ /* bits 61-63 */
+ } else {
+ GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
+ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
+ }
+
+ return desc;
+}
+
+/*
+ * Recompute @rq's tail as its wa_tail rewound by WA_TAIL_BYTES, wrapped to
+ * the ring, and check that the result is a valid ring tail.
+ */
+static void unwind_wa_tail(struct i915_request *rq)
+{
+	struct intel_ring *ring = rq->ring;
+
+	rq->tail = intel_ring_wrap(ring, rq->wa_tail - WA_TAIL_BYTES);
+	assert_ring_tail_valid(ring, rq->tail);
+}
+
+static struct i915_request *
+__unwind_incomplete_requests(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq, *rn, *active = NULL;
+ struct list_head *uninitialized_var(pl);
+ int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
+
+ lockdep_assert_held(&engine->timeline.lock);
+
+ list_for_each_entry_safe_reverse(rq, rn,
+ &engine->timeline.requests,
+ link) {
+ if (i915_request_completed(rq))
+ break;
+
+ __i915_request_unsubmit(rq);
+ unwind_wa_tail(rq);
+
+ GEM_BUG_ON(rq->hw_context->active);
+
+ GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+ if (rq_prio(rq) != prio) {
+ prio = rq_prio(rq);
+ pl = i915_sched_lookup_priolist(engine, prio);
+ }
+ GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+
+ list_add(&rq->sched.link, pl);
+
+ active = rq;
+ }
+
+ /*
+ * The active request is now effectively the start of a new client
+ * stream, so give it the equivalent small priority bump to prevent
+ * it being gazumped a second time by another peer.
+ *
+ * Note we have to be careful not to apply a priority boost to a request
+ * still spinning on its semaphores. If the request hasn't started, that
+ * means it is still waiting for its dependencies to be signaled, and
+ * if we apply a priority boost to this request, we will boost it past
+ * its signalers and so break PI.
+ *
+ * One consequence of this preemption boost is that we may jump
+ * over lesser priorities (such as I915_PRIORITY_WAIT), effectively
+ * making those priorities non-preemptible. They will be moved forward
+ * in the priority queue, but they will not gain immediate access to
+ * the GPU.
+ *
+ * If the loop above unwound nothing, active is still NULL; prio then
+ * retains the ACTIVE_PRIORITY bits from its initialiser, so the first
+ * half of the test below is false and active is never dereferenced.
+ */
+ if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
+ prio |= ACTIVE_PRIORITY;
+ active->sched.attr.priority = prio;
+ list_move_tail(&active->sched.link,
+ i915_sched_lookup_priolist(engine, prio));
+ }
+
+ return active;
+}
+
+/*
+ * Non-static entry point: resolve the engine that embeds @execlists and
+ * unwind its incomplete requests, returning whatever
+ * __unwind_incomplete_requests() reports.
+ */
+struct i915_request *
+execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
+{
+	return __unwind_incomplete_requests(container_of(execlists,
+							 struct intel_engine_cs,
+							 execlists));
+}
+
+/* Run the engine's context_status_notifier chain with @status for @rq. */
+static inline void
+execlists_context_status_change(struct i915_request *rq, unsigned long status)
+{
+	/*
+	 * GVT-g is presently the only user of this notifier. With GVT-g
+	 * configured out the condition is a compile-time constant and the
+	 * compiler should discard the whole function as dead code.
+	 */
+	if (IS_ENABLED(CONFIG_DRM_I915_GVT))
+		atomic_notifier_call_chain(&rq->engine->context_status_notifier,
+					   status, rq);
+}
+
+/*
+ * Flag the execlists as carrying user work by setting EXECLISTS_ACTIVE_USER
+ * (via the "set once" helper). @port is accepted but not consulted here.
+ */
+inline void execlists_user_begin(struct intel_engine_execlists *execlists,
+				 const struct execlist_port *port)
+{
+	execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
+}
+
+/* Counterpart of execlists_user_begin(): clear EXECLISTS_ACTIVE_USER. */
+inline void execlists_user_end(struct intel_engine_execlists *execlists)
+{
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+}
+
+/*
+ * Bookkeeping for a context being scheduled in: notify status listeners,
+ * account the engine as having a context in, and record the engine as the
+ * context's active engine. The context must not already be marked active.
+ */
+static inline void
+execlists_context_schedule_in(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+
+	GEM_BUG_ON(ce->active);
+
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+	intel_engine_context_in(rq->engine);
+	ce->active = rq->engine;
+}
+
+/*
+ * Bookkeeping for a context being scheduled out: clear the context's active
+ * engine, account the engine, notify status listeners with @status and emit
+ * the request-out tracepoint.
+ */
+static inline void
+execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
+{
+	struct intel_context *ce = rq->hw_context;
+
+	ce->active = NULL;
+	intel_engine_context_out(rq->engine);
+	execlists_context_status_change(rq, status);
+	trace_i915_request_out(rq);
+}
+
+/*
+ * Store the request's ring tail into the context image and return the
+ * context descriptor (ce->lrc_desc) to be written to the hardware.
+ */
+static u64 execlists_update_context(struct i915_request *rq)
+{
+	struct intel_context *ce = rq->hw_context;
+	const u32 tail = intel_ring_set_tail(rq->ring, rq->tail);
+
+	ce->lrc_reg_state[CTX_RING_TAIL + 1] = tail;
+
+	/*
+	 * The context image has to be complete before it is handed to HW.
+	 *
+	 * In principle writes (the WCB included) ought to be flushed ahead
+	 * of an uncached write such as our mmio register access. Empirical
+	 * evidence (esp. on Braswell) says otherwise: the WC write into
+	 * memory may not be visible to the HW by the time the UC register
+	 * write completes, so execution may start from a context whose
+	 * image is incomplete, leading to invalid PD chasing.
+	 *
+	 * On top of that, Braswell at least wants a full mb so that the
+	 * writes are coherent in memory (visible to the GPU) before
+	 * execution, rather than merely visible to other CPUs (which is
+	 * all that wmb provides).
+	 */
+	mb();
+
+	return ce->lrc_desc;
+}
+
+/*
+ * Write the 64-bit descriptor @desc for @port.
+ *
+ * With a control register present, the lower then upper dword are written
+ * to the pair of submit registers at index 2 * @port. Without one, both
+ * halves go to the single submit register, upper dword first, and @port is
+ * not used.
+ */
+static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
+{
+	const u32 lo = lower_32_bits(desc);
+	const u32 hi = upper_32_bits(desc);
+
+	if (!execlists->ctrl_reg) {
+		writel(hi, execlists->submit_reg);
+		writel(lo, execlists->submit_reg);
+		return;
+	}
+
+	writel(lo, execlists->submit_reg + port * 2);
+	writel(hi, execlists->submit_reg + port * 2 + 1);
+}
+
+static void execlists_submit_ports(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *execlists = &engine->execlists;
+ struct execlist_port *port = execlists->port;
+ unsigned int n;
+
+ /*
+ * We can skip acquiring intel_runtime_pm_get() here as it was taken
+ * on our behalf by the request (see i915_gem_mark_busy()) and it will
+ * not be relinquished until the device is idle (see
+ * i915_gem_idle_work_handler()). As a precaution, we make sure
+ * that all ELSP are drained i.e. we have processed the CSB,
+ * before allowing ourselves to idle and calling intel_runtime_pm_put().
+ */
+ GEM_BUG_ON(!engine->i915->gt.awake);
+
+ /*
+ * ELSQ note: the submit queue is not cleared after being submitted
+ * to the HW so we need to make sure we always clean it up. This is
+ * currently ensured by the fact that we always write the same number
+ * of elsq entries, keep this in mind before changing the loop below.
+ *
+ * The loop visits every port, highest index first, and writes a zero
+ * descriptor for any port that holds no request.
+ */
+ for (n = execlists_num_ports(execlists); n--; ) {
+ struct i915_request *rq;
+ unsigned int count;
+ u64 desc;
+
+ rq = port_unpack(&port[n], &count);
+ if (rq) {
+ GEM_BUG_ON(count > !n);
+ if (!count++)
+ execlists_context_schedule_in(rq);
+ port_set(&port[n], port_pack(rq, count));
+ desc = execlists_update_context(rq);
+ GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+
+ GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
+ engine->name, n,
+ port[n].context_id, count,
+ rq->fence.context, rq->fence.seqno,
+ hwsp_seqno(rq),
+ rq_prio(rq));
+ } else {
+ GEM_BUG_ON(!n);
+ desc = 0;
+ }
+
+ write_desc(execlists, desc, n);
+ }
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+}
+
+/*
+ * True when GVT support is built in and the GEM context behind @ce demands
+ * single submission.
+ */
+static bool ctx_single_port_submission(const struct intel_context *ce)
+{
+	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
+		return false;
+
+	return i915_gem_context_force_single_submission(ce->gem_context);
+}
+
+/*
+ * Two submissions may share a port only if they are the very same context
+ * and that context is not restricted to single port submission.
+ */
+static bool can_merge_ctx(const struct intel_context *prev,
+			  const struct intel_context *next)
+{
+	return prev == next && !ctx_single_port_submission(prev);
+}
+
+/*
+ * Decide whether @next may be coalesced with @prev, i.e. whether their
+ * hardware contexts can be merged. Also asserts the expected priority
+ * ordering between the two requests.
+ */
+static bool can_merge_rq(const struct i915_request *prev,
+			 const struct i915_request *next)
+{
+	GEM_BUG_ON(!assert_priority_queue(prev, next));
+
+	return can_merge_ctx(prev->hw_context, next->hw_context);
+}
+
+/*
+ * Make @port track @rq: drop the reference on any request the port already
+ * holds, take a reference on @rq and store it, preserving the port's count.
+ * @rq must differ from the request already in the port.
+ */
+static void port_assign(struct execlist_port *port, struct i915_request *rq)
+{
+	struct i915_request *old;
+
+	GEM_BUG_ON(rq == port_request(port));
+
+	if (port_isset(port)) {
+		old = port_request(port);
+		i915_request_put(old);
+	}
+
+	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
+}
+
+/*
+ * Submit the engine's preempt context in the first port, with every other
+ * port written as zero, and mark a preemption as being in flight.
+ */
+static void inject_preempt_context(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+	struct intel_context *ce = engine->preempt_context;
+	unsigned int n;
+
+	GEM_BUG_ON(execlists->preempt_complete_status !=
+		   upper_32_bits(ce->lrc_desc));
+
+	/*
+	 * Our preempt context is empty, so switching to it leaves
+	 * the GPU in a known (idle) state.
+	 */
+	GEM_TRACE("%s\n", engine->name);
+
+	/* Zero the descriptors of all ports above the first, last port first */
+	n = execlists_num_ports(execlists);
+	while (--n)
+		write_desc(execlists, 0, n);
+
+	/* ... and finally the preempt context itself into port 0 */
+	write_desc(execlists, ce->lrc_desc, 0);
+
+	/* we need to manually load the submit queue */
+	if (execlists->ctrl_reg)
+		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+
+	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
+}
+
+/*
+ * Finish a preemption: unless a preempt hang is being injected, cancel the
+ * requests held in the ports and unwind the engine's incomplete requests.
+ * A preemption must be marked as in flight on entry.
+ */
+static void complete_preempt_context(struct intel_engine_execlists *execlists)
+{
+	struct intel_engine_cs *engine =
+		container_of(execlists, struct intel_engine_cs, execlists);
+
+	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
+
+	if (inject_preempt_hang(execlists))
+		return;
+
+	execlists_cancel_port_requests(execlists);
+	__unwind_incomplete_requests(engine);
+}
+
+static void execlists_dequeue(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct execlist_port *port = execlists->port;
+ const struct execlist_port * const last_port =
+ &execlists->port[execlists->port_mask];
+ struct i915_request *last = port_request(port);
+ struct rb_node *rb;
+ bool submit = false;
+
+ /*
+ * Hardware submission is through 2 ports. Conceptually each port
+ * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
+ * static for a context, and unique to each, so we only execute
+ * requests belonging to a single context from each ring. RING_HEAD
+ * is maintained by the CS in the context image, it marks the place
+ * where it got up to last time, and through RING_TAIL we tell the CS
+ * where we want to execute up to this time.
+ *
+ * In this list the requests are in order of execution. Consecutive
+ * requests from the same context are adjacent in the ringbuffer. We
+ * can combine these requests into a single RING_TAIL update:
+ *
+ * RING_HEAD...req1...req2
+ * ^- RING_TAIL
+ * since to execute req2 the CS must first execute req1.
+ *
+ * Our goal then is to point each port to the end of a consecutive
+ * sequence of requests as being the most optimal (fewest wake ups
+ * and context switches) submission.
+ */
+
+ if (last) {
+ /*
+ * Don't resubmit or switch until all outstanding
+ * preemptions (lite-restore) are seen. Then we
+ * know the next preemption status we see corresponds
+ * to this ELSP update.
+ */
+ GEM_BUG_ON(!execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_USER));
+ GEM_BUG_ON(!port_count(&port[0]));
+
+ /*
+ * If we write to ELSP a second time before the HW has had
+ * a chance to respond to the previous write, we can confuse
+ * the HW and hit "undefined behaviour". After writing to ELSP,
+ * we must then wait until we see a context-switch event from
+ * the HW to indicate that it has had a chance to respond.
+ */
+ if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
+ return;
+
+ if (need_preempt(engine, last)) {
+ inject_preempt_context(engine);
+ return;
+ }
+
+ /*
+ * In theory, we could coalesce more requests onto
+ * the second port (the first port is active, with
+ * no preemptions pending). However, that means we
+ * then have to deal with the possible lite-restore
+ * of the second port (as we submit the ELSP, there
+ * may be a context-switch) but also we may complete
+ * the resubmission before the context-switch. Ergo,
+ * coalescing onto the second port will cause a
+ * preemption event, but we cannot predict whether
+ * that will affect port[0] or port[1].
+ *
+ * If the second port is already active, we can wait
+ * until the next context-switch before contemplating
+ * new requests. The GPU will be busy and we should be
+ * able to resubmit the new ELSP before it idles,
+ * avoiding pipeline bubbles (momentary pauses where
+ * the driver is unable to keep up the supply of new
+ * work). However, we have to double check that the
+ * priorities of the ports haven't been switched.
+ */
+ if (port_count(&port[1]))
+ return;
+
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == rq:TAIL as we resubmit the
+ * request. See gen8_emit_fini_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
+ }
+
+ while ((rb = rb_first_cached(&execlists->queue))) {
+ struct i915_priolist *p = to_priolist(rb);
+ struct i915_request *rq, *rn;
+ int i;
+
+ priolist_for_each_request_consume(rq, rn, p, i) {
+ /*
+ * Can we combine this request with the current port?
+ * It has to be the same context/ringbuffer and not
+ * have any exceptions (e.g. GVT saying never to
+ * combine contexts).
+ *
+ * If we can combine the requests, we can execute both
+ * by updating the RING_TAIL to point to the end of the
+ * second request, and so we never need to tell the
+ * hardware about the first.
+ */
+ if (last && !can_merge_rq(last, rq)) {
+ /*
+ * If we are on the second port and cannot
+ * combine this request with the last, then we
+ * are done.
+ */
+ if (port == last_port)
+ goto done;
+
+ /*
+ * We must not populate both ELSP[] with the
+ * same LRCA, i.e. we must submit 2 different
+ * contexts if we submit 2 ELSP.
+ */
+ if (last->hw_context == rq->hw_context)
+ goto done;
+
+ /*
+ * If GVT overrides us we only ever submit
+ * port[0], leaving port[1] empty. Note that we
+ * also have to be careful that we don't queue
+ * the same context (even though a different
+ * request) to the second port.
+ */
+ if (ctx_single_port_submission(last->hw_context) ||
+ ctx_single_port_submission(rq->hw_context))
+ goto done;
+
+
+ if (submit)
+ port_assign(port, last);
+ port++;
+
+ GEM_BUG_ON(port_isset(port));
+ }
+
+ list_del_init(&rq->sched.link);
+
+ __i915_request_submit(rq);
+ trace_i915_request_in(rq, port_index(port, execlists));
+
+ last = rq;
+ submit = true;
+ }
+
+ rb_erase_cached(&p->node, &execlists->queue);
+ i915_priolist_free(p);
+ }
+
+done:
+ /*
+ * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
+ *
+ * We choose the priority hint such that if we add a request of greater
+ * priority than this, we kick the submission tasklet to decide on
+ * the right order of submitting the requests to hardware. We must
+ * also be prepared to reorder requests as they are in-flight on the
+ * HW. We derive the priority hint then as the first "hole" in
+ * the HW submission ports and if there are no available slots,
+ * the priority of the lowest executing request, i.e. last.
+ *
+ * When we do receive a higher priority request ready to run from the
+ * user, see queue_request(), the priority hint is bumped to that
+ * request triggering preemption on the next dequeue (or subsequent
+ * interrupt for secondary ports).
+ */
+ execlists->queue_priority_hint = queue_prio(execlists);
+
+ if (submit) {
+ port_assign(port, last);
+ execlists_submit_ports(engine);
+ }
+
+ /* We must always keep the beast fed if we have work piled up */
+ GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
+ !port_isset(execlists->port));
+
+ /* Re-evaluate the executing context setup after each preemptive kick */
+ if (last)
+ execlists_user_begin(execlists, execlists->port);
+
+ /* If the engine is now idle, so should be the flag; and vice versa. */
+ GEM_BUG_ON(execlists_is_active(&engine->execlists,
+ EXECLISTS_ACTIVE_USER) ==
+ !port_isset(engine->execlists.port));
+}
+
+/*
+ * Release every request held in the execlists ports, starting from the
+ * first port and stopping at the first empty one. Each request is scheduled
+ * out (as completed or as preempted, depending on whether it has completed),
+ * its port reference is dropped and the port is zeroed. Finally all of the
+ * execlists active flags are cleared.
+ */
+void
+execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
+{
+	const unsigned int num_ports = execlists_num_ports(execlists);
+	unsigned int n;
+
+	for (n = 0; n < num_ports; n++) {
+		struct execlist_port *port = &execlists->port[n];
+		struct i915_request *rq;
+		unsigned long status;
+
+		if (!port_isset(port))
+			break;
+
+		rq = port_request(port);
+
+		GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
+			  rq->engine->name,
+			  n,
+			  rq->fence.context, rq->fence.seqno,
+			  hwsp_seqno(rq));
+
+		GEM_BUG_ON(!execlists->active);
+
+		if (i915_request_completed(rq))
+			status = INTEL_CONTEXT_SCHEDULE_OUT;
+		else
+			status = INTEL_CONTEXT_SCHEDULE_PREEMPTED;
+		execlists_context_schedule_out(rq, status);
+
+		i915_request_put(rq);
+
+		memset(port, 0, sizeof(*port));
+	}
+
+	execlists_clear_all_active(execlists);
+}
+
+/* Issue a clflush() on the address of @first and then on that of @last. */
+static inline void
+invalidate_csb_entries(const u32 *first, const u32 *last)
+{
+	void *head = (void *)first;
+	void *tail = (void *)last;
+
+	clflush(head);
+	clflush(tail);
+}
+
+/*
+ * A reset is deemed to be in progress while the submission tasklet is
+ * disabled; hinted as the unlikely case.
+ */
+static inline bool
+reset_in_progress(const struct intel_engine_execlists *execlists)
+{
+	const bool enabled = __tasklet_is_enabled(&execlists->tasklet);
+
+	return unlikely(!enabled);
+}
+
+static void process_csb(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct execlist_port *port = execlists->port;
+ const u32 * const buf = execlists->csb_status;
+ const u8 num_entries = execlists->csb_size;
+ u8 head, tail;
+
+ lockdep_assert_held(&engine->timeline.lock);
+
+ /*
+ * Note that csb_write, csb_status may be either in HWSP or mmio.
+ * When reading from the csb_write mmio register, we have to be
+ * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
+ * the low 4bits. As it happens we know the next 4bits are always
+ * zero and so we can simply masked off the low u8 of the register
+ * and treat it identically to reading from the HWSP (without having
+ * to use explicit shifting and masking, and probably bifurcating
+ * the code to handle the legacy mmio read).
+ */
+ head = execlists->csb_head;
+ tail = READ_ONCE(*execlists->csb_write);
+ GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+ if (unlikely(head == tail))
+ return;
+
+ /*
+ * Hopefully paired with a wmb() in HW!
+ *
+ * We must complete the read of the write pointer before any reads
+ * from the CSB, so that we do not see stale values. Without an rmb
+ * (lfence) the HW may speculatively perform the CSB[] reads *before*
+ * we perform the READ_ONCE(*csb_write).
+ */
+ rmb();
+
+ do {
+ struct i915_request *rq;
+ unsigned int status;
+ unsigned int count;
+
+ if (++head == num_entries)
+ head = 0;
+
+ /*
+ * We are flying near dragons again.
+ *
+ * We hold a reference to the request in execlist_port[]
+ * but no more than that. We are operating in softirq
+ * context and so cannot hold any mutex or sleep. That
+ * prevents us stopping the requests we are processing
+ * in port[] from being retired simultaneously (the
+ * breadcrumb will be complete before we see the
+ * context-switch). As we only hold the reference to the
+ * request, any pointer chasing underneath the request
+ * is subject to a potential use-after-free. Thus we
+ * store all of the bookkeeping within port[] as
+ * required, and avoid using unguarded pointers beneath
+ * request itself. The same applies to the atomic
+ * status notifier.
+ */
+
+ GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+ engine->name, head,
+ buf[2 * head + 0], buf[2 * head + 1],
+ execlists->active);
+
+ status = buf[2 * head];
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;
+
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+
+ if (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status) {
+ GEM_TRACE("%s preempt-idle\n", engine->name);
+ complete_preempt_context(execlists);
+ continue;
+ }
+
+ if (status & GEN8_CTX_STATUS_PREEMPTED &&
+ execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT))
+ continue;
+
+ GEM_BUG_ON(!execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_USER));
+
+ rq = port_unpack(port, &count);
+ GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
+ engine->name,
+ port->context_id, count,
+ rq ? rq->fence.context : 0,
+ rq ? rq->fence.seqno : 0,
+ rq ? hwsp_seqno(rq) : 0,
+ rq ? rq_prio(rq) : 0);
+
+ /* Check the context/desc id for this event matches */
+ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
+ GEM_BUG_ON(count == 0);
+ if (--count == 0) {
+ /*
+ * On the final event corresponding to the
+ * submission of this context, we expect either
+ * an element-switch event or a completion
+ * event (and on completion, the active-idle
+ * marker). No more preemptions, lite-restore
+ * or otherwise.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+ GEM_BUG_ON(port_isset(&port[1]) &&
+ !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+ GEM_BUG_ON(!port_isset(&port[1]) &&
+ !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+
+ /*
+ * We rely on the hardware being strongly
+ * ordered, that the breadcrumb write is
+ * coherent (visible from the CPU) before the
+ * user interrupt and CSB is processed.
+ */
+ GEM_BUG_ON(!i915_request_completed(rq));
+
+ execlists_context_schedule_out(rq,
+ INTEL_CONTEXT_SCHEDULE_OUT);
+ i915_request_put(rq);
+
+ GEM_TRACE("%s completed ctx=%d\n",
+ engine->name, port->context_id);
+
+ port = execlists_port_complete(execlists, port);
+ if (port_isset(port))
+ execlists_user_begin(execlists, port);
+ else
+ execlists_user_end(execlists);
+ } else {
+ port_set(port, port_pack(rq, count));
+ }
+ } while (head != tail);
+
+ execlists->csb_head = head;
+
+ /*
+ * Gen11 has proven to fail wrt global observation point between
+ * entry and tail update, failing on the ordering and thus
+ * we see an old entry in the context status buffer.
+ *
+ * Forcibly evict the entries ahead of the next gpu csb update,
+ * to increase the odds that we get fresh entries with
+ * non-working hardware. The cost for doing so comes out mostly in
+ * the wash as hardware, working or not, will need to do the
+ * invalidation before.
+ */
+ invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
+}
+
+/*
+ * Body of the submission tasklet; the caller must hold engine->timeline.lock.
+ *
+ * Runs process_csb() and then, unless EXECLISTS_ACTIVE_PREEMPT is still set,
+ * calls execlists_dequeue().
+ */
+static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+
+	lockdep_assert_held(&engine->timeline.lock);
+
+	process_csb(engine);
+
+	/* Hold off dequeuing while the preempt flag is still active. */
+	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
+		return;
+
+	execlists_dequeue(engine);
+}
+
+/*
+ * Tasklet entry point: check the unread Context Status Buffers and manage
+ * the submission of new contexts to the ELSP accordingly.
+ *
+ * @data is the engine pointer cast to unsigned long. The work is done by
+ * __execlists_submission_tasklet() under engine->timeline.lock.
+ */
+static void execlists_submission_tasklet(unsigned long data)
+{
+	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+	unsigned long irqflags;
+
+	GEM_TRACE("%s awake?=%d, active=%x\n",
+		  engine->name,
+		  !!engine->i915->gt.awake,
+		  engine->execlists.active);
+
+	spin_lock_irqsave(&engine->timeline.lock, irqflags);
+	__execlists_submission_tasklet(engine);
+	spin_unlock_irqrestore(&engine->timeline.lock, irqflags);
+}
+
+/* Append @node to the tail of the engine's priority list for @prio. */
+static void queue_request(struct intel_engine_cs *engine,
+			  struct i915_sched_node *node,
+			  int prio)
+{
+	struct list_head *bucket = i915_sched_lookup_priolist(engine, prio);
+
+	list_add_tail(&node->link, bucket);
+}
+
+/*
+ * Kick submission for @engine right away: run the execlists tasklet body
+ * inline when that is the installed handler, otherwise schedule the tasklet.
+ * Does nothing while a reset is in progress.
+ */
+static void __submit_queue_imm(struct intel_engine_cs *engine)
+{
+	struct tasklet_struct * const t = &engine->execlists.tasklet;
+
+	/* defer until we restart the engine following reset */
+	if (reset_in_progress(&engine->execlists))
+		return;
+
+	if (t->func != execlists_submission_tasklet) {
+		tasklet_hi_schedule(t);
+		return;
+	}
+
+	__execlists_submission_tasklet(engine);
+}
+
+/*
+ * Raise the queue priority hint to @prio and kick submission, but only when
+ * @prio is strictly greater than the current hint.
+ */
+static void submit_queue(struct intel_engine_cs *engine, int prio)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+
+	if (prio <= execlists->queue_priority_hint)
+		return;
+
+	execlists->queue_priority_hint = prio;
+	__submit_queue_imm(engine);
+}
+
+/*
+ * Queue @request on its engine's priority lists and let submit_queue()
+ * decide whether to kick submission. Everything happens under
+ * engine->timeline.lock with interrupts saved/restored.
+ */
+static void execlists_submit_request(struct i915_request *request)
+{
+	struct intel_engine_cs * const engine = request->engine;
+	unsigned long irqflags;
+
+	/* Will be called from irq-context when using foreign fences. */
+	spin_lock_irqsave(&engine->timeline.lock, irqflags);
+
+	queue_request(engine, &request->sched, rq_prio(request));
+
+	/* The request must now be linked and the queue non-empty */
+	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+	GEM_BUG_ON(list_empty(&request->sched.link));
+
+	submit_queue(engine, rq_prio(request));
+
+	spin_unlock_irqrestore(&engine->timeline.lock, irqflags);
+}
+
+/*
+ * Drop the references @ce holds on its ring and on the object backing its
+ * context state. The state object must no longer be active.
+ */
+static void __execlists_context_fini(struct intel_context *ce)
+{
+	struct drm_i915_gem_object *state_obj;
+
+	intel_ring_put(ce->ring);
+
+	state_obj = ce->state->obj;
+	GEM_BUG_ON(i915_gem_object_is_active(state_obj));
+	i915_gem_object_put(state_obj);
+}
+
+/*
+ * kref release callback for an intel_context: tear down the execlists state
+ * (if any was allocated) and free the context. The context must be unpinned.
+ */
+static void execlists_context_destroy(struct kref *kref)
+{
+	struct intel_context *ctx =
+		container_of(kref, struct intel_context, ref);
+
+	GEM_BUG_ON(intel_context_is_pinned(ctx));
+
+	/* ->state is only set once the context image has been allocated */
+	if (ctx->state)
+		__execlists_context_fini(ctx);
+
+	intel_context_free(ctx);
+}
+
+/*
+ * Pin the context state @vma into the global GTT (high, above the ggtt pin
+ * bias), then bump the object's pin_global count and mark it dirty.
+ *
+ * Returns 0 on success or the error from i915_vma_pin().
+ */
+static int __context_pin(struct i915_vma *vma)
+{
+	const unsigned int pin_flags =
+		PIN_GLOBAL | PIN_HIGH |
+		PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+	int ret;
+
+	ret = i915_vma_pin(vma, 0, 0, pin_flags);
+	if (ret)
+		return ret;
+
+	vma->obj->pin_global++;
+	vma->obj->mm.dirty = true;
+
+	return 0;
+}
+
+/* Undo __context_pin(): drop the pin_global count, then unpin @vma. */
+static void __context_unpin(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object * const obj = vma->obj;
+
+	obj->pin_global--;
+	__i915_vma_unpin(vma);
+}
+
+/*
+ * Release what pinning acquired for @ce: the hw id, the ring, the mapping
+ * of the context state object and finally the state vma itself.
+ */
+static void execlists_context_unpin(struct intel_context *ce)
+{
+	struct intel_engine_cs *active;
+
+	/*
+	 * The tasklet may still be using a pointer to our state, via an
+	 * old request. However, since we know we only unpin the context
+	 * on retirement of the following request, we know that the last
+	 * request referencing us will have had a completion CS interrupt.
+	 * If we see that it is still active, it means that the tasklet hasn't
+	 * had the chance to run yet; let it run before we tear down the
+	 * reference it may use.
+	 */
+	active = READ_ONCE(ce->active);
+	if (unlikely(active)) {
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&active->timeline.lock, irqflags);
+		process_csb(active);
+		spin_unlock_irqrestore(&active->timeline.lock, irqflags);
+
+		/* Processing the CSB must have retired our last use */
+		GEM_BUG_ON(READ_ONCE(ce->active));
+	}
+
+	i915_gem_context_unpin_hw_id(ce->gem_context);
+
+	intel_ring_unpin(ce->ring);
+
+	i915_gem_object_unpin_map(ce->state->obj);
+	__context_unpin(ce->state);
+}
+
+/*
+ * Refresh the ring registers (start, head, tail) held in the register state
+ * of @ce from its ring, and for render-class engines the RPCS value as well.
+ */
+static void
+__execlists_update_reg_state(struct intel_context *ce,
+			     struct intel_engine_cs *engine)
+{
+	u32 * const reg_state = ce->lrc_reg_state;
+	struct intel_ring * const ring = ce->ring;
+
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
+
+	reg_state[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
+	reg_state[CTX_RING_HEAD + 1] = ring->head;
+	reg_state[CTX_RING_TAIL + 1] = ring->tail;
+
+	/* RPCS is only written for the render class */
+	if (engine->class != RENDER_CLASS)
+		return;
+
+	reg_state[CTX_R_PWR_CLK_STATE + 1] =
+		intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+}
+
+/*
+ * Pin everything @ce needs to run on @engine: allocate the context image on
+ * first use, pin its vma, map the state object, pin the ring and the hw id,
+ * then compute the descriptor and refresh the register state.
+ *
+ * Returns 0 on success. On failure every step already taken is unwound in
+ * reverse order and the error code is returned.
+ */
+static int
+__execlists_context_pin(struct intel_context *ce,
+			struct intel_engine_cs *engine)
+{
+	void *map;
+	int err;
+
+	GEM_BUG_ON(!ce->gem_context->ppgtt);
+
+	err = execlists_context_deferred_alloc(ce, engine);
+	if (err)
+		return err;
+	GEM_BUG_ON(!ce->state);
+
+	err = __context_pin(ce->state);
+	if (err)
+		return err;
+
+	map = i915_gem_object_pin_map(ce->state->obj,
+				      i915_coherent_map_type(engine->i915) |
+				      I915_MAP_OVERRIDE);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto err_unpin_vma;
+	}
+
+	err = intel_ring_pin(ce->ring);
+	if (err)
+		goto err_unpin_map;
+
+	err = i915_gem_context_pin_hw_id(ce->gem_context);
+	if (err)
+		goto err_unpin_ring;
+
+	ce->lrc_desc = lrc_descriptor(ce, engine);
+	/* The register state lives LRC_STATE_PN pages into the mapping */
+	ce->lrc_reg_state = map + LRC_STATE_PN * PAGE_SIZE;
+	__execlists_update_reg_state(ce, engine);
+
+	return 0;
+
+err_unpin_ring:
+	intel_ring_unpin(ce->ring);
+err_unpin_map:
+	i915_gem_object_unpin_map(ce->state->obj);
+err_unpin_vma:
+	__context_unpin(ce->state);
+	return err;
+}
+
+/* intel_context_ops.pin hook: pin @ce for the engine recorded in ce->engine. */
+static int execlists_context_pin(struct intel_context *ce)
+{
+	struct intel_engine_cs * const engine = ce->engine;
+
+	return __execlists_context_pin(ce, engine);
+}
+
+/*
+ * intel_context_ops.reset hook: rewind the ring of @ce to offset 0 and write
+ * the resulting head/tail back into the context's register state.
+ */
+static void execlists_context_reset(struct intel_context *ce)
+{
+	struct intel_ring * const ring = ce->ring;
+
+	/*
+	 * Because we emit WA_TAIL_DWORDS there may be a disparity
+	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
+	 * that stored in context. As we only write new commands from
+	 * ce->ring->tail onwards, everything before that is junk. If the GPU
+	 * starts reading from its RING_HEAD from the context, it may try to
+	 * execute that junk and die.
+	 *
+	 * The contexts that are still pinned on resume belong to the
+	 * kernel, and are local to each engine. All other contexts will
+	 * have their head/tail sanitized upon pinning before use, so they
+	 * will never see garbage.
+	 *
+	 * So to avoid that we reset the context images upon resume. For
+	 * simplicity, we just zero everything out.
+	 */
+	intel_ring_reset(ring, 0);
+	__execlists_update_reg_state(ce, ce->engine);
+}
+
+/* Context operation table wiring up the execlists pin/unpin/reset/destroy hooks. */
+static const struct intel_context_ops execlists_context_ops = {
+	.destroy = execlists_context_destroy,
+	.reset = execlists_context_reset,
+
+	.unpin = execlists_context_unpin,
+	.pin = execlists_context_pin,
+};
+
+/*
+ * Emit the initial breadcrumb for @rq: an arbitration check followed by a
+ * store of (seqno - 1) to the timeline's HWSP offset, then record where the
+ * request payload starts in rq->infix.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int gen8_emit_init_breadcrumb(struct i915_request *rq)
+{
+	u32 *cs;
+
+	GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * Check if we have been preempted before we even get started.
+	 *
+	 * After this point i915_request_started() reports true, even if
+	 * we get preempted and so are no longer running.
+	 */
+	cs[0] = MI_ARB_CHECK;
+	cs[1] = MI_NOOP;
+
+	cs[2] = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	cs[3] = rq->timeline->hwsp_offset;
+	cs[4] = 0;
+	cs[5] = rq->fence.seqno - 1;
+	cs += 6;
+
+	intel_ring_advance(rq, cs);
+
+	/* Record the updated position of the request's payload */
+	rq->infix = intel_ring_offset(rq, cs);
+
+	return 0;
+}
+
+/*
+ * Emit commands into @rq that load the GEN8_3LVL_PDPES page-directory
+ * pointer registers of rq->engine from rq->gem_context->ppgtt, bracketed by
+ * EMIT_FLUSH / EMIT_INVALIDATE flushes both before and after the load.
+ *
+ * Must not be used while a vGPU is active (asserted below).
+ *
+ * Returns 0 on success, or the error from engine->emit_flush() or
+ * intel_ring_begin().
+ */
+static int emit_pdps(struct i915_request *rq)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt;
+ int err, i;
+ u32 *cs;
+
+ GEM_BUG_ON(intel_vgpu_active(rq->i915));
+
+ /*
+ * Beware ye of the dragons, this sequence is magic!
+ *
+ * Small changes to this sequence can cause anything from
+ * GPU hangs to forcewake errors and machine lockups!
+ */
+
+ /* Flush any residual operations from the context load */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Magic required to prevent forcewake errors! */
+ err = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ return err;
+
+ /* 1 LRI header + 4 dwords per PDP (UDW/LDW reg+value) + 1 trailing NOOP */
+ cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Ensure the LRI have landed before we invalidate & continue */
+ *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ /* Walk the PDPs from the highest index down to 0 */
+ for (i = GEN8_3LVL_PDPES; i--; ) {
+ const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+ u32 base = engine->mmio_base;
+
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = upper_32_bits(pd_daddr);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = lower_32_bits(pd_daddr);
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ /* Be doubly sure the LRI have landed before proceeding */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Re-invalidate the TLB for luck */
+ return engine->emit_flush(rq, EMIT_INVALIDATE);
+}
+
+/*
+ * Prepare a freshly allocated @request: temporarily reserve
+ * EXECLISTS_REQUEST_SIZE of ring space, then emit either a plain invalidate
+ * (4-level page tables) or the PDP reload sequence.
+ *
+ * Returns 0 on success or the error from the emit step. On error the extra
+ * reserved space is left in place.
+ */
+static int execlists_request_alloc(struct i915_request *request)
+{
+	int err;
+
+	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+
+	/*
+	 * Flush enough space to reduce the likelihood of waiting after
+	 * we start building the request - in which case we will just
+	 * have to repeat work.
+	 */
+	request->reserved_space += EXECLISTS_REQUEST_SIZE;
+
+	/*
+	 * Note that after this point, we have committed to using
+	 * this request as it is being used to both track the
+	 * state of engine initialisation and liveness of the
+	 * golden renderstate above. Think twice before you try
+	 * to cancel/unwind this request now.
+	 */
+
+	/* Unconditionally invalidate GPU caches and TLBs. */
+	if (!i915_vm_is_4lvl(&request->gem_context->ppgtt->vm))
+		err = emit_pdps(request);
+	else
+		err = request->engine->emit_flush(request, EMIT_INVALIDATE);
+	if (err)
+		return err;
+
+	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
+	return 0;
+}
+
+/*
+ * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
+ * PIPE_CONTROL instruction. This is required for the flush to happen
+ * correctly, but there is a slight complication: this is applied in a WA
+ * batch where the values are only initialized once, so we cannot take the
+ * register value at the beginning and reuse it further. Hence we save its
+ * value to memory, upload a constant value with bit21 set and then restore
+ * it from the saved value.
+ * To simplify the WA, the constant value is formed by using the default value
+ * of this register. This shouldn't be a problem because we are only modifying
+ * it for a short period and this batch is non-preemptible. We could of course
+ * use additional instructions that read the actual value of the register
+ * at that time and set our bit of interest, but that makes the WA complicated.
+ *
+ * This WA is also required for Gen9 so extracting as a function avoids
+ * code duplication.
+ *
+ * Returns the batch pointer advanced past the emitted commands.
+ */
+static u32 *
+gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
+{
+	const u32 l3sqcreg4 = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+	/* NB no one else is allowed to scribble over scratch + 256! */
+	const u32 save_slot = i915_scratch_offset(engine->i915) + 256;
+
+	/* Save the current register value into the scratch slot */
+	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+	*batch++ = l3sqcreg4;
+	*batch++ = save_slot;
+	*batch++ = 0;
+
+	/* Load the constant value with the coherent-line flush bit set */
+	*batch++ = MI_LOAD_REGISTER_IMM(1);
+	*batch++ = l3sqcreg4;
+	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_CS_STALL |
+				       PIPE_CONTROL_DC_FLUSH_ENABLE,
+				       0);
+
+	/* Restore the register from the scratch slot */
+	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+	*batch++ = l3sqcreg4;
+	*batch++ = save_slot;
+	*batch++ = 0;
+
+	return batch;
+}
+
+/*
+ * Typically we only have one indirect_ctx and per_ctx batch buffer which are
+ * initialized at the beginning and shared across all contexts but this field
+ * helps us to have multiple batches at different offsets and select them based
+ * on a criteria. At the moment this batch always starts at the beginning of
+ * the page and at this point we don't have multiple wa_ctx batch buffers.
+ *
+ * The number of WAs applied is not known at the beginning; we use this field
+ * to return the number of DWORDs written.
+ *
+ * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
+ * so it adds NOOPs as padding to make it cacheline aligned.
+ * MI_BATCH_BUFFER_END will be added to the perctx batch and both of them
+ * together make a complete batch buffer.
+ */
+static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+	/* Actual scratch location is at 128 bytes offset */
+	const u32 scratch_addr =
+		i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
+
+	/* WaDisableCtxRestoreArbitration:bdw,chv */
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
+	if (IS_BROADWELL(engine->i915))
+		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_FLUSH_L3 |
+				       PIPE_CONTROL_GLOBAL_GTT_IVB |
+				       PIPE_CONTROL_CS_STALL |
+				       PIPE_CONTROL_QW_WRITE,
+				       scratch_addr);
+
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	/* Pad to end of cacheline */
+	for (; (unsigned long)batch % CACHELINE_BYTES; batch++)
+		*batch = MI_NOOP;
+
+	/*
+	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
+	 * execution depends on the length specified in terms of cache lines
+	 * in the register CTX_RCS_INDIRECT_CTX
+	 */
+
+	return batch;
+}
+
+/* One register/value pair to be emitted by emit_lri(). */
+struct lri {
+ i915_reg_t reg; /* register whose mmio offset is written to the batch */
+ u32 value; /* dword written to the batch right after the offset */
+};
+
+/*
+ * Emit a single MI_LOAD_REGISTER_IMM packet covering @count entries of @lri
+ * (offset/value pairs), followed by one MI_NOOP.
+ *
+ * @count must be in the range 1..63. Returns the advanced batch pointer.
+ */
+static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+{
+	GEM_BUG_ON(!count || count > 63);
+
+	*batch++ = MI_LOAD_REGISTER_IMM(count);
+	do {
+		batch[0] = i915_mmio_reg_offset(lri->reg);
+		batch[1] = lri->value;
+		batch += 2;
+		lri++;
+	} while (--count);
+	*batch++ = MI_NOOP;
+
+	return batch;
+}
+
+/*
+ * Build the gen9 indirect-context workaround batch at @batch: the coherent
+ * L3 flush WA, a fixed set of chicken-register loads and, on parts with
+ * pooled EUs, the media pool state. Arbitration is disabled around the
+ * sequence and the result is NOOP-padded to a cacheline boundary.
+ *
+ * Returns the advanced batch pointer.
+ */
+static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+	static const struct lri chicken_regs[] = {
+		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
+		{
+			COMMON_SLICE_CHICKEN2,
+			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
+				       0),
+		},
+
+		/* BSpec: 11391 */
+		{
+			FF_SLICE_CHICKEN,
+			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
+				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
+		},
+
+		/* BSpec: 11299 */
+		{
+			_3D_CHICKEN3,
+			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
+				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
+		}
+	};
+
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
+	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+	batch = emit_lri(batch, chicken_regs, ARRAY_SIZE(chicken_regs));
+
+	/* WaMediaPoolStateCmdInWABB:bxt,glk */
+	if (HAS_POOLED_EU(engine->i915)) {
+		/*
+		 * EU pool configuration is setup along with golden context
+		 * during context initialization. This value depends on
+		 * device type (2x6 or 3x6) and needs to be updated based
+		 * on which subslice is disabled especially for 2x6
+		 * devices, however it is safe to load default
+		 * configuration of 3x6 device instead of masking off
+		 * corresponding bits because HW ignores bits of a disabled
+		 * subslice and drops down to appropriate config. Please
+		 * see render_state_setup() in i915_gem_render_state.c for
+		 * possible configurations, to avoid duplication they are
+		 * not shown here again.
+		 */
+		batch[0] = GEN9_MEDIA_POOL_STATE;
+		batch[1] = GEN9_MEDIA_POOL_ENABLE;
+		batch[2] = 0x00777000;
+		batch[3] = 0;
+		batch[4] = 0;
+		batch[5] = 0;
+		batch += 6;
+	}
+
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	/* Pad to end of cacheline */
+	for (; (unsigned long)batch % CACHELINE_BYTES; batch++)
+		*batch = MI_NOOP;
+
+	return batch;
+}
+
+/*
+ * Build the gen10 indirect-context workaround batch at @batch: a CS-stalling
+ * PIPE_CONTROL followed by ten NOOP dwords, then NOOP padding up to the next
+ * cacheline boundary. Returns the advanced batch pointer.
+ */
+static u32 *
+gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+	unsigned int pad;
+
+	/*
+	 * WaPipeControlBefore3DStateSamplePattern: cnl
+	 *
+	 * Ensure the engine is idle prior to programming a
+	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
+	 */
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_CS_STALL,
+				       0);
+	/*
+	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
+	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
+	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
+	 * confusing. Since gen8_emit_pipe_control() already advances the
+	 * batch by 6 dwords, we advance the other 10 here, completing a
+	 * cacheline. It's not clear if the workaround requires this padding
+	 * before other commands, or if it's just the regular padding we would
+	 * already have for the workaround bb, so leave it here for now.
+	 */
+	for (pad = 10; pad; pad--)
+		*batch++ = MI_NOOP;
+
+	/* Pad to end of cacheline */
+	for (; (unsigned long)batch % CACHELINE_BYTES; batch++)
+		*batch = MI_NOOP;
+
+	return batch;
+}
+
+/* Size of the object backing the workaround batch buffers: one page. */
+#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
+
+/*
+ * Allocate the CTX_WA_BB_OBJ_SIZE object for the workaround batches, pin it
+ * high in the global GTT and record the vma in engine->wa_ctx.vma.
+ *
+ * Returns 0 on success. On failure the object reference is dropped and the
+ * error code returned.
+ */
+static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
+
+	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_put_obj;
+	}
+
+	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (ret)
+		goto err_put_obj;
+
+	engine->wa_ctx.vma = vma;
+	return 0;
+
+err_put_obj:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/* Unpin and release the workaround batch vma stored in engine->wa_ctx. */
+static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
+{
+	struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
+
+	i915_vma_unpin_and_release(&wa_ctx->vma, 0);
+}
+
+/*
+ * Emitter for one workaround batch: writes commands starting at @batch and
+ * returns the pointer just past the last dword written.
+ */
+typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
+
+/*
+ * Build the workaround batch buffers (indirect_ctx and per_ctx) for @engine.
+ *
+ * Only RCS0 is accepted; any other engine returns -EINVAL. Gen11 needs no
+ * batch and returns 0 immediately, as does an unrecognised gen (after
+ * MISSING_CASE). For gen8-10 only the indirect_ctx emitter is set; the
+ * per_ctx slot is NULL and therefore ends up with size 0.
+ *
+ * Returns 0 on success, the error from lrc_setup_wa_ctx(), or -EINVAL if a
+ * batch would start at a non-cacheline-aligned offset (in which case the WA
+ * page is destroyed again).
+ */
+static int intel_init_workaround_bb(struct intel_engine_cs *engine)
+{
+ struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+ struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
+ &wa_ctx->per_ctx };
+ wa_bb_func_t wa_bb_fn[2];
+ struct page *page;
+ void *batch, *batch_ptr;
+ unsigned int i;
+ int ret;
+
+ if (GEM_DEBUG_WARN_ON(engine->id != RCS0))
+ return -EINVAL;
+
+ /* Pick the emitters: [0] is indirect_ctx, [1] is per_ctx */
+ switch (INTEL_GEN(engine->i915)) {
+ case 11:
+ return 0;
+ case 10:
+ wa_bb_fn[0] = gen10_init_indirectctx_bb;
+ wa_bb_fn[1] = NULL;
+ break;
+ case 9:
+ wa_bb_fn[0] = gen9_init_indirectctx_bb;
+ wa_bb_fn[1] = NULL;
+ break;
+ case 8:
+ wa_bb_fn[0] = gen8_init_indirectctx_bb;
+ wa_bb_fn[1] = NULL;
+ break;
+ default:
+ MISSING_CASE(INTEL_GEN(engine->i915));
+ return 0;
+ }
+
+ ret = lrc_setup_wa_ctx(engine);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
+ return ret;
+ }
+
+ /* ret is 0 from here on unless the alignment check below fails */
+ page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
+ batch = batch_ptr = kmap_atomic(page);
+
+ /*
+ * Emit the two workaround batch buffers, recording the offset from the
+ * start of the workaround batch buffer object for each and their
+ * respective sizes.
+ */
+ for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
+ /* batch/batch_ptr are void *, so offset and size are byte counts */
+ wa_bb[i]->offset = batch_ptr - batch;
+ if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
+ CACHELINE_BYTES))) {
+ ret = -EINVAL;
+ break;
+ }
+ if (wa_bb_fn[i])
+ batch_ptr = wa_bb_fn[i](engine, batch_ptr);
+ wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
+ }
+
+ /* The emitters must not have written past the single backing page */
+ BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
+
+ kunmap_atomic(batch);
+ if (ret)
+ lrc_destroy_wa_ctx(engine);
+
+ return ret;
+}
+
+/*
+ * Switch @engine into execlists mode: set the HWSP write mask, enable
+ * execlists in RING_MODE (gen11+ instead disables legacy mode), clear
+ * STOP_RING and point RING_HWS_PGA at the engine's status page.
+ */
+static void enable_execlists(struct intel_engine_cs *engine)
+{
+	/* NOTE(review): I915_WRITE/POSTING_READ appear to rely on this name */
+	struct drm_i915_private *dev_priv = engine->i915;
+	u32 mode;
+
+	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
+
+	if (INTEL_GEN(dev_priv) >= 11)
+		mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
+	else
+		mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
+	I915_WRITE(RING_MODE_GEN7(engine), mode);
+
+	I915_WRITE(RING_MI_MODE(engine->mmio_base),
+		   _MASKED_BIT_DISABLE(STOP_RING));
+
+	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
+		   i915_ggtt_offset(engine->status_page.vma));
+	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
+}
+
+/*
+ * Report whether @engine is in a state we do not expect before enabling
+ * execlists; currently that means STOP_RING still being set in RING_MI_MODE.
+ */
+static bool unexpected_starting_state(struct intel_engine_cs *engine)
+{
+	/* NOTE(review): I915_READ appears to rely on this name */
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	if (!(I915_READ(RING_MI_MODE(engine->mmio_base)) & STOP_RING))
+		return false;
+
+	DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
+	return true;
+}
+
+/*
+ * Common engine (re)initialisation: apply workarounds and the whitelist,
+ * set up MOCS, reset the breadcrumbs, optionally dump the engine if it is in
+ * an unexpected state, then enable execlists. Always returns 0.
+ */
+static int gen8_init_common_ring(struct intel_engine_cs *engine)
+{
+	intel_engine_apply_workarounds(engine);
+	intel_engine_apply_whitelist(engine);
+
+	intel_mocs_init_engine(engine);
+
+	intel_engine_reset_breadcrumbs(engine);
+
+	/* Only dump when debug output is enabled */
+	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
+		struct drm_printer printer = drm_debug_printer(__func__);
+
+		intel_engine_dump(engine, &printer, NULL);
+	}
+
+	enable_execlists(engine);
+
+	return 0;
+}
+
+/*
+ * Prepare @engine for a reset: disable the submission tasklet, stop the
+ * command streamer, and wait for any submission currently holding
+ * engine->timeline.lock to finish.
+ */
+static void execlists_reset_prepare(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ unsigned long flags;
+
+ GEM_TRACE("%s: depth<-%d\n", engine->name,
+ atomic_read(&execlists->tasklet.count));
+
+ /*
+ * Prevent request submission to the hardware until we have
+ * completed the reset in i915_gem_reset_finish(). If a request
+ * is completed by one engine, it may then queue a request
+ * to a second via its execlists->tasklet *just* as we are
+ * calling engine->init_hw() and also writing the ELSP.
+ * Turning off the execlists->tasklet until the reset is over
+ * prevents the race.
+ */
+ __tasklet_disable_sync_once(&execlists->tasklet);
+ GEM_BUG_ON(!reset_in_progress(execlists));
+
+ intel_engine_stop_cs(engine);
+
+ /* And flush any current direct submission. */
+ /* The empty lock/unlock pair just waits out any current lock holder */
+ spin_lock_irqsave(&engine->timeline.lock, flags);
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
+/*
+ * Quick spot check for the common signs of context corruption: the ring
+ * control and ring start values stored in the context image of @rq must
+ * match what its ring says they should be.
+ */
+static bool lrc_regs_ok(const struct i915_request *rq)
+{
+	const u32 * const reg_state = rq->hw_context->lrc_reg_state;
+	const struct intel_ring * const ring = rq->ring;
+	const u32 expected_ctl = RING_CTL_SIZE(ring->size) | RING_VALID;
+
+	return reg_state[CTX_RING_BUFFER_CONTROL + 1] == expected_ctl &&
+	       reg_state[CTX_RING_BUFFER_START + 1] ==
+			i915_ggtt_offset(ring->vma);
+}
+
+/*
+ * Rewind our cached CSB head and the CSB write pointer to the last entry,
+ * then invalidate the CSB entries.
+ */
+static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+{
+	const unsigned int last = execlists->csb_size - 1;
+
+	/*
+	 * After a reset, the HW starts writing into CSB entry [0]. We
+	 * therefore have to set our HEAD pointer back one entry so that
+	 * the *first* entry we check is entry 0. To complicate this further,
+	 * as we don't wait for the first interrupt after reset, we have to
+	 * fake the HW write to point back to the last entry so that our
+	 * inline comparison of our cached head position against the last HW
+	 * write works even before the first interrupt.
+	 */
+	execlists->csb_head = last;
+	WRITE_ONCE(*execlists->csb_write, last);
+	wmb(); /* Make sure this is visible to HW (paranoia?) */
+
+	invalidate_csb_entries(&execlists->csb_status[0],
+			       &execlists->csb_status[last]);
+}
+
+/*
+ * Bring the execlists software state of @engine back in line after a GPU
+ * reset: drain the CSB, reset the CSB pointers, cancel what was in the
+ * ports, unwind incomplete requests and fix up the context image of the
+ * context that was executing so that it can be replayed.
+ *
+ * @stalled is forwarded to i915_reset_request(); when true the context
+ * image is always rebuilt rather than trusted.
+ */
+static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ u32 *regs;
+
+ process_csb(engine); /* drain preemption events */
+
+ /* Following the reset, we need to reload the CSB read/write pointers */
+ reset_csb_pointers(&engine->execlists);
+
+ /*
+ * Save the currently executing context, even if we completed
+ * its request, it was still running at the time of the
+ * reset and will have been clobbered.
+ */
+ if (!port_isset(execlists->port))
+ goto out_clear;
+
+ ce = port_request(execlists->port)->hw_context;
+
+ /*
+ * Catch up with any missed context-switch interrupts.
+ *
+ * Ideally we would just read the remaining CSB entries now that we
+ * know the gpu is idle. However, the CSB registers are sometimes^W
+ * often trashed across a GPU reset! Instead we have to rely on
+ * guessing the missed context-switch events by looking at what
+ * requests were completed.
+ */
+ execlists_cancel_port_requests(execlists);
+
+ /* Push back any incomplete requests for replay after the reset. */
+ rq = __unwind_incomplete_requests(engine);
+ if (!rq)
+ goto out_replay;
+
+ /* rq is cleared so that out_replay resumes from ring->tail instead */
+ if (rq->hw_context != ce) { /* caught just before a CS event */
+ rq = NULL;
+ goto out_replay;
+ }
+
+ /*
+ * If this request hasn't started yet, e.g. it is waiting on a
+ * semaphore, we need to avoid skipping the request or else we
+ * break the signaling chain. However, if the context is corrupt
+ * the request will not restart and we will be stuck with a wedged
+ * device. It is quite often the case that if we issue a reset
+ * while the GPU is loading the context image, that the context
+ * image becomes corrupt.
+ *
+ * Otherwise, if we have not started yet, the request should replay
+ * perfectly and we do not need to flag the result as being erroneous.
+ */
+ if (!i915_request_started(rq) && lrc_regs_ok(rq))
+ goto out_replay;
+
+ /*
+ * If the request was innocent, we leave the request in the ELSP
+ * and will try to replay it on restarting. The context image may
+ * have been corrupted by the reset, in which case we may have
+ * to service a new GPU hang, but more likely we can continue on
+ * without impact.
+ *
+ * If the request was guilty, we presume the context is corrupt
+ * and have to at least restore the RING register in the context
+ * image back to the expected values to skip over the guilty request.
+ */
+ i915_reset_request(rq, stalled);
+ if (!stalled && lrc_regs_ok(rq))
+ goto out_replay;
+
+ /*
+ * We want a simple context + ring to execute the breadcrumb update.
+ * We cannot rely on the context being intact across the GPU hang,
+ * so clear it and rebuild just what we need for the breadcrumb.
+ * All pending requests for this context will be zapped, and any
+ * future request will be after userspace has had the opportunity
+ * to recreate its own state.
+ */
+ regs = ce->lrc_reg_state;
+ if (engine->pinned_default_state) {
+ memcpy(regs, /* skip restoring the vanilla PPHWSP */
+ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+ engine->context_size - PAGE_SIZE);
+ }
+ execlists_init_reg_state(regs, ce, engine, ce->ring);
+
+ /* Rerun the request; its payload has been neutered (if guilty). */
+out_replay:
+ /* Resume from the unwound request's head, or from tail if there is none */
+ ce->ring->head =
+ rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
+ intel_ring_update_space(ce->ring);
+ __execlists_update_reg_state(ce, engine);
+
+out_clear:
+ execlists_clear_all_active(execlists);
+}
+
+/*
+ * Locked wrapper around __execlists_reset(): takes engine->timeline.lock
+ * with interrupts saved for the duration of the reset bookkeeping.
+ */
+static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
+{
+	unsigned long irqflags;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	spin_lock_irqsave(&engine->timeline.lock, irqflags);
+	__execlists_reset(engine, stalled);
+	spin_unlock_irqrestore(&engine->timeline.lock, irqflags);
+}
+
+/*
+ * Do-nothing tasklet handler; execlists_cancel_requests() installs it as
+ * execlists->tasklet.func. @data is ignored.
+ */
+static void nop_submission_tasklet(unsigned long data)
+{
+ /* The driver is wedged; don't process any more events. */
+}
+
+/*
+ * Cancel everything outstanding on @engine: reset the execlists state as if
+ * stalled, flag every request on the timeline and in the priority queue with
+ * -EIO and mark it complete, empty the queue, and replace the tasklet
+ * handler with nop_submission_tasklet(). The tasklet must already be
+ * disabled by the caller (asserted below).
+ */
+static void execlists_cancel_requests(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request *rq, *rn;
+ struct rb_node *rb;
+ unsigned long flags;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * Before we call engine->cancel_requests(), we should have exclusive
+ * access to the submission state. This is arranged for us by the
+ * caller disabling the interrupt generation, the tasklet and other
+ * threads that may then access the same state, giving us a free hand
+ * to reset state. However, we still need to let lockdep be aware that
+ * we know this state may be accessed in hardirq context, so we
+ * disable the irq around this manipulation and we want to keep
+ * the spinlock focused on its duties and not accidentally conflate
+ * coverage to the submission's irq state. (Similarly, although we
+ * shouldn't need to disable irq around the manipulation of the
+ * submission's irq state, we also wish to remind ourselves that
+ * it is irq state.)
+ */
+ spin_lock_irqsave(&engine->timeline.lock, flags);
+
+ __execlists_reset(engine, true);
+
+ /* Mark all executing requests as skipped. */
+ list_for_each_entry(rq, &engine->timeline.requests, link) {
+ /* Only set -EIO on fences that have not signaled yet */
+ if (!i915_request_signaled(rq))
+ dma_fence_set_error(&rq->fence, -EIO);
+
+ i915_request_mark_complete(rq);
+ }
+
+ /* Flush the queued requests to the timeline list (for retiring). */
+ while ((rb = rb_first_cached(&execlists->queue))) {
+ struct i915_priolist *p = to_priolist(rb);
+ int i;
+
+ priolist_for_each_request_consume(rq, rn, p, i) {
+ list_del_init(&rq->sched.link);
+ __i915_request_submit(rq);
+ dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
+ }
+
+ rb_erase_cached(&p->node, &execlists->queue);
+ i915_priolist_free(p);
+ }
+
+ /* Remaining _unready_ requests will be nop'ed when submitted */
+
+ execlists->queue_priority_hint = INT_MIN;
+ execlists->queue = RB_ROOT_CACHED;
+ GEM_BUG_ON(port_isset(execlists->port));
+
+ GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
+ execlists->tasklet.func = nop_submission_tasklet;
+
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
+/*
+ * Finish a reset on @engine: replay any queued requests by calling the
+ * tasklet handler directly, then re-enable the tasklet and, if that
+ * re-enabled it, schedule it.
+ */
+static void execlists_reset_finish(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct tasklet_struct * const t = &execlists->tasklet;
+
+	/*
+	 * After a GPU reset, we may have requests to replay. Do so now while
+	 * we still have the forcewake to be sure that the GPU is not allowed
+	 * to sleep before we restart and reload a context.
+	 */
+	GEM_BUG_ON(!reset_in_progress(execlists));
+	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
+		t->func(t->data);
+
+	if (__tasklet_enable(t))
+		/* And kick in case we missed a new request submission. */
+		tasklet_hi_schedule(t);
+	GEM_TRACE("%s: depth->%d\n", engine->name,
+		  atomic_read(&t->count));
+}
+
+/*
+ * Emit a batch buffer start at @offset for @rq, with arbitration disabled
+ * beforehand. BIT(8) is set in the command unless @flags has
+ * I915_DISPATCH_SECURE. @len is unused.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int gen8_emit_bb_start(struct i915_request *rq,
+			      u64 offset, u32 len,
+			      const unsigned int flags)
+{
+	u32 bb_start = MI_BATCH_BUFFER_START_GEN8;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/* FIXME(BDW+): Address space and security selectors. */
+	if (!(flags & I915_DISPATCH_SECURE))
+		bb_start |= BIT(8);
+
+	/*
+	 * WaDisableCtxRestoreArbitration:bdw,chv
+	 *
+	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
+	 * particular all the gen that do not need the w/a at all!), if we
+	 * took care to make sure that on every switch into this context
+	 * (both ordinary and for preemption) that arbitration was enabled
+	 * we would be fine. However, for gen8 there is another w/a that
+	 * requires us to not preempt inside GPGPU execution, so we keep
+	 * arbitration disabled for gen8 batches. Arbitration will be
+	 * re-enabled before we close the request
+	 * (engine->emit_fini_breadcrumb).
+	 */
+	cs[0] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+	cs[1] = bb_start;
+	cs[2] = lower_32_bits(offset);
+	cs[3] = upper_32_bits(offset);
+
+	intel_ring_advance(rq, cs + 4);
+
+	return 0;
+}
+
+/*
+ * Emit a batch buffer start at @offset for @rq, enabling arbitration before
+ * the batch and disabling it again afterwards. BIT(8) is set in the command
+ * unless @flags has I915_DISPATCH_SECURE. @len is unused.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int gen9_emit_bb_start(struct i915_request *rq,
+			      u64 offset, u32 len,
+			      const unsigned int flags)
+{
+	u32 bb_start = MI_BATCH_BUFFER_START_GEN8;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (!(flags & I915_DISPATCH_SECURE))
+		bb_start |= BIT(8);
+
+	cs[0] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	cs[1] = bb_start;
+	cs[2] = lower_32_bits(offset);
+	cs[3] = upper_32_bits(offset);
+
+	cs[4] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	cs[5] = MI_NOOP;
+
+	intel_ring_advance(rq, cs + 6);
+
+	return 0;
+}
+
+/*
+ * Write RING_IMR with the complement of (irq_enable_mask | irq_keep_mask),
+ * then issue a posting read of the register.
+ */
+static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
+{
+	const u32 unmasked = engine->irq_enable_mask | engine->irq_keep_mask;
+
+	ENGINE_WRITE(engine, RING_IMR, ~unmasked);
+	ENGINE_POSTING_READ(engine, RING_IMR);
+}
+
+/*
+ * Mask every engine interrupt in RING_IMR except the bits in
+ * irq_keep_mask. No posting read is issued here.
+ */
+static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
+{
+	const u32 keep = engine->irq_keep_mask;
+
+	ENGINE_WRITE(engine, RING_IMR, ~keep);
+}
+
+/*
+ * Emit a four-dword MI_FLUSH_DW with a store-dword operation aimed at the
+ * HWS scratch address. When @mode has EMIT_INVALIDATE, the TLB invalidate
+ * is added, plus the BSD invalidate on video-decode class engines.
+ *
+ * Returns 0, or the error from intel_ring_begin().
+ */
+static int gen8_emit_flush(struct i915_request *request, u32 mode)
+{
+	const bool invalidate = mode & EMIT_INVALIDATE;
+	u32 flush;
+	u32 *dw;
+
+	dw = intel_ring_begin(request, 4);
+	if (IS_ERR(dw))
+		return PTR_ERR(dw);
+
+	/*
+	 * A command barrier is always required so that later commands,
+	 * such as breadcrumb interrupts, are strictly ordered with respect
+	 * to the write cache contents being flushed to memory (and hence
+	 * coherent from the CPU).
+	 */
+	flush = (MI_FLUSH_DW + 1) |
+		MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+	if (invalidate)
+		flush |= MI_INVALIDATE_TLB;
+	if (invalidate && request->engine->class == VIDEO_DECODE_CLASS)
+		flush |= MI_INVALIDATE_BSD;
+
+	dw[0] = flush;
+	dw[1] = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	dw[2] = 0; /* upper addr */
+	dw[3] = 0; /* value */
+	intel_ring_advance(request, dw + 4);
+
+	return 0;
+}
+
+/*
+ * Emit the render-engine flush/invalidate as PIPE_CONTROL commands.
+ *
+ * A CS stall is always requested. EMIT_FLUSH adds the render-target,
+ * depth and DC flushes; EMIT_INVALIDATE adds the TLB and cache
+ * invalidations plus a qword write to the scratch address, and may need
+ * extra PIPE_CONTROLs around the main one as workarounds. The ring
+ * reservation accounts six dwords for each PIPE_CONTROL emitted.
+ *
+ * Returns 0, or the error from intel_ring_begin().
+ */
+static int gen8_emit_flush_render(struct i915_request *request,
+				  u32 mode)
+{
+	struct intel_engine_cs *engine = request->engine;
+	const u32 scratch =
+		i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
+	bool need_vf_wa = false;
+	bool need_dc_wa = false;
+	u32 pc_flags = PIPE_CONTROL_CS_STALL;
+	int dwords = 6;
+	u32 *cs;
+
+	if (mode & EMIT_FLUSH)
+		pc_flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+			    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+			    PIPE_CONTROL_DC_FLUSH_ENABLE |
+			    PIPE_CONTROL_FLUSH_ENABLE;
+
+	if (mode & EMIT_INVALIDATE) {
+		pc_flags |= PIPE_CONTROL_TLB_INVALIDATE |
+			    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+			    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+			    PIPE_CONTROL_VF_CACHE_INVALIDATE |
+			    PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+			    PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+			    PIPE_CONTROL_QW_WRITE |
+			    PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+		/*
+		 * On GEN9 a NULL pipe control has to precede
+		 * VF_CACHE_INVALIDATE.
+		 */
+		need_vf_wa = IS_GEN(request->i915, 9);
+
+		/* WaForGAMHang:kbl */
+		need_dc_wa = IS_KBL_REVID(request->i915, 0, KBL_REVID_B0);
+	}
+
+	/* One extra PIPE_CONTROL for the VF w/a, two for the DC w/a. */
+	if (need_vf_wa)
+		dwords += 6;
+	if (need_dc_wa)
+		dwords += 12;
+
+	cs = intel_ring_begin(request, dwords);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (need_vf_wa)
+		cs = gen8_emit_pipe_control(cs, 0, 0);
+
+	if (need_dc_wa)
+		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
+					    0);
+
+	cs = gen8_emit_pipe_control(cs, pc_flags, scratch);
+
+	if (need_dc_wa)
+		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
+
+	intel_ring_advance(request, cs);
+
+	return 0;
+}
+
+/*
+ * Two dwords are reserved at the end of every request as a workaround for
+ * lite restore not being allowed with HEAD==TAIL (WaIdleLiteRestore).
+ * They are filled with an MI_ARB_CHECK and an MI_NOOP, and the ring
+ * offset just past them is recorded in request->wa_tail.
+ *
+ * Returns the command pointer advanced past the two dwords.
+ */
+static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
+{
+	/* Guarantees at least one preemption point in every request. */
+	cs[0] = MI_ARB_CHECK;
+	cs[1] = MI_NOOP;
+	cs += 2;
+
+	request->wa_tail = intel_ring_offset(request, cs);
+
+	return cs;
+}
+
+/*
+ * Shared tail of the final breadcrumb: raise the user interrupt, turn
+ * arbitration back on, record the resulting ring offset as request->tail
+ * (checking that it is a valid tail) and append the workaround tail.
+ *
+ * Returns the command pointer past everything that was emitted.
+ */
+static u32 *
+gen8_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	request->tail = intel_ring_offset(request, cs);
+	assert_ring_tail_valid(request->ring, request->tail);
+
+	return gen8_emit_wa_tail(request, cs);
+}
+
+/*
+ * Final breadcrumb for the non-render engines: write the request's fence
+ * seqno to its timeline HWSP offset and the next hangcheck seqno to the
+ * hangcheck slot, then emit the common footer.
+ *
+ * Returns the command pointer past everything that was emitted.
+ */
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+	cs = gen8_emit_ggtt_write(cs,
+				  request->fence.seqno,
+				  request->timeline->hwsp_offset,
+				  0);
+
+	cs = gen8_emit_ggtt_write(cs,
+				  intel_engine_next_hangcheck_seqno(request->engine),
+				  I915_GEM_HWS_HANGCHECK_ADDR,
+				  MI_FLUSH_DW_STORE_INDEX);
+
+	return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
+/*
+ * Final breadcrumb for the render engine: same two writes as the generic
+ * variant, but issued through the render-specific helper. The seqno write
+ * also requests the render-target, depth and DC flushes with a CS stall.
+ *
+ * Returns the command pointer past everything that was emitted.
+ */
+static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      request->fence.seqno,
+				      request->timeline->hwsp_offset,
+				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+				      PIPE_CONTROL_DC_FLUSH_ENABLE |
+				      PIPE_CONTROL_FLUSH_ENABLE |
+				      PIPE_CONTROL_CS_STALL);
+
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      intel_engine_next_hangcheck_seqno(request->engine),
+				      I915_GEM_HWS_HANGCHECK_ADDR,
+				      PIPE_CONTROL_STORE_DATA_INDEX);
+
+	return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
+/*
+ * Set up a new render context: emit the context workarounds, try to
+ * program the MOCS, then emit the render state.
+ *
+ * Returns the error from the workaround emission if that fails, otherwise
+ * the result of i915_gem_render_state_emit().
+ */
+static int gen8_init_rcs_context(struct i915_request *rq)
+{
+	int err;
+
+	err = intel_engine_emit_ctx_wa(rq);
+	if (err)
+		return err;
+
+	/*
+	 * Failing to program the MOCS is non-fatal. The system will not
+	 * run at peak performance, so report the error and carry on.
+	 */
+	if (intel_rcs_context_init_mocs(rq))
+		DRM_ERROR("MOCS failed to program: expect performance issues.\n");
+
+	return i915_gem_render_state_emit(rq);
+}
+
+/**
+ * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
+ * @engine: Engine Command Streamer.
+ *
+ * Runs the engine's own cleanup hook (if any) and the common engine
+ * cleanup, destroys the workaround context, removes the engine from the
+ * device's engine table and frees it. @engine must not be used afterwards.
+ */
+void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
+{
+	/* Looked up first: engine->i915 is cleared further down. */
+	struct drm_i915_private *i915 = engine->i915;
+
+	/*
+	 * The tasklet cannot be active at this point due to
+	 * intel_mark_active/idle, so this is just for documentation.
+	 */
+	if (WARN_ON(test_bit(TASKLET_STATE_SCHED,
+			     &engine->execlists.tasklet.state)))
+		tasklet_kill(&engine->execlists.tasklet);
+
+	if (engine->buffer)
+		WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+
+	if (engine->cleanup)
+		engine->cleanup(engine);
+
+	intel_engine_cleanup_common(engine);
+	lrc_destroy_wa_ctx(engine);
+
+	engine->i915 = NULL;
+	i915->engine[engine->id] = NULL;
+	kfree(engine);
+}
+
+/*
+ * Install the execlists submission backend on @engine: the submit,
+ * cancel and schedule hooks, the submission tasklet function and the
+ * reset callbacks. The park/unpark hooks are cleared and the capability
+ * flags for this backend are added to engine->flags.
+ */
+void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	/* Request submission path. */
+	engine->submit_request = execlists_submit_request;
+	engine->cancel_requests = execlists_cancel_requests;
+	engine->schedule = i915_schedule;
+	engine->execlists.tasklet.func = execlists_submission_tasklet;
+
+	/* Engine reset path. */
+	engine->reset.prepare = execlists_reset_prepare;
+	engine->reset.reset = execlists_reset;
+	engine->reset.finish = execlists_reset_finish;
+
+	engine->park = NULL;
+	engine->unpark = NULL;
+
+	/* Stats always; semaphores unless a vGPU is active. */
+	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+	if (!intel_vgpu_active(i915))
+		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+
+	/* Preemption needs both a preempt context and device support. */
+	if (engine->preempt_context && HAS_LOGICAL_RING_PREEMPTION(i915))
+		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+}
+
+/*
+ * Fill in the default engine vfuncs for logical rings. Individual
+ * engines may override any of them afterwards.
+ */
+static void
+logical_ring_default_vfuncs(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	engine->init_hw = gen8_init_common_ring;
+
+	engine->reset.prepare = execlists_reset_prepare;
+	engine->reset.reset = execlists_reset;
+	engine->reset.finish = execlists_reset_finish;
+
+	engine->cops = &execlists_context_ops;
+	engine->request_alloc = execlists_request_alloc;
+
+	engine->emit_flush = gen8_emit_flush;
+	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
+	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
+
+	engine->set_default_submission = intel_execlists_set_default_submission;
+
+	/*
+	 * Gen11+ gets no irq_enable/irq_disable hooks here.
+	 *
+	 * TODO: On Gen11 the interrupt masks need to be clear to allow C6
+	 * entry. Interrupts are therefore kept enabled, taking the hit of
+	 * generating extra interrupts, until a more refined solution
+	 * exists.
+	 */
+	if (INTEL_GEN(i915) < 11) {
+		engine->irq_enable = gen8_logical_ring_enable_irq;
+		engine->irq_disable = gen8_logical_ring_disable_irq;
+	}
+
+	/* Only gen8 uses the gen8 batch start; everything else uses gen9's. */
+	engine->emit_bb_start = IS_GEN(i915, 8) ?
+		gen8_emit_bb_start : gen9_emit_bb_start;
+}
+
+/*
+ * Derive the engine's interrupt masks: the user-interrupt bit goes into
+ * irq_enable_mask and the context-switch bit into irq_keep_mask. Before
+ * gen11 both are shifted by a per-engine amount; from gen11 onwards no
+ * shift is applied.
+ */
+static inline void
+logical_ring_default_irqs(struct intel_engine_cs *engine)
+{
+	/*
+	 * Per-engine shift for gen8..gen10, indexed by engine id. The
+	 * table is constant, so keep a single read-only copy instead of
+	 * rebuilding it on the stack on every call.
+	 */
+	static const u8 irq_shifts[] = {
+		[RCS0]  = GEN8_RCS_IRQ_SHIFT,
+		[BCS0]  = GEN8_BCS_IRQ_SHIFT,
+		[VCS0]  = GEN8_VCS0_IRQ_SHIFT,
+		[VCS1]  = GEN8_VCS1_IRQ_SHIFT,
+		[VECS0] = GEN8_VECS_IRQ_SHIFT,
+	};
+	unsigned int shift = 0;
+
+	if (INTEL_GEN(engine->i915) < 11)
+		shift = irq_shifts[engine->id];
+
+	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
+	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
+}
+
+/*
+ * First stage of logical ring initialisation: run the common engine
+ * setup, initialise the submission tasklet and install the default
+ * vfuncs and interrupt masks.
+ *
+ * Returns 0, or the error from intel_engine_setup_common().
+ */
+static int
+logical_ring_setup(struct intel_engine_cs *engine)
+{
+	int ret = intel_engine_setup_common(engine);
+
+	if (ret)
+		return ret;
+
+	/* Intentionally left blank: no engine->buffer is assigned here. */
+	engine->buffer = NULL;
+
+	tasklet_init(&engine->execlists.tasklet,
+		     execlists_submission_tasklet, (unsigned long)engine);
+
+	logical_ring_default_vfuncs(engine);
+	logical_ring_default_irqs(engine);
+
+	return 0;
+}
+
+/*
+ * Second stage of logical ring initialisation: run the common engine
+ * init and workaround init, then fill in the execlists bookkeeping -
+ * submission register pointers, the preempt-complete status value, the
+ * CSB status/write pointers inside the status page and the CSB size -
+ * and reset the CSB pointers.
+ *
+ * Returns 0, or the error from intel_engine_init_common().
+ */
+static int logical_ring_init(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const el = &engine->execlists;
+	struct drm_i915_private *i915 = engine->i915;
+	const u32 mmio = engine->mmio_base;
+	int err;
+
+	err = intel_engine_init_common(engine);
+	if (err)
+		return err;
+
+	intel_engine_init_workarounds(engine);
+
+	/* With ELSQ there is a separate control register; otherwise ELSP. */
+	if (HAS_LOGICAL_RING_ELSQ(i915)) {
+		el->submit_reg = i915->uncore.regs +
+			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(mmio));
+		el->ctrl_reg = i915->uncore.regs +
+			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(mmio));
+	} else {
+		el->submit_reg = i915->uncore.regs +
+			i915_mmio_reg_offset(RING_ELSP(mmio));
+	}
+
+	/* All ones unless a preempt context supplies its descriptor. */
+	el->preempt_complete_status = engine->preempt_context ?
+		upper_32_bits(engine->preempt_context->lrc_desc) : ~0u;
+
+	el->csb_status =
+		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
+
+	el->csb_write =
+		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
+
+	el->csb_size = INTEL_GEN(engine->i915) < 11 ?
+		GEN8_CSB_ENTRIES : GEN11_CSB_ENTRIES;
+
+	reset_csb_pointers(el);
+
+	return 0;
+}
+
+/*
+ * Initialise the render engine as a logical ring. The render-specific
+ * context init, flush and final breadcrumb hooks replace the defaults
+ * between the setup and init stages. A failure to build the workaround
+ * batch buffer is logged but not propagated.
+ *
+ * Returns 0, or the error from logical_ring_setup()/logical_ring_init().
+ */
+int logical_render_ring_init(struct intel_engine_cs *engine)
+{
+	int err;
+
+	err = logical_ring_setup(engine);
+	if (err)
+		return err;
+
+	/* Render-ring overrides of the default vfuncs. */
+	engine->init_context = gen8_init_rcs_context;
+	engine->emit_flush = gen8_emit_flush_render;
+	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+
+	err = logical_ring_init(engine);
+	if (err)
+		return err;
+
+	/*
+	 * Carry on even when the WA batch cannot be set up: only rare
+	 * glitches are expected from that, nothing critical enough to
+	 * stop us from using the GPU.
+	 */
+	err = intel_init_workaround_bb(engine);
+	if (err)
+		DRM_ERROR("WA batch buffer initialization failed: %d\n",
+			  err);
+
+	intel_engine_init_whitelist(engine);
+
+	return 0;
+}
+
+/*
+ * Initialise a non-render engine as a logical ring using only the
+ * default vfuncs: setup stage followed by init stage.
+ *
+ * Returns 0, or the first error from either stage.
+ */
+int logical_xcs_ring_init(struct intel_engine_cs *engine)
+{
+	int ret = logical_ring_setup(engine);
+
+	return ret ? ret : logical_ring_init(engine);
+}
+
+/*
+ * Return the default indirect-context offset for the device generation
+ * of @engine. Generations without an entry are reported through
+ * MISSING_CASE() and get the gen11 value.
+ */
+static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
+{
+	switch (INTEL_GEN(engine->i915)) {
+	case 8:
+		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+	case 9:
+		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+	case 10:
+		return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+	default:
+		MISSING_CASE(INTEL_GEN(engine->i915));
+		/* fall through */
+	case 11:
+		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+	}
+}
+
+static void execlists_init_reg_state(u32 *regs,
+ struct intel_context *ce,
+ struct intel_engine_cs *engine,
+ struct intel_ring *ring)
+{
+ struct i915_hw_ppgtt *ppgtt = ce->gem_context->ppgtt;
+ bool rcs = engine->class == RENDER_CLASS;
+ u32 base = engine->mmio_base;
+
+ /* A context is actually a big batch buffer with several
+ * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+ * values we are setting here are only for the first context restore:
+ * on a subsequent save, the GPU will recreate this batchbuffer with new
+ * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+ * we are not initializing here).
+ *
+ * Every CTX_REG() below stores the register offset at regs[pos] and
+ * the value at regs[pos + 1], so "regs[X + 1]" always addresses the
+ * value slot of register X. The first LRI header announces the 11
+ * pairs written for every engine, or 14 on the render engine, which
+ * additionally gets the indirect-ctx, indirect-ctx-offset and
+ * per-ctx batch pointer registers.
+ */
+ regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
+ MI_LRI_FORCE_POSTED;
+
+ CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
+ _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
+ _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
+ if (INTEL_GEN(engine->i915) < 11) { /* pre-gen11: also mask-disable save inhibit and RS ctx enable */
+ regs[CTX_CONTEXT_CONTROL + 1] |=
+ _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+ CTX_CTRL_RS_CTX_ENABLE);
+ }
+ CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
+ CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
+ CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
+ CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
+ RING_CTL_SIZE(ring->size) | RING_VALID);
+ CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
+ CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
+ CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
+ CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+ CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+ CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+ if (rcs) { /* render only: hook up the workaround batches from engine->wa_ctx */
+ struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+
+ CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
+ CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
+ RING_INDIRECT_CTX_OFFSET(base), 0);
+ if (wa_ctx->indirect_ctx.size) { /* values stay 0 when there is no indirect ctx batch */
+ u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+
+ /* GGTT address of the batch ORed with its size in cachelines */
+ regs[CTX_RCS_INDIRECT_CTX + 1] =
+ (ggtt_offset + wa_ctx->indirect_ctx.offset) |
+ (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
+
+ regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
+ intel_lr_indirect_ctx_offset(engine) << 6;
+ }
+
+ CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ if (wa_ctx->per_ctx.size) {
+ u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+
+ /* NOTE(review): bit 0 presumably flags the pointer as valid - confirm against bspec */
+ regs[CTX_BB_PER_CTX_PTR + 1] =
+ (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
+ }
+ }
+
+ /* Second LRI block: 9 pairs = context timestamp + eight PDP halves */
+ regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+
+ CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+ /* PDP values will be assigned later if needed */
+ CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
+ CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
+ CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
+ CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
+ CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
+ CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
+ CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
+ CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
+
+ if (i915_vm_is_4lvl(&ppgtt->vm)) {
+ /* 64b PPGTT (48bit canonical)
+ * PDP0_DESCRIPTOR contains the base address to PML4 and
+ * other PDP Descriptors are ignored.
+ */
+ ASSIGN_CTX_PML4(ppgtt, regs);
+ } else { /* otherwise fill in all four page-directory pointers */
+ ASSIGN_CTX_PDP(ppgtt, regs, 3);
+ ASSIGN_CTX_PDP(ppgtt, regs, 2);
+ ASSIGN_CTX_PDP(ppgtt, regs, 1);
+ ASSIGN_CTX_PDP(ppgtt, regs, 0);
+ }
+
+ if (rcs) {
+ regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); /* third LRI block: a single pair */
+ CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+
+ i915_oa_init_reg_state(engine, ce, regs);
+ }
+
+ regs[CTX_END] = MI_BATCH_BUFFER_END; /* terminates the register image */
+ if (INTEL_GEN(engine->i915) >= 10)
+ regs[CTX_END] |= BIT(0);
+}
+
+/*
+ * Fill a freshly created context object with its initial image.
+ *
+ * If the engine has a default state, that template is copied in first
+ * (skipping the header pages). The register state is then initialised,
+ * with restore inhibited when no default state exists, and with both
+ * restore and save inhibited for the preempt context before gen11.
+ * The written range is flushed and the mapping released before
+ * returning.
+ *
+ * Returns 0, or the error from mapping either object.
+ */
+static int
+populate_lr_context(struct intel_context *ce,
+		    struct drm_i915_gem_object *ctx_obj,
+		    struct intel_engine_cs *engine,
+		    struct intel_ring *ring)
+{
+	const unsigned long hdr = LRC_HEADER_PAGES * PAGE_SIZE;
+	u32 inhibit = 0;
+	void *image;
+	u32 *regs;
+	int err = 0;
+
+	image = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
+	if (IS_ERR(image)) {
+		err = PTR_ERR(image);
+		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", err);
+		return err;
+	}
+
+	if (engine->default_state) {
+		void *tmpl;
+
+		tmpl = i915_gem_object_pin_map(engine->default_state,
+					       I915_MAP_WB);
+		if (IS_ERR(tmpl)) {
+			err = PTR_ERR(tmpl);
+			goto out_unpin;
+		}
+
+		/*
+		 * Only the template context state is wanted: the headers
+		 * reserved for GuC communication are skipped and stay
+		 * zero.
+		 */
+		memcpy(image + hdr, tmpl + hdr, engine->context_size);
+		i915_gem_object_unpin_map(engine->default_state);
+	}
+
+	/*
+	 * The state page of the context object holds fields that must be
+	 * set up prior to the first execution.
+	 */
+	regs = image + LRC_STATE_PN * PAGE_SIZE;
+	execlists_init_reg_state(regs, ce, engine, ring);
+
+	if (!engine->default_state)
+		inhibit |=
+			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+	if (ce->gem_context == engine->i915->preempt_context &&
+	    INTEL_GEN(engine->i915) < 11)
+		inhibit |=
+			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
+	regs[CTX_CONTEXT_CONTROL + 1] |= inhibit;
+
+out_unpin:
+	__i915_gem_object_flush_map(ctx_obj, hdr, engine->context_size);
+	i915_gem_object_unpin_map(ctx_obj);
+	return err;
+}
+
+/*
+ * Return a timeline for @ctx: the result of i915_timeline_get() on the
+ * context's own timeline when it has one, otherwise a newly created
+ * timeline (which may be an ERR_PTR).
+ */
+static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
+{
+	struct i915_timeline *tl = ctx->timeline;
+
+	if (!tl)
+		return i915_timeline_create(ctx->i915, NULL);
+
+	return i915_timeline_get(tl);
+}
+
+/*
+ * Allocate the backing state of @ce on first use: the context object
+ * with its GGTT vma, and a ring on the context's timeline. The object is
+ * populated with the initial image before ce->ring and ce->state are
+ * published. Does nothing when ce->state is already set.
+ *
+ * Returns 0 on success or a negative error code; on failure the ring and
+ * the object created so far are released.
+ */
+static int execlists_context_deferred_alloc(struct intel_context *ce,
+					    struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_timeline *tl;
+	struct intel_ring *ring;
+	struct i915_vma *vma;
+	u32 size;
+	int err;
+
+	if (ce->state)
+		return 0;
+
+	/*
+	 * A few pages for our own use, and for sharing with the GuC, are
+	 * placed in front of the actual context image.
+	 */
+	size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
+	size += LRC_HEADER_PAGES * PAGE_SIZE;
+
+	obj = i915_gem_object_create(i915, size);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_put_obj;
+	}
+
+	tl = get_timeline(ce->gem_context);
+	if (IS_ERR(tl)) {
+		err = PTR_ERR(tl);
+		goto err_put_obj;
+	}
+
+	/* Our timeline reference is dropped whether or not this works. */
+	ring = intel_engine_create_ring(engine, tl,
+					ce->gem_context->ring_size);
+	i915_timeline_put(tl);
+	if (IS_ERR(ring)) {
+		err = PTR_ERR(ring);
+		goto err_put_obj;
+	}
+
+	err = populate_lr_context(ce, obj, engine, ring);
+	if (err) {
+		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", err);
+		goto err_put_ring;
+	}
+
+	ce->ring = ring;
+	ce->state = vma;
+
+	return 0;
+
+err_put_ring:
+	intel_ring_put(ring);
+err_put_obj:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+void intel_execlists_show_requests(struct intel_engine_cs *engine,
+ struct drm_printer *m,
+ void (*show_request)(struct drm_printer *m,
+ struct i915_request *rq,
+ const char *prefix),
+ unsigned int max)
+{ /*
+ * Print the requests of @engine through @show_request: first those on
+ * the engine timeline (prefix "E"), then those in the execlists
+ * priority queue (prefix "Q"). For each of the two lists the first
+ * @max - 1 entries and the very last entry are shown; anything in
+ * between is summarised by a "...skipping N ..." line written to @m.
+ * The whole walk runs under engine->timeline.lock with interrupts
+ * saved and disabled.
+ */
+ const struct intel_engine_execlists *execlists = &engine->execlists;
+ struct i915_request *rq, *last;
+ unsigned long flags;
+ unsigned int count;
+ struct rb_node *rb;
+
+ spin_lock_irqsave(&engine->timeline.lock, flags);
+
+ last = NULL;
+ count = 0;
+ list_for_each_entry(rq, &engine->timeline.requests, link) {
+ if (count++ < max - 1) /* unsigned compare: max == 0 wraps, so nothing is skipped */
+ show_request(m, rq, "\t\tE ");
+ else
+ last = rq; /* remember only the most recent entry not shown */
+ }
+ if (last) {
+ if (count > max) { /* count - max entries sit between the shown head and the last one */
+ drm_printf(m,
+ "\t\t...skipping %d executing requests...\n",
+ count - max);
+ }
+ show_request(m, last, "\t\tE ");
+ }
+
+ last = NULL; /* restart the bookkeeping for the queued requests */
+ count = 0;
+ if (execlists->queue_priority_hint != INT_MIN) /* INT_MIN: no hint line is printed */
+ drm_printf(m, "\t\tQueue priority hint: %d\n",
+ execlists->queue_priority_hint);
+ for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+ struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ int i;
+
+ priolist_for_each_request(rq, p, i) {
+ if (count++ < max - 1) /* same head/last policy as for the timeline above */
+ show_request(m, rq, "\t\tQ ");
+ else
+ last = rq;
+ }
+ }
+ if (last) {
+ if (count > max) {
+ drm_printf(m,
+ "\t\t...skipping %d queued requests...\n",
+ count - max);
+ }
+ show_request(m, last, "\t\tQ ");
+ }
+
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
+/*
+ * Prepare @ce for re-execution after a reset: optionally scrub its
+ * register state, rewind the ring to @head, recompute the ring space and
+ * refresh the context's register state.
+ */
+void intel_lr_context_reset(struct intel_engine_cs *engine,
+			    struct intel_context *ce,
+			    u32 head,
+			    bool scrub)
+{
+	struct intel_ring *ring = ce->ring;
+
+	/*
+	 * All we want is a simple context + ring able to execute the
+	 * breadcrumb update. The context cannot be relied upon to be intact
+	 * across the GPU hang, so it is cleared and only what the breadcrumb
+	 * needs is rebuilt. All pending requests for this context will be
+	 * zapped, and any future request comes after userspace has had the
+	 * opportunity to recreate its own state.
+	 */
+	if (scrub) {
+		u32 *state = ce->lrc_reg_state;
+
+		/* The vanilla PPHWSP is skipped, not restored. */
+		if (engine->pinned_default_state)
+			memcpy(state,
+			       engine->pinned_default_state +
+			       LRC_STATE_PN * PAGE_SIZE,
+			       engine->context_size - PAGE_SIZE);
+
+		execlists_init_reg_state(state, ce, engine, ring);
+	}
+
+	/* Rerun the request; its payload has been neutered (if guilty). */
+	ring->head = head;
+	intel_ring_update_space(ring);
+
+	__execlists_update_reg_state(ce, engine);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_lrc.c"
+#endif
--- /dev/null
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _INTEL_LRC_H_
+#define _INTEL_LRC_H_
+
+#include "intel_engine.h"
+
+/*
+ * Execlists regs
+ *
+ * Each RING_* macro takes an engine's MMIO base and yields the register at
+ * a fixed offset from it. The CTX_CTRL_* values are bits of
+ * RING_CONTEXT_CONTROL.
+ */
+#define RING_ELSP(base) _MMIO((base) + 0x230)
+#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234)
+#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4)
+#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244)
+#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3)
+#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0)
+#define CTX_CTRL_RS_CTX_ENABLE (1 << 1)
+#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2)
+#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
+#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510)
+#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550)
+
+#define EL_CTRL_LOAD (1 << 0) /* NOTE(review): presumably a RING_EXECLIST_CONTROL bit - confirm */
+
+/* The docs specify that the write pointer wraps around after 5h, "After status
+ * is written out to the last available status QW at offset 5h, this pointer
+ * wraps to 0."
+ *
+ * Therefore, one must infer that even though there are 3 bits available, 6 and
+ * 7 appear to be reserved.
+ */
+#define GEN8_CSB_ENTRIES 6
+#define GEN8_CSB_PTR_MASK 0x7
+#define GEN8_CSB_READ_PTR_MASK (GEN8_CSB_PTR_MASK << 8)
+#define GEN8_CSB_WRITE_PTR_MASK (GEN8_CSB_PTR_MASK << 0)
+
+#define GEN11_CSB_ENTRIES 12
+#define GEN11_CSB_PTR_MASK 0xf
+#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8)
+#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0)
+
+/* Context schedule-in / schedule-out / preempted identifiers, numbered from 0. */
+enum {
+	INTEL_CONTEXT_SCHEDULE_IN = 0,
+	INTEL_CONTEXT_SCHEDULE_OUT = 1,
+	INTEL_CONTEXT_SCHEDULE_PREEMPTED = 2,
+};
+
+/* Logical Rings */
+void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
+int logical_render_ring_init(struct intel_engine_cs *engine);
+int logical_xcs_ring_init(struct intel_engine_cs *engine);
+
+/* Logical Ring Contexts */
+
+/*
+ * We allocate a header at the start of the context image for our own
+ * use, therefore the actual location of the logical state is offset
+ * from the start of the VMA. The layout is
+ *
+ * | [guc] | [hwsp] [logical state] |
+ * |<- our header ->|<- context image ->|
+ *
+ */
+/* The first page is used for sharing data with the GuC */
+#define LRC_GUCSHR_PN (0)
+#define LRC_GUCSHR_SZ (1)
+/* At the start of the context image is its per-process HWS page */
+#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ)
+#define LRC_PPHWSP_SZ (1)
+/* Finally we have the logical state for the context */
+#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
+
+/*
+ * Currently we include the PPHWSP in __intel_engine_context_size() so
+ * the size of the header is synonymous with the start of the PPHWSP.
+ */
+#define LRC_HEADER_PAGES LRC_PPHWSP_PN
+
+struct drm_printer;
+
+struct drm_i915_private;
+
+void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
+
+void intel_lr_context_reset(struct intel_engine_cs *engine,
+ struct intel_context *ce,
+ u32 head,
+ bool scrub);
+
+void intel_execlists_show_requests(struct intel_engine_cs *engine,
+ struct drm_printer *m,
+ void (*show_request)(struct drm_printer *m,
+ struct i915_request *rq,
+ const char *prefix),
+ unsigned int max);
+
+#endif /* _INTEL_LRC_H_ */
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef _INTEL_LRC_REG_H_
+#define _INTEL_LRC_REG_H_
+
+#include <linux/types.h>
+
+/* GEN8+ Reg State Context */
+#define CTX_LRI_HEADER_0 0x01
+#define CTX_CONTEXT_CONTROL 0x02
+#define CTX_RING_HEAD 0x04
+#define CTX_RING_TAIL 0x06
+#define CTX_RING_BUFFER_START 0x08
+#define CTX_RING_BUFFER_CONTROL 0x0a
+#define CTX_BB_HEAD_U 0x0c
+#define CTX_BB_HEAD_L 0x0e
+#define CTX_BB_STATE 0x10
+#define CTX_SECOND_BB_HEAD_U 0x12
+#define CTX_SECOND_BB_HEAD_L 0x14
+#define CTX_SECOND_BB_STATE 0x16
+#define CTX_BB_PER_CTX_PTR 0x18
+#define CTX_RCS_INDIRECT_CTX 0x1a
+#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
+#define CTX_LRI_HEADER_1 0x21
+#define CTX_CTX_TIMESTAMP 0x22
+#define CTX_PDP3_UDW 0x24
+#define CTX_PDP3_LDW 0x26
+#define CTX_PDP2_UDW 0x28
+#define CTX_PDP2_LDW 0x2a
+#define CTX_PDP1_UDW 0x2c
+#define CTX_PDP1_LDW 0x2e
+#define CTX_PDP0_UDW 0x30
+#define CTX_PDP0_LDW 0x32
+#define CTX_LRI_HEADER_2 0x41
+#define CTX_R_PWR_CLK_STATE 0x42
+#define CTX_END 0x44
+
+#define CTX_REG(reg_state, pos, reg, val) do { /* write one (register, value) pair into a context image */ \
+ u32 *reg_state__ = (reg_state); /* reg_state and pos are each evaluated exactly once */ \
+ const u32 pos__ = (pos); \
+ (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); /* slot pos: MMIO offset of reg */ \
+ (reg_state__)[(pos__) + 1] = (val); /* slot pos + 1: value for that register */ \
+} while (0)
+
+#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { /* store page directory n's DMA address in the PDPn value slots */ \
+ u32 *reg_state__ = (reg_state); \
+ const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
+ (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); /* n is token-pasted, so it must be a literal 0-3 */ \
+ (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \
+} while (0)
+
+/*
+ * Store the DMA address of the ppgtt's PML4 in the value slots of the
+ * PDP0 upper/lower dword registers of a context register state.
+ * @ppgtt is parenthesized so that any pointer-valued expression can be
+ * passed, matching ASSIGN_CTX_PDP().
+ */
+#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
+	u32 *reg_state__ = (reg_state); \
+	const u64 addr__ = px_dma(&(ppgtt)->pml4); \
+	(reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
+	(reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
+} while (0)
+
+#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
+#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
+#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19
+#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A
+
+#endif /* _INTEL_LRC_REG_H_ */
--- /dev/null
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions: *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "i915_drv.h"
+
+#include "intel_engine.h"
+#include "intel_mocs.h"
+#include "intel_lrc.h"
+
+/*
+ * structures required
+ *
+ * One MOCS table slot, normally built with MOCS_ENTRY().
+ */
+struct drm_i915_mocs_entry {
+ u32 control_value; /* built from the LE_* defines below */
+ u16 l3cc_value; /* built from the L3_* defines below */
+ u16 used; /* set to 1 by MOCS_ENTRY(); slots never named stay 0 */
+};
+
+struct drm_i915_mocs_table { /* describes one platform's table of MOCS entries */
+ unsigned int size; /* NOTE(review): presumably the number of elements in @table - confirm */
+ unsigned int n_entries; /* NOTE(review): presumably a GENx_NUM_MOCS_ENTRIES count - confirm */
+ const struct drm_i915_mocs_entry *table; /* read-only entry array */
+};
+
+/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
+#define _LE_CACHEABILITY(value) ((value) << 0)
+#define _LE_TGT_CACHE(value) ((value) << 2)
+#define LE_LRUM(value) ((value) << 4)
+#define LE_AOM(value) ((value) << 6)
+#define LE_RSC(value) ((value) << 7)
+#define LE_SCC(value) ((value) << 8)
+#define LE_PFM(value) ((value) << 11)
+#define LE_SCF(value) ((value) << 14)
+#define LE_COS(value) ((value) << 15)
+#define LE_SSE(value) ((value) << 17)
+
+/* Defines for the tables (LNCFCMOCS0 - LNCFCMOCS31) - two entries per word */
+#define L3_ESC(value) ((value) << 0)
+#define L3_SCC(value) ((value) << 1)
+#define _L3_CACHEABILITY(value) ((value) << 4)
+
+/* Helper defines */
+#define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */
+#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
+
+/* (e)LLC caching options */
+#define LE_0_PAGETABLE _LE_CACHEABILITY(0)
+#define LE_1_UC _LE_CACHEABILITY(1)
+#define LE_2_WT _LE_CACHEABILITY(2)
+#define LE_3_WB _LE_CACHEABILITY(3)
+
+/* Target cache */
+#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0)
+#define LE_TC_1_LLC _LE_TGT_CACHE(1)
+#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2)
+#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3)
+
+/* L3 caching options */
+#define L3_0_DIRECT _L3_CACHEABILITY(0)
+#define L3_1_UC _L3_CACHEABILITY(1)
+#define L3_2_RESERVED _L3_CACHEABILITY(2)
+#define L3_3_WB _L3_CACHEABILITY(3)
+
+#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
+ [__idx] = { /* designated initializer for slot __idx of a mocs entry array */ \
+ .control_value = __control_value, \
+ .l3cc_value = __l3cc_value, \
+ .used = 1, /* flags the slot as explicitly defined */ \
+ }
+
+/*
+ * MOCS tables
+ *
+ * These are the MOCS tables that are programmed across all the rings.
+ * The control value is programmed to all the rings that support the
+ * MOCS registers, while the l3cc_values are only programmed to the
+ * LNCFCMOCS0 - LNCFCMOCS31 registers.
+ *
+ * These tables are intended to be kept reasonably consistent across
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
+ *
+ * Entries not part of the following tables are undefined as far as
+ * userspace is concerned and shouldn't be relied upon. For the time
+ * being they will be initialized to PTE.
+ *
+ * The last two entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
+ *
+ * NOTE: These tables are part of bspec and defined as part of hardware
+ * interface for ICL+. For older platforms, they are part of kernel
+ * ABI. It is expected that, for specific hardware platform, existing
+ * entries will remain constant and the table will only be updated by
+ * adding new entries, filling unused positions.
+ */
+#define GEN9_MOCS_ENTRIES \
+ MOCS_ENTRY(I915_MOCS_UNCACHED, \
+ LE_1_UC | LE_TC_2_LLC_ELLC, \
+ L3_1_UC), \
+ MOCS_ENTRY(I915_MOCS_PTE, \
+ LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
+ L3_3_WB)
+
+static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
+ GEN9_MOCS_ENTRIES, /* the shared I915_MOCS_UNCACHED and I915_MOCS_PTE entries */
+ MOCS_ENTRY(I915_MOCS_CACHED, /* write-back for both the (e)LLC and the L3 */
+ LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
+ L3_3_WB)
+};
+
+/*
+ * Table selected by get_mocs_settings() for IS_GEN9_LP(): the shared GEN9
+ * entries plus an I915_MOCS_CACHED slot that uses LE_1_UC here (the
+ * Skylake table uses LE_3_WB for the same slot).
+ *
+ * NOTE: the LE_TGT_CACHE is not used on Broxton
+ */
+static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
+ GEN9_MOCS_ENTRIES,
+ MOCS_ENTRY(I915_MOCS_CACHED,
+ LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3),
+ L3_3_WB)
+};
+
+/*
+ * Gen11+ entries. Slots 16-17 and 24-61 are not listed here; unlisted
+ * slots keep .used == 0 and are therefore programmed with the
+ * I915_MOCS_PTE values by get_entry_control()/get_entry_l3cc().
+ */
+#define GEN11_MOCS_ENTRIES \
+ /* Base - Uncached (Deprecated) */ \
+ MOCS_ENTRY(I915_MOCS_UNCACHED, \
+ LE_1_UC | LE_TC_1_LLC, \
+ L3_1_UC), \
+ /* Base - L3 + LeCC:PAT (Deprecated) */ \
+ MOCS_ENTRY(I915_MOCS_PTE, \
+ LE_0_PAGETABLE | LE_TC_1_LLC, \
+ L3_3_WB), \
+ /* Base - L3 + LLC */ \
+ MOCS_ENTRY(2, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+ L3_3_WB), \
+ /* Base - Uncached */ \
+ MOCS_ENTRY(3, \
+ LE_1_UC | LE_TC_1_LLC, \
+ L3_1_UC), \
+ /* Base - L3 */ \
+ MOCS_ENTRY(4, \
+ LE_1_UC | LE_TC_1_LLC, \
+ L3_3_WB), \
+ /* Base - LLC */ \
+ MOCS_ENTRY(5, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+ L3_1_UC), \
+ /* Age 0 - LLC */ \
+ MOCS_ENTRY(6, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+ L3_1_UC), \
+ /* Age 0 - L3 + LLC */ \
+ MOCS_ENTRY(7, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+ L3_3_WB), \
+ /* Age: Don't Chg. - LLC */ \
+ MOCS_ENTRY(8, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+ L3_1_UC), \
+ /* Age: Don't Chg. - L3 + LLC */ \
+ MOCS_ENTRY(9, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+ L3_3_WB), \
+ /* No AOM - LLC */ \
+ MOCS_ENTRY(10, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+ L3_1_UC), \
+ /* No AOM - L3 + LLC */ \
+ MOCS_ENTRY(11, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+ L3_3_WB), \
+ /* No AOM; Age 0 - LLC */ \
+ MOCS_ENTRY(12, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+ L3_1_UC), \
+ /* No AOM; Age 0 - L3 + LLC */ \
+ MOCS_ENTRY(13, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+ L3_3_WB), \
+ /* No AOM; Age:DC - LLC */ \
+ MOCS_ENTRY(14, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+ L3_1_UC), \
+ /* No AOM; Age:DC - L3 + LLC */ \
+ MOCS_ENTRY(15, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+ L3_3_WB), \
+ /* Self-Snoop - L3 + LLC */ \
+ MOCS_ENTRY(18, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(12.5%) */ \
+ MOCS_ENTRY(19, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(25%) */ \
+ MOCS_ENTRY(20, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(50%) */ \
+ MOCS_ENTRY(21, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(75%) */ \
+ MOCS_ENTRY(22, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
+ L3_3_WB), \
+ /* Skip Caching - L3 + LLC(87.5%) */ \
+ MOCS_ENTRY(23, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
+ L3_3_WB), \
+ /* HW Reserved - SW program but never use */ \
+ MOCS_ENTRY(62, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+ L3_1_UC), \
+ /* HW Reserved - SW program but never use */ \
+ MOCS_ENTRY(63, \
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+ L3_1_UC)
+
+/* Table selected by get_mocs_settings() when INTEL_GEN() >= 11. */
+static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+ GEN11_MOCS_ENTRIES
+};
+
+/**
+ * get_mocs_settings()
+ * @dev_priv:	i915 device.
+ * @table:	Output table that will be made to point at appropriate
+ *		MOCS values for the device.
+ *
+ * Selects the MOCS table for the platform and fills in @table: the entry
+ * array, the number of entries defined in that array (size) and the number
+ * of hardware slots to program (n_entries). When no table applies, @table
+ * is left untouched and must not be read by the caller.
+ *
+ * On gen9 the selected table is additionally checked against
+ * WaDisableSkipCaching; a table that sets any of the forbidden l3cc bits
+ * is rejected.
+ *
+ * Return: true if there are applicable MOCS settings for the device.
+ */
+static bool get_mocs_settings(struct drm_i915_private *dev_priv,
+			      struct drm_i915_mocs_table *table)
+{
+	if (INTEL_GEN(dev_priv) >= 11) {
+		table->size = ARRAY_SIZE(icelake_mocs_table);
+		table->table = icelake_mocs_table;
+		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
+	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+		table->size = ARRAY_SIZE(skylake_mocs_table);
+		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		table->table = skylake_mocs_table;
+	} else if (IS_GEN9_LP(dev_priv)) {
+		table->size = ARRAY_SIZE(broxton_mocs_table);
+		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		table->table = broxton_mocs_table;
+	} else {
+		WARN_ONCE(INTEL_GEN(dev_priv) >= 9,
+			  "Platform that should have a MOCS table does not.\n");
+
+		/*
+		 * @table has not been filled in, so bail out before the
+		 * workaround check below dereferences it.
+		 */
+		return false;
+	}
+
+	/* WaDisableSkipCaching:skl,bxt,kbl,glk */
+	if (IS_GEN(dev_priv, 9)) {
+		int i;
+
+		for (i = 0; i < table->size; i++)
+			if (WARN_ON(table->table[i].l3cc_value &
+				    (L3_ESC(1) | L3_SCC(0x7))))
+				return false;
+	}
+
+	return true;
+}
+
+/*
+ * Map an engine id and a MOCS slot index to that engine's MOCS control
+ * register. An engine id without a case below triggers MISSING_CASE() and
+ * yields INVALID_MMIO_REG.
+ */
+static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
+{
+	i915_reg_t reg;
+
+	switch (engine_id) {
+	case RCS0:
+		reg = GEN9_GFX_MOCS(index);
+		break;
+	case BCS0:
+		reg = GEN9_BLT_MOCS(index);
+		break;
+	case VCS0:
+		reg = GEN9_MFX0_MOCS(index);
+		break;
+	case VCS1:
+		reg = GEN9_MFX1_MOCS(index);
+		break;
+	case VCS2:
+		reg = GEN11_MFX2_MOCS(index);
+		break;
+	case VECS0:
+		reg = GEN9_VEBOX_MOCS(index);
+		break;
+	default:
+		MISSING_CASE(engine_id);
+		reg = INVALID_MMIO_REG;
+		break;
+	}
+
+	return reg;
+}
+
+/*
+ * Look up the control_value to program for slot @index. A slot that the
+ * table does not define (used == 0) falls back to the control_value of the
+ * I915_MOCS_PTE slot.
+ */
+static u32 get_entry_control(const struct drm_i915_mocs_table *table,
+			     unsigned int index)
+{
+	const struct drm_i915_mocs_entry *entry = &table->table[index];
+
+	if (!entry->used)
+		entry = &table->table[I915_MOCS_PTE];
+
+	return entry->control_value;
+}
+
+/**
+ * intel_mocs_init_engine() - program the mocs control table of an engine
+ * @engine:	The engine whose MOCS control registers are written.
+ *
+ * Writes the platform's MOCS control values directly to the engine's MOCS
+ * registers with I915_WRITE(); no command is emitted into a ring. Slots the
+ * table does not define, and all slots past the end of the table up to
+ * n_entries, receive the I915_MOCS_PTE control value.
+ *
+ * Does nothing when the platform has no MOCS table.
+ */
+void intel_mocs_init_engine(struct intel_engine_cs *engine)
+{
+	/* I915_WRITE() expects a local named dev_priv */
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct drm_i915_mocs_table table;
+	unsigned int index;
+	u32 unused_value;
+
+	if (!get_mocs_settings(dev_priv, &table))
+		return;
+
+	/* Set unused values to PTE */
+	unused_value = table.table[I915_MOCS_PTE].control_value;
+
+	/* Slots covered by the table (holes resolve to PTE in the helper) */
+	for (index = 0; index < table.size; index++)
+		I915_WRITE(mocs_register(engine->id, index),
+			   get_entry_control(&table, index));
+
+	/* All remaining entries are also unused */
+	for (; index < table.n_entries; index++)
+		I915_WRITE(mocs_register(engine->id, index), unused_value);
+}
+
+/**
+ * emit_mocs_control_table() - emit the mocs control table
+ * @rq:	Request to set up the MOCS table for.
+ * @table:	The values to program into the control regs.
+ *
+ * Emits one MI_LOAD_REGISTER_IMM command into @rq that loads every MOCS
+ * control register of the request's engine, n_entries registers in total.
+ * Slots beyond the table, like undefined slots inside it, are loaded with
+ * the I915_MOCS_PTE control value.
+ *
+ * Return: 0 on success, otherwise the error status.
+ */
+static int emit_mocs_control_table(struct i915_request *rq,
+				   const struct drm_i915_mocs_table *table)
+{
+	const enum intel_engine_id id = rq->engine->id;
+	unsigned int slot;
+	u32 pte_control;
+	u32 *cs;
+
+	if (GEM_WARN_ON(table->size > table->n_entries))
+		return -ENODEV;
+
+	/* Value used for every slot the table does not cover */
+	pte_control = table->table[I915_MOCS_PTE].control_value;
+
+	/* LRI header + (offset, value) per register + trailing NOOP */
+	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
+
+	for (slot = 0; slot < table->n_entries; slot++) {
+		u32 value;
+
+		if (slot < table->size)
+			value = get_entry_control(table, slot);
+		else
+			value = pte_control;
+
+		*cs++ = i915_mmio_reg_offset(mocs_register(id, slot));
+		*cs++ = value;
+	}
+
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+/*
+ * Look up the l3cc_value to program for slot @index. A slot that the table
+ * does not define (used == 0) falls back to the l3cc_value of the
+ * I915_MOCS_PTE slot.
+ */
+static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
+			  unsigned int index)
+{
+	const struct drm_i915_mocs_entry *entry = &table->table[index];
+
+	if (!entry->used)
+		entry = &table->table[I915_MOCS_PTE];
+
+	return entry->l3cc_value;
+}
+
+/*
+ * Pack two 16-bit l3cc values into one 32-bit register value: @low in bits
+ * 15:0 and @high in bits 31:16.
+ *
+ * The operands are widened to u32 before shifting; a bare u16 would be
+ * promoted to signed int, and shifting a value with bit 15 set by 16 would
+ * then overflow that int.
+ *
+ * @table is not used; it is kept so existing callers need no change.
+ */
+static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
+			       u16 low,
+			       u16 high)
+{
+	return (u32)low | (u32)high << 16;
+}
+
+/**
+ * emit_mocs_l3cc_table() - emit the mocs l3cc table
+ * @rq: Request to set up the MOCS table for.
+ * @table: The values to program into the l3cc regs.
+ *
+ * This function emits a single MI_LOAD_REGISTER_IMM command that loads the
+ * GEN9_LNCFCMOCS registers. Each register holds two consecutive l3cc values
+ * (even slot in the low 16 bits, odd slot in the high 16 bits), so this
+ * register set is programmed in pairs. Slots without a table entry are
+ * filled with the I915_MOCS_PTE l3cc value.
+ *
+ * Return: 0 on success, otherwise the error status.
+ */
+static int emit_mocs_l3cc_table(struct i915_request *rq,
+ const struct drm_i915_mocs_table *table)
+{
+ u16 unused_value;
+ unsigned int i;
+ u32 *cs;
+
+ if (GEM_WARN_ON(table->size > table->n_entries))
+ return -ENODEV;
+
+ /* Set unused values to PTE */
+ unused_value = table->table[I915_MOCS_PTE].l3cc_value;
+
+ /*
+ * LRI header + trailing MI_NOOP, plus an (offset, value) pair for each
+ * of the n_entries / 2 registers.
+ */
+ cs = intel_ring_begin(rq, 2 + table->n_entries);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
+
+ /* Registers whose two halves both lie inside the table */
+ for (i = 0; i < table->size / 2; i++) {
+ u16 low = get_entry_l3cc(table, 2 * i);
+ u16 high = get_entry_l3cc(table, 2 * i + 1);
+
+ *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
+ *cs++ = l3cc_combine(table, low, high);
+ }
+
+ /* Odd table size - 1 left over */
+ if (table->size & 0x01) {
+ u16 low = get_entry_l3cc(table, 2 * i);
+
+ *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
+ *cs++ = l3cc_combine(table, low, unused_value);
+ i++;
+ }
+
+ /* All remaining entries are also unused */
+ for (; i < table->n_entries / 2; i++) {
+ *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
+ *cs++ = l3cc_combine(table, unused_value, unused_value);
+ }
+
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+/**
+ * intel_mocs_init_l3cc_table() - program the mocs l3cc table
+ * @dev_priv:	i915 device private
+ *
+ * Writes the l3cc values of the platform's MOCS table to the GEN9_LNCFCMOCS
+ * registers with I915_WRITE(). Each register packs two consecutive slots,
+ * so this register set is programmed in pairs; slots without a table entry
+ * get the I915_MOCS_PTE l3cc value.
+ *
+ * These registers may get programmed more than once, it is simpler to
+ * re-program 32 registers than maintain the state of when they were programmed.
+ * We are always reprogramming with the same values and this only on context
+ * start.
+ *
+ * Does nothing when the platform has no MOCS table.
+ */
+void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
+{
+	struct drm_i915_mocs_table table;
+	unsigned int pair = 0;
+	u16 pte_l3cc;
+
+	if (!get_mocs_settings(dev_priv, &table))
+		return;
+
+	/* Filler for every half-register the table does not cover */
+	pte_l3cc = table.table[I915_MOCS_PTE].l3cc_value;
+
+	/* Registers whose two halves both lie inside the table */
+	while (pair < table.size / 2) {
+		u16 lo = get_entry_l3cc(&table, 2 * pair);
+		u16 hi = get_entry_l3cc(&table, 2 * pair + 1);
+
+		I915_WRITE(GEN9_LNCFCMOCS(pair),
+			   l3cc_combine(&table, lo, hi));
+		pair++;
+	}
+
+	/* Odd table size: the last slot shares a register with filler */
+	if (table.size & 0x01) {
+		u16 lo = get_entry_l3cc(&table, 2 * pair);
+
+		I915_WRITE(GEN9_LNCFCMOCS(pair),
+			   l3cc_combine(&table, lo, pte_l3cc));
+		pair++;
+	}
+
+	/* Everything past the table is filler on both halves */
+	while (pair < table.n_entries / 2) {
+		I915_WRITE(GEN9_LNCFCMOCS(pair),
+			   l3cc_combine(&table, pte_l3cc, pte_l3cc));
+		pair++;
+	}
+}
+
+/**
+ * intel_rcs_context_init_mocs() - program the MOCS register.
+ * @rq:	Request to set up the MOCS tables for.
+ *
+ * Emits into @rq the commands that load the MOCS control registers and
+ * then the l3cc registers for the platform. Platforms without a MOCS table
+ * emit nothing and report success.
+ *
+ * These registers are partially stored in the RCS context, so they are
+ * emitted at the same time so that when a context is created these registers
+ * are set up. These registers have to be emitted into the start of the
+ * context as setting the ELSP will re-init some of these registers back
+ * to the hw values.
+ *
+ * Return: 0 on success, otherwise the error status.
+ */
+int intel_rcs_context_init_mocs(struct i915_request *rq)
+{
+	struct drm_i915_mocs_table t;
+	int err;
+
+	if (!get_mocs_settings(rq->i915, &t))
+		return 0;
+
+	/* Control registers first ... */
+	err = emit_mocs_control_table(rq, &t);
+	if (err)
+		return err;
+
+	/* ... then the l3cc registers */
+	return emit_mocs_l3cc_table(rq, &t);
+}
--- /dev/null
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INTEL_MOCS_H
+#define INTEL_MOCS_H
+
+/**
+ * DOC: Memory Objects Control State (MOCS)
+ *
+ * Motivation:
+ * In previous Gens the MOCS setting was a value that was set by user land as
+ * part of the batch. In Gen9 this has changed to be a single table (per ring)
+ * that all batches now reference by index instead of programming the MOCS
+ * directly.
+ *
+ * The one wrinkle in this is that only PART of the MOCS tables are included
+ * in context (The GFX_MOCS_0 - GFX_MOCS_64 and the LNCFCMOCS0 - LNCFCMOCS32
+ * registers). The rest are not (the settings for the other rings).
+ *
+ * This table needs to be set at system start-up because of the way the table
+ * interacts with the contexts and the GmmLib interface.
+ *
+ *
+ * Implementation:
+ *
+ * The tables (one per supported platform) are defined in intel_mocs.c
+ * and are programmed in the first batch after the context is loaded
+ * (with the hardware workarounds). This will then let the usual
+ * context handling keep the MOCS in step.
+ */
+
+/* Only pointers are used below, so forward declarations suffice. */
+struct drm_i915_private;
+struct i915_request;
+struct intel_engine_cs;
+
+/* Emit the MOCS control and l3cc register loads into @rq; 0 or an error. */
+int intel_rcs_context_init_mocs(struct i915_request *rq);
+/* Write the l3cc values to the LNCFCMOCS registers through MMIO. */
+void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
+/* Write the MOCS control values of @engine through MMIO. */
+void intel_mocs_init_engine(struct intel_engine_cs *engine);
+
+#endif
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2018 Intel Corporation
+ */
+
+#include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
+
+#include "i915_drv.h"
+#include "i915_gpu_error.h"
+#include "intel_reset.h"
+
+#include "intel_guc.h"
+
+/* Attempts intel_gpu_reset() makes for an ALL_ENGINES reset before giving up */
+#define RESET_MAX_RETRIES 3
+
+/* XXX How to handle concurrent GGTT updates using tiling registers? */
+#define RESET_UNDER_STOP_MACHINE 0
+
+/* Read-modify-write @reg: clear nothing, set the bits in @set. */
+static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
+{
+	const u32 clear_none = 0;
+
+	intel_uncore_rmw(uncore, reg, clear_none, set);
+}
+
+/* Read-modify-write @reg: clear the bits in @clr, set nothing. */
+static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
+{
+	const u32 set_none = 0;
+
+	intel_uncore_rmw(uncore, reg, clr, set_none);
+}
+
+/* As rmw_set(), but through the _fw register accessor. */
+static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
+{
+	const u32 clear_none = 0;
+
+	intel_uncore_rmw_fw(uncore, reg, clear_none, set);
+}
+
+/* As rmw_clear(), but through the _fw register accessor. */
+static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
+{
+	const u32 set_none = 0;
+
+	intel_uncore_rmw_fw(uncore, reg, clr, set_none);
+}
+
+/*
+ * Skip (with -EIO) every request that follows @rq on the engine timeline
+ * and belongs to the same GEM context as @rq. Nothing happens unless @rq
+ * is active. The caller must hold the engine timeline lock.
+ */
+static void engine_skip_context(struct i915_request *rq)
+{
+	struct i915_gem_context *guilty_ctx = rq->gem_context;
+	struct intel_engine_cs *engine = rq->engine;
+
+	lockdep_assert_held(&engine->timeline.lock);
+
+	if (i915_request_is_active(rq)) {
+		/* rq is reused as the cursor, starting after the hung one */
+		list_for_each_entry_continue(rq, &engine->timeline.requests, link) {
+			if (rq->gem_context != guilty_ctx)
+				continue;
+
+			i915_request_skip(rq, -EIO);
+		}
+	}
+}
+
+/*
+ * Charge a hang to the client owning @ctx: add I915_CLIENT_SCORE_CONTEXT_BAN
+ * if the context is banned and I915_CLIENT_SCORE_HANG_FAST if the client's
+ * previous hang was less than I915_CLIENT_FAST_HANG_JIFFIES ago. The
+ * client's hang timestamp is always updated to the current jiffies.
+ */
+static void client_mark_guilty(struct drm_i915_file_private *file_priv,
+			       const struct i915_gem_context *ctx)
+{
+	unsigned long last_hang;
+	unsigned int score = 0;
+
+	if (i915_gem_context_is_banned(ctx))
+		score = I915_CLIENT_SCORE_CONTEXT_BAN;
+
+	/* Record this hang and fetch the time of the previous one */
+	last_hang = xchg(&file_priv->hang_timestamp, jiffies);
+	if (time_before(jiffies, last_hang + I915_CLIENT_FAST_HANG_JIFFIES))
+		score += I915_CLIENT_SCORE_HANG_FAST;
+
+	if (!score)
+		return;
+
+	atomic_add(score, &file_priv->ban_score);
+
+	DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
+			 ctx->name, score,
+			 atomic_read(&file_priv->ban_score));
+}
+
+/*
+ * Account a hang against @ctx and decide whether to ban it.
+ *
+ * The guilty count is always bumped. A non-bannable context is never
+ * banned. Otherwise the hang timestamp history is shifted by one, and the
+ * context is banned if it is not recoverable or if the oldest recorded
+ * hang is less than CONTEXT_FAST_HANG_JIFFIES old. The owning client, if
+ * any, is charged as well.
+ *
+ * Return: true if the context has been banned.
+ */
+static bool context_mark_guilty(struct i915_gem_context *ctx)
+{
+	const unsigned int last = ARRAY_SIZE(ctx->hang_timestamp) - 1;
+	unsigned long oldest;
+	unsigned int n;
+	bool banned;
+
+	atomic_inc(&ctx->guilty_count);
+
+	/* Cool contexts are too cool to be banned! (Used for reset testing.) */
+	if (!i915_gem_context_is_bannable(ctx))
+		return false;
+
+	/* Drop the oldest timestamp and append the current time */
+	oldest = ctx->hang_timestamp[0];
+	for (n = 0; n < last; n++)
+		ctx->hang_timestamp[n] = ctx->hang_timestamp[n + 1];
+	ctx->hang_timestamp[last] = jiffies;
+
+	/* If we have hung N+1 times in rapid succession, we ban the context! */
+	banned = !i915_gem_context_is_recoverable(ctx) ||
+		 time_before(jiffies, oldest + CONTEXT_FAST_HANG_JIFFIES);
+	if (banned) {
+		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
+				 ctx->name, atomic_read(&ctx->guilty_count));
+		i915_gem_context_set_banned(ctx);
+	}
+
+	/* Must follow the ban above: the client score depends on it */
+	if (!IS_ERR_OR_NULL(ctx->file_priv))
+		client_mark_guilty(ctx->file_priv, ctx);
+
+	return banned;
+}
+
+/* Count a reset that hit @ctx without @ctx being the culprit. */
+static void context_mark_innocent(struct i915_gem_context *ctx)
+{
+	atomic_t *innocent_resets = &ctx->active_count;
+
+	atomic_inc(innocent_resets);
+}
+
+/*
+ * Settle the fate of the incomplete request @rq after a reset.
+ *
+ * A guilty request is skipped with -EIO and its context is marked guilty;
+ * if that bans the context, its later requests on the engine are skipped
+ * too. An innocent request gets its fence error set to -EAGAIN and its
+ * context is marked innocent. The caller must hold the engine timeline
+ * lock.
+ */
+void i915_reset_request(struct i915_request *rq, bool guilty)
+{
+	GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
+		  rq->engine->name,
+		  rq->fence.context,
+		  rq->fence.seqno,
+		  yesno(guilty));
+
+	lockdep_assert_held(&rq->engine->timeline.lock);
+	GEM_BUG_ON(i915_request_completed(rq));
+
+	if (!guilty) {
+		dma_fence_set_error(&rq->fence, -EAGAIN);
+		context_mark_innocent(rq->gem_context);
+		return;
+	}
+
+	i915_request_skip(rq, -EIO);
+	if (context_mark_guilty(rq->gem_context))
+		engine_skip_context(rq);
+}
+
+/*
+ * Park an engine's ring before a reset: ask the command streamer to stop,
+ * set HEAD equal to TAIL, zero HEAD and TAIL, then disable the ring via
+ * RING_CTL. A stop timeout or a non-zero HEAD afterwards is only traced.
+ */
+static void gen3_stop_engine(struct intel_engine_cs *engine)
+{
+ struct intel_uncore *uncore = engine->uncore;
+ const u32 base = engine->mmio_base;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ if (intel_engine_stop_cs(engine))
+ GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
+
+ /* Make HEAD equal to TAIL */
+ intel_uncore_write_fw(uncore,
+ RING_HEAD(base),
+ intel_uncore_read_fw(uncore, RING_TAIL(base)));
+ intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
+
+ intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
+ intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
+ intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
+
+ /* The ring must be empty before it is disabled */
+ intel_uncore_write_fw(uncore, RING_CTL(base), 0);
+
+ /* Check acts as a post */
+ if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
+ GEM_TRACE("%s: ring head [%x] not parked\n",
+ engine->name,
+ intel_uncore_read_fw(uncore, RING_HEAD(base)));
+}
+
+/*
+ * Park the ring of every engine selected by @engine_mask. Devices older
+ * than gen3 are left alone.
+ */
+static void i915_stop_engines(struct drm_i915_private *i915,
+			      intel_engine_mask_t engine_mask)
+{
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+
+	if (INTEL_GEN(i915) >= 3) {
+		for_each_engine_masked(engine, i915, engine_mask, tmp)
+			gen3_stop_engine(engine);
+	}
+}
+
+/* Report whether GRDOM_RESET_STATUS is set in the I915_GDRST config byte. */
+static bool i915_in_reset(struct pci_dev *pdev)
+{
+	u8 status;
+
+	pci_read_config_byte(pdev, I915_GDRST, &status);
+
+	return (status & GRDOM_RESET_STATUS) != 0;
+}
+
+/*
+ * Reset through the I915_GDRST PCI config byte: assert the reset, wait for
+ * the status bit to appear, deassert it and wait for the status bit to
+ * clear again. The request is deasserted even when the first wait fails;
+ * in that case the first error is returned. @engine_mask and @retry are
+ * not used.
+ */
+static int i915_do_reset(struct drm_i915_private *i915,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ struct pci_dev *pdev = i915->drm.pdev;
+ int err;
+
+ /* Assert reset for at least 20 usec, and wait for acknowledgement. */
+ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
+ udelay(50);
+ err = wait_for_atomic(i915_in_reset(pdev), 50);
+
+ /* Clear the reset request. */
+ pci_write_config_byte(pdev, I915_GDRST, 0);
+ udelay(50);
+ if (!err)
+ err = wait_for_atomic(!i915_in_reset(pdev), 50);
+
+ return err;
+}
+
+/* True once GRDOM_RESET_ENABLE reads back as clear in I915_GDRST. */
+static bool g4x_reset_complete(struct pci_dev *pdev)
+{
+	u8 ctl;
+
+	pci_read_config_byte(pdev, I915_GDRST, &ctl);
+
+	return !(ctl & GRDOM_RESET_ENABLE);
+}
+
+/*
+ * Request a reset through I915_GDRST and wait until the enable bit reads
+ * back as clear. @engine_mask and @retry are not used.
+ */
+static int g33_do_reset(struct drm_i915_private *i915,
+			intel_engine_mask_t engine_mask,
+			unsigned int retry)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+	int err;
+
+	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
+	err = wait_for_atomic(g4x_reset_complete(pdev), 50);
+
+	return err;
+}
+
+/*
+ * Reset the media domain and then the render domain through I915_GDRST,
+ * with VCP unit clock gating disabled for the duration. Whatever the
+ * outcome, the reset request is cleared and the clock gating bit is
+ * restored before returning. @engine_mask and @retry are not used.
+ *
+ * Returns 0 on success or the error of the wait that failed.
+ */
+static int g4x_do_reset(struct drm_i915_private *i915,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ struct pci_dev *pdev = i915->drm.pdev;
+ struct intel_uncore *uncore = &i915->uncore;
+ int ret;
+
+ /* WaVcpClkGateDisableForMediaReset:ctg,elk */
+ rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
+ intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
+
+ pci_write_config_byte(pdev, I915_GDRST,
+ GRDOM_MEDIA | GRDOM_RESET_ENABLE);
+ ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+ goto out;
+ }
+
+ pci_write_config_byte(pdev, I915_GDRST,
+ GRDOM_RENDER | GRDOM_RESET_ENABLE);
+ ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+ goto out;
+ }
+
+out:
+ /* Common exit: drop the reset request, re-enable clock gating */
+ pci_write_config_byte(pdev, I915_GDRST, 0);
+
+ rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
+ intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
+
+ return ret;
+}
+
+/*
+ * Reset the render domain and then the media domain through ILK_GDSR,
+ * waiting after each request for ILK_GRDOM_RESET_ENABLE to clear. ILK_GDSR
+ * is written back to 0 on every exit path. @engine_mask and @retry are not
+ * used.
+ *
+ * Returns 0 on success or the error of the wait that failed.
+ */
+static int ironlake_do_reset(struct drm_i915_private *i915,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ struct intel_uncore *uncore = &i915->uncore;
+ int ret;
+
+ intel_uncore_write_fw(uncore, ILK_GDSR,
+ ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
+ ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
+ ILK_GRDOM_RESET_ENABLE, 0,
+ 5000, 0,
+ NULL);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+ goto out;
+ }
+
+ intel_uncore_write_fw(uncore, ILK_GDSR,
+ ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
+ ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
+ ILK_GRDOM_RESET_ENABLE, 0,
+ 5000, 0,
+ NULL);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+ goto out;
+ }
+
+out:
+ /* Common exit: clear the reset request and post the write */
+ intel_uncore_write_fw(uncore, ILK_GDSR, 0);
+ intel_uncore_posting_read_fw(uncore, ILK_GDSR);
+ return ret;
+}
+
+/*
+ * Reset the hardware domains (GENX_GRDOM_*) specified by mask: write the
+ * mask to GEN6_GDRST and wait for all of those bits to read back as zero.
+ *
+ * Returns 0 on success, otherwise the error from the wait.
+ */
+static int gen6_hw_domain_reset(struct drm_i915_private *i915,
+				u32 hw_domain_mask)
+{
+	int err;
+
+	/*
+	 * GEN6_GDRST is not in the gt power well, no need to check
+	 * for fifo space for the write or forcewake the chip for
+	 * the read
+	 */
+	intel_uncore_write_fw(&i915->uncore, GEN6_GDRST, hw_domain_mask);
+
+	/* Wait for the device to ack the reset requests */
+	err = __intel_wait_for_register_fw(&i915->uncore,
+					   GEN6_GDRST, hw_domain_mask, 0,
+					   500, 0,
+					   NULL);
+	if (!err)
+		return 0;
+
+	DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
+			 hw_domain_mask);
+
+	return err;
+}
+
+/*
+ * Translate @engine_mask into GEN6_GDRST domain bits and reset those
+ * domains. ALL_ENGINES maps to GEN6_GRDOM_FULL; otherwise the per-engine
+ * domain bits are OR-ed together. @retry is not used.
+ */
+static int gen6_reset_engines(struct drm_i915_private *i915,
+			      intel_engine_mask_t engine_mask,
+			      unsigned int retry)
+{
+	const u32 hw_engine_mask[] = {
+		[RCS0] = GEN6_GRDOM_RENDER,
+		[BCS0] = GEN6_GRDOM_BLT,
+		[VCS0] = GEN6_GRDOM_MEDIA,
+		[VCS1] = GEN8_GRDOM_MEDIA2,
+		[VECS0] = GEN6_GRDOM_VECS,
+	};
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+	u32 domains = 0;
+
+	if (engine_mask == ALL_ENGINES)
+		return gen6_hw_domain_reset(i915, GEN6_GRDOM_FULL);
+
+	for_each_engine_masked(engine, i915, engine_mask, tmp) {
+		GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
+		domains |= hw_engine_mask[engine->id];
+	}
+
+	return gen6_hw_domain_reset(i915, domains);
+}
+
+/*
+ * Force-lock the SFC unit for @engine ahead of an engine reset.
+ *
+ * Only video decode engines listed in vdbox_sfc_access and video
+ * enhancement engines are handled; every other engine returns 0 without
+ * touching the hardware.
+ *
+ * Returns the SFC reset bit the caller must add to its reset mask when the
+ * usage bit shows the SFC in use after the lock was acknowledged, otherwise
+ * 0 (including when the lock acknowledgement times out).
+ */
+static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
+{
+ struct intel_uncore *uncore = engine->uncore;
+ u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
+ i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
+ u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
+ i915_reg_t sfc_usage;
+ u32 sfc_usage_bit;
+ u32 sfc_reset_bit;
+
+ /* Pick the register/bit set matching the engine class */
+ switch (engine->class) {
+ case VIDEO_DECODE_CLASS:
+ if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
+ return 0;
+
+ sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
+ sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
+
+ sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
+ sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT;
+
+ sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
+ sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
+ sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
+ break;
+
+ case VIDEO_ENHANCEMENT_CLASS:
+ sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
+ sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
+
+ sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
+ sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT;
+
+ sfc_usage = GEN11_VECS_SFC_USAGE(engine);
+ sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
+ sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
+ break;
+
+ default:
+ return 0;
+ }
+
+ /*
+ * Tell the engine that a software reset is going to happen. The engine
+ * will then try to force lock the SFC (if currently locked, it will
+ * remain so until we tell the engine it is safe to unlock; if currently
+ * unlocked, it will ignore this and all new lock requests). If SFC
+ * ends up being locked to the engine we want to reset, we have to reset
+ * it as well (we will unlock it once the reset sequence is completed).
+ */
+ rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
+
+ if (__intel_wait_for_register_fw(uncore,
+ sfc_forced_lock_ack,
+ sfc_forced_lock_ack_bit,
+ sfc_forced_lock_ack_bit,
+ 1000, 0, NULL)) {
+ DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+ return 0;
+ }
+
+ /* Only ask for an SFC reset if the usage bit is set */
+ if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)
+ return sfc_reset_bit;
+
+ return 0;
+}
+
+/*
+ * Clear the SFC forced-lock bit for @engine. Applies to the same engines
+ * as gen11_lock_sfc(): video decode engines listed in vdbox_sfc_access and
+ * video enhancement engines. Any other engine is ignored.
+ */
+static void gen11_unlock_sfc(struct intel_engine_cs *engine)
+{
+	const u8 vdbox_sfc_access =
+		RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
+	i915_reg_t lock_reg;
+	u32 lock_bit;
+
+	if (engine->class == VIDEO_DECODE_CLASS) {
+		if (!(BIT(engine->instance) & vdbox_sfc_access))
+			return;
+
+		lock_reg = GEN11_VCS_SFC_FORCED_LOCK(engine);
+		lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
+	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
+		lock_reg = GEN11_VECS_SFC_FORCED_LOCK(engine);
+		lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
+	} else {
+		return;
+	}
+
+	rmw_clear_fw(engine->uncore, lock_reg, lock_bit);
+}
+
+/*
+ * Gen11 flavour of the domain reset. ALL_ENGINES maps to GEN11_GRDOM_FULL
+ * with no SFC handling. For a partial mask each engine contributes its
+ * domain bit plus whatever gen11_lock_sfc() asks for, and the SFC forced
+ * lock is released again for every engine once the reset has been
+ * attempted. @retry is not used.
+ */
+static int gen11_reset_engines(struct drm_i915_private *i915,
+			       intel_engine_mask_t engine_mask,
+			       unsigned int retry)
+{
+	const u32 hw_engine_mask[] = {
+		[RCS0] = GEN11_GRDOM_RENDER,
+		[BCS0] = GEN11_GRDOM_BLT,
+		[VCS0] = GEN11_GRDOM_MEDIA,
+		[VCS1] = GEN11_GRDOM_MEDIA2,
+		[VCS2] = GEN11_GRDOM_MEDIA3,
+		[VCS3] = GEN11_GRDOM_MEDIA4,
+		[VECS0] = GEN11_GRDOM_VECS,
+		[VECS1] = GEN11_GRDOM_VECS2,
+	};
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+	u32 domains = 0;
+	int err;
+
+	if (engine_mask == ALL_ENGINES)
+		return gen6_hw_domain_reset(i915, GEN11_GRDOM_FULL);
+
+	for_each_engine_masked(engine, i915, engine_mask, tmp) {
+		GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
+		domains |= hw_engine_mask[engine->id];
+		domains |= gen11_lock_sfc(engine);
+	}
+
+	err = gen6_hw_domain_reset(i915, domains);
+
+	/* Release the forced locks whether or not the reset worked */
+	for_each_engine_masked(engine, i915, engine_mask, tmp)
+		gen11_unlock_sfc(engine);
+
+	return err;
+}
+
+/*
+ * Ask @engine to get ready for a reset through its RING_RESET_CTL register.
+ *
+ * Three cases, based on the current register value:
+ *  - catastrophic error flagged: write the CAT_ERROR bit and wait for the
+ *    hardware to clear it;
+ *  - not yet ready: write REQUEST_RESET and wait for READY_TO_RESET;
+ *  - already ready: nothing to do.
+ *
+ * Returns 0 on success, or the error from the wait (also logged).
+ */
+static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
+{
+ struct intel_uncore *uncore = engine->uncore;
+ const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base);
+ u32 request, mask, ack;
+ int ret;
+
+ /* ack first holds the register value, then the value to wait for */
+ ack = intel_uncore_read_fw(uncore, reg);
+ if (ack & RESET_CTL_CAT_ERROR) {
+ /*
+ * For catastrophic errors, ready-for-reset sequence
+ * needs to be bypassed: HAS#396813
+ */
+ request = RESET_CTL_CAT_ERROR;
+ mask = RESET_CTL_CAT_ERROR;
+
+ /* Catastrophic errors need to be cleared by HW */
+ ack = 0;
+ } else if (!(ack & RESET_CTL_READY_TO_RESET)) {
+ request = RESET_CTL_REQUEST_RESET;
+ mask = RESET_CTL_READY_TO_RESET;
+ ack = RESET_CTL_READY_TO_RESET;
+ } else {
+ return 0;
+ }
+
+ intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request));
+ ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
+ 700, 0, NULL);
+ if (ret)
+ DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
+ engine->name, request,
+ intel_uncore_read_fw(uncore, reg));
+
+ return ret;
+}
+
+/* Withdraw the REQUEST_RESET bit from @engine's RING_RESET_CTL register. */
+static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
+{
+	const i915_reg_t reset_ctl = RING_RESET_CTL(engine->mmio_base);
+
+	intel_uncore_write_fw(engine->uncore,
+			      reset_ctl,
+			      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
+}
+
+/*
+ * Gen8+ reset: prepare every engine in @engine_mask, perform the domain
+ * reset (gen11 or gen6 variant) and finally cancel the reset request on
+ * every engine in the mask.
+ *
+ * On the first attempt (@retry == 0) a failed prepare skips the reset and
+ * its error is returned. On later attempts a failed prepare is ignored and
+ * the reset goes ahead anyway.
+ */
+static int gen8_reset_engines(struct drm_i915_private *i915,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ struct intel_engine_cs *engine;
+ const bool reset_non_ready = retry >= 1;
+ intel_engine_mask_t tmp;
+ int ret;
+
+ for_each_engine_masked(engine, i915, engine_mask, tmp) {
+ ret = gen8_engine_reset_prepare(engine);
+ if (ret && !reset_non_ready)
+ goto skip_reset;
+
+ /*
+ * If this is not the first failed attempt to prepare,
+ * we decide to proceed anyway.
+ *
+ * By doing so we risk context corruption and with
+ * some gens (kbl), possible system hang if reset
+ * happens during active bb execution.
+ *
+ * We rather take context corruption instead of
+ * failed reset with a wedged driver/gpu. And
+ * active bb execution case should be covered by
+ * i915_stop_engines we have before the reset.
+ */
+ }
+
+ if (INTEL_GEN(i915) >= 11)
+ ret = gen11_reset_engines(i915, engine_mask, retry);
+ else
+ ret = gen6_reset_engines(i915, engine_mask, retry);
+
+skip_reset:
+ /* Reached on both paths: always withdraw the reset requests */
+ for_each_engine_masked(engine, i915, engine_mask, tmp)
+ gen8_engine_reset_cancel(engine);
+
+ return ret;
+}
+
+/*
+ * Signature shared by the per-platform reset implementations returned by
+ * intel_get_gpu_reset(). @retry is the zero-based attempt number passed in
+ * by intel_gpu_reset().
+ */
+typedef int (*reset_func)(struct drm_i915_private *,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry);
+
+/*
+ * Pick the reset implementation for the device, newest platforms first.
+ * Returns NULL when the device has no reset implementation (before gen3).
+ */
+static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) >= 8)
+		return gen8_reset_engines;
+
+	if (INTEL_GEN(i915) >= 6)
+		return gen6_reset_engines;
+
+	if (INTEL_GEN(i915) >= 5)
+		return ironlake_do_reset;
+
+	if (IS_G4X(i915))
+		return g4x_do_reset;
+
+	if (IS_G33(i915) || IS_PINEVIEW(i915))
+		return g33_do_reset;
+
+	if (INTEL_GEN(i915) >= 3)
+		return i915_do_reset;
+
+	return NULL;
+}
+
+/*
+ * Reset the engines selected by @engine_mask using the platform's reset
+ * implementation.
+ *
+ * An ALL_ENGINES reset is attempted up to RESET_MAX_RETRIES times, any
+ * other mask once; a further attempt is only made while the previous one
+ * returned -ETIMEDOUT. Engines are stopped before every attempt except the
+ * first. All forcewake domains are held for the whole sequence and each
+ * attempt runs with preemption disabled.
+ *
+ * Returns -ENODEV if the device has no reset implementation, otherwise the
+ * result of the last attempt.
+ */
+int intel_gpu_reset(struct drm_i915_private *i915,
+ intel_engine_mask_t engine_mask)
+{
+ const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
+ reset_func reset;
+ int ret = -ETIMEDOUT;
+ int retry;
+
+ reset = intel_get_gpu_reset(i915);
+ if (!reset)
+ return -ENODEV;
+
+ /*
+ * If the power well sleeps during the reset, the reset
+ * request may be dropped and never completes (causing -EIO).
+ */
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+ for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
+ /*
+ * We stop engines, otherwise we might get failed reset and a
+ * dead gpu (on elk). Also as modern gpu as kbl can suffer
+ * from system hang if batchbuffer is progressing when
+ * the reset is issued, regardless of READY_TO_RESET ack.
+ * Thus assume it is best to stop engines on all gens
+ * where we have a gpu reset.
+ *
+ * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+ *
+ * WaMediaResetMainRingCleanup:ctg,elk (presumably)
+ *
+ * FIXME: Wa for more modern gens needs to be validated
+ */
+ if (retry)
+ i915_stop_engines(i915, engine_mask);
+
+ GEM_TRACE("engine_mask=%x\n", engine_mask);
+ preempt_disable();
+ ret = reset(i915, engine_mask, retry);
+ preempt_enable();
+ }
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+
+ return ret;
+}
+
+/*
+ * Report whether a GPU reset can be attempted on this device: the reset
+ * module parameter must be non-zero and the platform must have a reset
+ * implementation.
+ */
+bool intel_has_gpu_reset(struct drm_i915_private *i915)
+{
+	/* Reset disabled through the module parameter */
+	if (!i915_modparams.reset)
+		return false;
+
+	return intel_get_gpu_reset(i915) != NULL;
+}
+
+/*
+ * Per-engine reset is usable only when the device info advertises it and
+ * i915_modparams.reset is at least 2.
+ */
+bool intel_has_reset_engine(struct drm_i915_private *i915)
+{
+	if (!INTEL_INFO(i915)->has_reset_engine)
+		return false;
+
+	return i915_modparams.reset >= 2;
+}
+
+/*
+ * Reset the GuC through its hardware reset domain (GEN11_GRDOM_GUC on
+ * gen11+, GEN9_GRDOM_GUC otherwise) while holding forcewake.
+ *
+ * Returns the status of gen6_hw_domain_reset().
+ */
+int intel_reset_guc(struct drm_i915_private *i915)
+{
+	u32 domain;
+	int err;
+
+	if (INTEL_GEN(i915) >= 11)
+		domain = GEN11_GRDOM_GUC;
+	else
+		domain = GEN9_GRDOM_GUC;
+
+	GEM_BUG_ON(!HAS_GUC(i915));
+
+	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+	err = gen6_hw_domain_reset(i915, domain);
+	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+
+	return err;
+}
+
+/*
+ * Prepare a single engine for reset: take a forcewake reference and call
+ * the engine's reset.prepare() hook. The forcewake reference is dropped
+ * again in reset_finish_engine().
+ *
+ * NOTE(review): an earlier version of this comment said the irq handler is
+ * quiesced here and that the active request is returned; the function
+ * returns nothing, and any irq quiescing would have to happen inside the
+ * reset.prepare() hook, which is not visible here.
+ */
+static void reset_prepare_engine(struct intel_engine_cs *engine)
+{
+ /*
+ * During the reset sequence, we must prevent the engine from
+ * entering RC6. As the context state is undefined until we restart
+ * the engine, if it does enter RC6 during the reset, the state
+ * written to the powercontext is undefined and so we may lose
+ * GPU state upon resume, i.e. fail to restart after a reset.
+ */
+ intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
+ engine->reset.prepare(engine);
+}
+
+/*
+ * Walk every fence register and, for each one whose VMA currently has a
+ * userfault, unmap the corresponding range of the DRM anon inode mapping.
+ */
+static void revoke_mmaps(struct drm_i915_private *i915)
+{
+ int i;
+
+ for (i = 0; i < i915->num_fence_regs; i++) {
+ struct drm_vma_offset_node *node;
+ struct i915_vma *vma;
+ u64 vma_offset;
+
+ /* Skip fence registers with no VMA attached. */
+ vma = READ_ONCE(i915->fence_regs[i].vma);
+ if (!vma)
+ continue;
+
+ /* Nothing to revoke unless userspace has faulted the VMA in. */
+ if (!i915_vma_has_userfault(vma))
+ continue;
+
+ GEM_BUG_ON(vma->fence != &i915->fence_regs[i]);
+ node = &vma->obj->base.vma_node;
+ /* Byte offset of the partial view within the object's mmap range. */
+ vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
+ unmap_mapping_range(i915->drm.anon_inode->i_mapping,
+ drm_vma_node_offset_addr(node) + vma_offset,
+ vma->size,
+ 1);
+ }
+}
+
+/*
+ * Prepare the whole device for reset: run reset_prepare_engine() on every
+ * engine, then intel_uc_reset_prepare().
+ */
+static void reset_prepare(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, i915, id)
+ reset_prepare_engine(engine);
+
+ intel_uc_reset_prepare(i915);
+}
+
+/* Revoke userspace access ahead of a reset; currently only the fenced mmaps. */
+static void gt_revoke(struct drm_i915_private *i915)
+{
+ revoke_mmaps(i915);
+}
+
+/*
+ * Bring the GT back after the hardware reset: re-enable the GGTT, reset
+ * the software state of every engine (flagging those in @stalled_mask as
+ * stalled) and restore the fence registers.
+ *
+ * Returns 0 on success or the error from i915_ggtt_enable_hw().
+ */
+static int gt_reset(struct drm_i915_private *i915,
+		    intel_engine_mask_t stalled_mask)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int ret;
+
+	/*
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.
+	 */
+	ret = i915_ggtt_enable_hw(i915);
+	if (ret)
+		return ret;
+
+	for_each_engine(engine, i915, id) {
+		intel_engine_reset(engine, stalled_mask & engine->mask);
+	}
+
+	i915_gem_restore_fences(i915);
+
+	return 0;
+}
+
+/*
+ * Counterpart of reset_prepare_engine(): call the engine's reset.finish()
+ * hook and drop the forcewake reference taken during prepare.
+ */
+static void reset_finish_engine(struct intel_engine_cs *engine)
+{
+ engine->reset.finish(engine);
+ intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
+}
+
+/*
+ * Work item queued by reset_restart() and consumed (and freed) by
+ * restart_work().
+ */
+struct i915_gpu_restart {
+ struct work_struct work; /* handled by restart_work() */
+ struct drm_i915_private *i915; /* device the restart applies to */
+};
+
+/*
+ * Worker queued by reset_restart(). Under struct_mutex and a runtime-pm
+ * wakeref it clears gpu_error.restart, submits a kernel_context request on
+ * every idle engine, and finally frees the work item.
+ */
+static void restart_work(struct work_struct *work)
+{
+ struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
+ struct drm_i915_private *i915 = arg->i915;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+
+ wakeref = intel_runtime_pm_get(i915);
+ mutex_lock(&i915->drm.struct_mutex);
+ /* Allow reset_restart() to queue a new worker from here on. */
+ WRITE_ONCE(i915->gpu_error.restart, NULL);
+
+ for_each_engine(engine, i915, id) {
+ struct i915_request *rq;
+
+ /*
+ * Ostensibly, we always want a context loaded for powersaving,
+ * so if the engine is idle after the reset, send a request
+ * to load our scratch kernel_context.
+ */
+ if (!intel_engine_is_idle(engine))
+ continue;
+
+ /* Best effort: an allocation failure is silently skipped. */
+ rq = i915_request_alloc(engine, i915->kernel_context);
+ if (!IS_ERR(rq))
+ i915_request_add(rq);
+ }
+
+ mutex_unlock(&i915->drm.struct_mutex);
+ intel_runtime_pm_put(i915, wakeref);
+
+ kfree(arg);
+}
+
+/*
+ * Finish the reset on every engine (reset_finish_engine()) and signal each
+ * engine's breadcrumbs afterwards.
+ */
+static void reset_finish(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, i915, id) {
+ reset_finish_engine(engine);
+ intel_engine_signal_breadcrumbs(engine);
+ }
+}
+
+/*
+ * Queue restart_work() on i915->wq unless a restart is already pending.
+ * Allocation failure is tolerated: no worker is queued in that case.
+ */
+static void reset_restart(struct drm_i915_private *i915)
+{
+	struct i915_gpu_restart *restart;
+
+	/*
+	 * Following the reset, ensure that we always reload context for
+	 * powersaving, and to correct engine->last_retired_context. Since
+	 * this requires us to submit a request, queue a worker to do that
+	 * task for us to evade any locking here.
+	 */
+	if (READ_ONCE(i915->gpu_error.restart))
+		return;
+
+	restart = kmalloc(sizeof(*restart), GFP_KERNEL);
+	if (!restart)
+		return;
+
+	restart->i915 = i915;
+	INIT_WORK(&restart->work, restart_work);
+
+	/* Publish the pending restart before the worker can run. */
+	WRITE_ONCE(i915->gpu_error.restart, restart);
+	queue_work(i915->wq, &restart->work);
+}
+
+/*
+ * submit_request replacement installed while the device is wedged: instead
+ * of executing the request it sets the fence error to -EIO, then submits
+ * and marks the request complete under the engine timeline lock, and
+ * finally queues the engine's breadcrumbs.
+ */
+static void nop_submit_request(struct i915_request *request)
+{
+ struct intel_engine_cs *engine = request->engine;
+ unsigned long flags;
+
+ GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
+ engine->name, request->fence.context, request->fence.seqno);
+ dma_fence_set_error(&request->fence, -EIO);
+
+ spin_lock_irqsave(&engine->timeline.lock, flags);
+ __i915_request_submit(request);
+ i915_request_mark_complete(request);
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+ intel_engine_queue_breadcrumbs(engine);
+}
+
+/*
+ * Declare the device wedged. Does nothing if I915_WEDGED is already set.
+ *
+ * Sequence: prepare all engines for reset, attempt a full GPU reset
+ * (skipped when the reset would clobber the display), switch every engine
+ * to nop_submit_request and drop its scheduler, wait for an RCU grace
+ * period, cancel the outstanding requests, finish the reset and only then
+ * set I915_WEDGED.
+ *
+ * The public wrapper i915_gem_set_wedged() calls this under
+ * error->wedge_mutex with a runtime-pm wakeref held.
+ */
+static void __i915_gem_set_wedged(struct drm_i915_private *i915)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ if (test_bit(I915_WEDGED, &error->flags))
+ return;
+
+ /* Debug aid only: dump engine state if anything is still busy. */
+ if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
+ struct drm_printer p = drm_debug_printer(__func__);
+
+ for_each_engine(engine, i915, id)
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+ }
+
+ GEM_TRACE("start\n");
+
+ /*
+ * First, stop submission to hw, but do not yet complete requests by
+ * rolling the global seqno forward (since this would complete requests
+ * for which we haven't set the fence error to EIO yet).
+ */
+ reset_prepare(i915);
+
+ /* Even if the GPU reset fails, it should still stop the engines */
+ if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
+ intel_gpu_reset(i915, ALL_ENGINES);
+
+ for_each_engine(engine, i915, id) {
+ engine->submit_request = nop_submit_request;
+ engine->schedule = NULL;
+ }
+ i915->caps.scheduler = 0;
+
+ /*
+ * Make sure no request can slip through without getting completed by
+ * either this call here to intel_engine_write_global_seqno, or the one
+ * in nop_submit_request.
+ */
+ synchronize_rcu_expedited();
+
+ /* Mark all executing requests as skipped */
+ for_each_engine(engine, i915, id)
+ engine->cancel_requests(engine);
+
+ reset_finish(i915);
+
+ /* Order all of the above before the wedged bit becomes visible. */
+ smp_mb__before_atomic();
+ set_bit(I915_WEDGED, &error->flags);
+
+ GEM_TRACE("end\n");
+}
+
+/*
+ * Public entry point for wedging the device: runs __i915_gem_set_wedged()
+ * under error->wedge_mutex with a runtime-pm wakeref held.
+ */
+void i915_gem_set_wedged(struct drm_i915_private *i915)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+ intel_wakeref_t wakeref;
+
+ mutex_lock(&error->wedge_mutex);
+ with_intel_runtime_pm(i915, wakeref)
+ __i915_gem_set_wedged(i915);
+ mutex_unlock(&error->wedge_mutex);
+}
+
+/*
+ * Try to leave the wedged state.
+ *
+ * Returns true when the device is not (or no longer) wedged, false when
+ * recovery is impossible because i915->gt.scratch was never set up.
+ *
+ * Waits for the last request of every active timeline, sanitizes the
+ * engines, restores the default submission functions and then clears
+ * I915_WEDGED.
+ */
+static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+ struct i915_timeline *tl;
+
+ if (!test_bit(I915_WEDGED, &error->flags))
+ return true;
+
+ if (!i915->gt.scratch) /* Never fully initialised, recovery impossible */
+ return false;
+
+ GEM_TRACE("start\n");
+
+ /*
+ * Before unwedging, make sure that all pending operations
+ * are flushed and errored out - we may have requests waiting upon
+ * third party fences. We marked all inflight requests as EIO, and
+ * every execbuf since returned EIO, for consistency we want all
+ * the currently pending requests to also be marked as EIO, which
+ * is done inside our nop_submit_request - and so we must wait.
+ *
+ * No more can be submitted until we reset the wedged bit.
+ */
+ mutex_lock(&i915->gt.timelines.mutex);
+ list_for_each_entry(tl, &i915->gt.timelines.active_list, link) {
+ struct i915_request *rq;
+
+ rq = i915_active_request_get_unlocked(&tl->last_request);
+ if (!rq)
+ continue;
+
+ /*
+ * All internal dependencies (i915_requests) will have
+ * been flushed by the set-wedge, but we may be stuck waiting
+ * for external fences. These should all be capped to 10s
+ * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
+ * in the worst case.
+ */
+ dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+ }
+ mutex_unlock(&i915->gt.timelines.mutex);
+
+ intel_engines_sanitize(i915, false);
+
+ /*
+ * Undo nop_submit_request. We prevent all new i915 requests from
+ * being queued (by disallowing execbuf whilst wedged) so having
+ * waited for all active requests above, we know the system is idle
+ * and do not have to worry about a thread being inside
+ * engine->submit_request() as we swap over. So unlike installing
+ * the nop_submit_request on reset, we can do this from normal
+ * context and do not require stop_machine().
+ */
+ intel_engines_reset_default_submission(i915);
+
+ GEM_TRACE("end\n");
+
+ smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
+ clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+
+ return true;
+}
+
+/*
+ * Public entry point for unwedging: runs __i915_gem_unset_wedged() under
+ * error->wedge_mutex and returns its result.
+ */
+bool i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+ bool result;
+
+ mutex_lock(&error->wedge_mutex);
+ result = __i915_gem_unset_wedged(i915);
+ mutex_unlock(&error->wedge_mutex);
+
+ return result;
+}
+
+/*
+ * Perform the full-device reset: revoke user mmaps, reset all engines and,
+ * on success, bring the GT back with gt_reset().
+ *
+ * A failed hardware reset is retried up to RESET_MAX_RETRIES more times,
+ * sleeping 10ms, 20ms, 30ms, ... before each retry.
+ *
+ * Returns 0 on success, otherwise the error from the last
+ * intel_gpu_reset() attempt or from gt_reset().
+ */
+static int do_reset(struct drm_i915_private *i915,
+ intel_engine_mask_t stalled_mask)
+{
+ int err, i;
+
+ gt_revoke(i915);
+
+ err = intel_gpu_reset(i915, ALL_ENGINES);
+ for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
+ msleep(10 * (i + 1));
+ err = intel_gpu_reset(i915, ALL_ENGINES);
+ }
+ if (err)
+ return err;
+
+ return gt_reset(i915, stalled_mask);
+}
+
+/**
+ * i915_reset - reset chip after a hang
+ * @i915: #drm_i915_private to reset
+ * @stalled_mask: mask of the stalled engines with the guilty requests
+ * @reason: user error message for why we are resetting
+ *
+ * Reset the chip. Useful if a hang is detected. Marks the device as wedged
+ * on failure.
+ *
+ * The caller must hold a runtime-pm wakelock and must already have set
+ * I915_RESET_BACKOFF in the error flags; both are asserted on entry. The
+ * function may sleep.
+ *
+ * Procedure is fairly simple:
+ * - reset the chip using the reset reg
+ * - re-init context state
+ * - re-init hardware status page
+ * - re-init ring buffer
+ * - re-init interrupt state
+ * - re-init display
+ */
+void i915_reset(struct drm_i915_private *i915,
+ intel_engine_mask_t stalled_mask,
+ const char *reason)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+ int ret;
+
+ GEM_TRACE("flags=%lx\n", error->flags);
+
+ might_sleep();
+ assert_rpm_wakelock_held(i915);
+ GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
+
+ /* Clear any previous failed attempts at recovery. Time to try again. */
+ if (!__i915_gem_unset_wedged(i915))
+ return;
+
+ if (reason)
+ dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
+ error->reset_count++;
+
+ reset_prepare(i915);
+
+ /* No usable reset: wedge the device without tainting the kernel. */
+ if (!intel_has_gpu_reset(i915)) {
+ if (i915_modparams.reset)
+ dev_err(i915->drm.dev, "GPU reset not supported\n");
+ else
+ DRM_DEBUG_DRIVER("GPU reset disabled\n");
+ goto error;
+ }
+
+ if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
+ intel_runtime_pm_disable_interrupts(i915);
+
+ if (do_reset(i915, stalled_mask)) {
+ dev_err(i915->drm.dev, "Failed to reset chip\n");
+ goto taint;
+ }
+
+ if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
+ intel_runtime_pm_enable_interrupts(i915);
+
+ intel_overlay_reset(i915);
+
+ /*
+ * Next we need to restore the context, but we don't use those
+ * yet either...
+ *
+ * Ring buffer needs to be re-initialized in the KMS case, or if X
+ * was running at the time of the reset (i.e. we weren't VT
+ * switched away).
+ */
+ ret = i915_gem_init_hw(i915);
+ if (ret) {
+ DRM_ERROR("Failed to initialise HW following reset (%d)\n",
+ ret);
+ goto error;
+ }
+
+ i915_queue_hangcheck(i915);
+
+ /* Common exit, also reached from the error path after wedging. */
+finish:
+ reset_finish(i915);
+ if (!__i915_wedged(error))
+ reset_restart(i915);
+ return;
+
+taint:
+ /*
+ * History tells us that if we cannot reset the GPU now, we
+ * never will. This then impacts everything that is run
+ * subsequently. On failing the reset, we mark the driver
+ * as wedged, preventing further execution on the GPU.
+ * We also want to go one step further and add a taint to the
+ * kernel so that any subsequent faults can be traced back to
+ * this failure. This is important for CI, where if the
+ * GPU/driver fails we would like to reboot and restart testing
+ * rather than continue on into oblivion. For everyone else,
+ * the system should still plod along, but they have been warned!
+ */
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+error:
+ __i915_gem_set_wedged(i915);
+ goto finish;
+}
+
+/* Reset a single engine through intel_gpu_reset() using the engine's mask. */
+static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
+ struct intel_engine_cs *engine)
+{
+ return intel_gpu_reset(i915, engine->mask);
+}
+
+/**
+ * i915_reset_engine - reset GPU engine to recover from a hang
+ * @engine: engine to reset
+ * @msg: reason for GPU reset; or NULL for no dev_notice()
+ *
+ * Reset a specific GPU engine. Useful if a hang is detected.
+ * Returns zero on successful reset or otherwise an error code.
+ *
+ * The caller must already own the engine's I915_RESET_ENGINE bit in the
+ * error flags.
+ *
+ * Procedure is:
+ * - identifies the request that caused the hang and it is dropped
+ * - reset engine (which will force the engine to idle)
+ * - re-init/configure engine
+ */
+int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_gpu_error *error = &i915->gpu_error;
+	int err;
+
+	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
+	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
+
+	reset_prepare_engine(engine);
+
+	if (msg)
+		dev_notice(i915->drm.dev,
+			   "Resetting %s for %s\n", engine->name, msg);
+	error->reset_engine_count[engine->id]++;
+
+	/* With a GuC execbuf client the reset is delegated to the GuC. */
+	if (i915->guc.execbuf_client)
+		err = intel_guc_reset_engine(&i915->guc, engine);
+	else
+		err = intel_gt_reset_engine(i915, engine);
+	if (err) {
+		/* If we fail here, we expect to fallback to a global reset */
+		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
+				 i915->guc.execbuf_client ? "GuC " : "",
+				 engine->name, err);
+		goto out;
+	}
+
+	/*
+	 * The request that caused the hang is stuck on elsp, we know the
+	 * active request and can drop it, adjust head to skip the offending
+	 * request to resume executing remaining requests in the queue.
+	 */
+	intel_engine_reset(engine, true);
+
+	/*
+	 * The engine and its registers (and workarounds in case of render)
+	 * have been reset to their default values. Follow the init_ring
+	 * process to program RING_MODE, HWSP and re-enable submission.
+	 * Success and failure share the cleanup below.
+	 */
+	err = engine->init_hw(engine);
+
+out:
+	intel_engine_cancel_stop_cs(engine);
+	reset_finish_engine(engine);
+	return err;
+}
+
+/*
+ * Run a full device reset and report progress to userspace via uevents:
+ * an error event first, then a reset event, and finally a reset-done
+ * event unless the device ended up wedged.
+ *
+ * The reset itself runs inside i915_wedge_on_timeout() with a 5 second
+ * watchdog, after flushing reset_backoff_srcu readers and under
+ * error->wedge_mutex.
+ */
+static void i915_reset_device(struct drm_i915_private *i915,
+ u32 engine_mask,
+ const char *reason)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+ struct kobject *kobj = &i915->drm.primary->kdev->kobj;
+ char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+ char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+ char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
+ struct i915_wedge_me w;
+
+ kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
+
+ DRM_DEBUG_DRIVER("resetting chip\n");
+ kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
+ /* Use a watchdog to ensure that our reset completes */
+ i915_wedge_on_timeout(&w, i915, 5 * HZ) {
+ intel_prepare_reset(i915);
+
+ /* Flush everyone using a resource about to be clobbered */
+ synchronize_srcu_expedited(&error->reset_backoff_srcu);
+
+ mutex_lock(&error->wedge_mutex);
+ i915_reset(i915, engine_mask, reason);
+ mutex_unlock(&error->wedge_mutex);
+
+ intel_finish_reset(i915);
+ }
+
+ if (!test_bit(I915_WEDGED, &error->flags))
+ kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
+}
+
+/*
+ * Issue a read-modify-write on @reg with both masks passed as zero.
+ * NOTE(review): presumably this writes back the value just read, relying
+ * on write-to-clear register semantics - confirm against
+ * intel_uncore_rmw().
+ */
+static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
+{
+ intel_uncore_rmw(uncore, reg, 0, 0);
+}
+
+/*
+ * Clear the hardware error state: PGTBL_ER (skipped on gen2), the IPEIR
+ * register for the generation, EIR, and on gen6+ the valid bit of the
+ * ring fault register(s). Any EIR bits that remain set after clearing
+ * are masked in EMR.
+ */
+void i915_clear_error_registers(struct drm_i915_private *i915)
+{
+ struct intel_uncore *uncore = &i915->uncore;
+ u32 eir;
+
+ if (!IS_GEN(i915, 2))
+ clear_register(uncore, PGTBL_ER);
+
+ if (INTEL_GEN(i915) < 4)
+ clear_register(uncore, IPEIR(RENDER_RING_BASE));
+ else
+ clear_register(uncore, IPEIR_I965);
+
+ /* Clear EIR, then read it back to detect bits that refuse to clear. */
+ clear_register(uncore, EIR);
+ eir = intel_uncore_read(uncore, EIR);
+ if (eir) {
+ /*
+ * some errors might have become stuck,
+ * mask them.
+ */
+ DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
+ rmw_set(uncore, EMR, eir);
+ intel_uncore_write(uncore, GEN2_IIR,
+ I915_MASTER_ERROR_INTERRUPT);
+ }
+
+ /* gen8+ has a single ring fault register, gen6/7 one per engine. */
+ if (INTEL_GEN(i915) >= 8) {
+ rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
+ intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
+ } else if (INTEL_GEN(i915) >= 6) {
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, i915, id) {
+ rmw_clear(uncore,
+ RING_FAULT_REG(engine), RING_FAULT_VALID);
+ intel_uncore_posting_read(uncore,
+ RING_FAULT_REG(engine));
+ }
+ }
+}
+
+/**
+ * i915_handle_error - handle a gpu error
+ * @i915: i915 device private
+ * @engine_mask: mask representing engines that are hung
+ * @flags: control flags
+ * @fmt: Error message format string
+ *
+ * Do some basic checking of register state at error time and
+ * dump it to the syslog. Also call i915_capture_error_state() to make
+ * sure we get a record and make it available in debugfs. Fire a uevent
+ * so userspace knows something bad happened (should trigger collection
+ * of a ring dump etc.).
+ *
+ * The formatted message is truncated to fit an 80 byte buffer. Error
+ * state is only captured when @flags contains I915_ERROR_CAPTURE.
+ * Per-engine resets are tried first where available; engines that could
+ * not be reset that way trigger a full device reset.
+ */
+void i915_handle_error(struct drm_i915_private *i915,
+ intel_engine_mask_t engine_mask,
+ unsigned long flags,
+ const char *fmt, ...)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+ struct intel_engine_cs *engine;
+ intel_wakeref_t wakeref;
+ intel_engine_mask_t tmp;
+ char error_msg[80];
+ char *msg = NULL;
+
+ if (fmt) {
+ va_list args;
+
+ va_start(args, fmt);
+ vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+ va_end(args);
+
+ msg = error_msg;
+ }
+
+ /*
+ * In most cases it's guaranteed that we get here with an RPM
+ * reference held, for example because there is a pending GPU
+ * request that won't finish until the reset is done. This
+ * isn't the case at least when we get here by doing a
+ * simulated reset via debugfs, so get an RPM reference.
+ */
+ wakeref = intel_runtime_pm_get(i915);
+
+ /* Ignore engines that do not exist on this device. */
+ engine_mask &= INTEL_INFO(i915)->engine_mask;
+
+ if (flags & I915_ERROR_CAPTURE) {
+ i915_capture_error_state(i915, engine_mask, msg);
+ i915_clear_error_registers(i915);
+ }
+
+ /*
+ * Try engine reset when available. We fall back to full reset if
+ * single reset fails.
+ */
+ if (intel_has_reset_engine(i915) && !__i915_wedged(error)) {
+ for_each_engine_masked(engine, i915, engine_mask, tmp) {
+ BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
+ /* Someone else is already resetting this engine. */
+ if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+ &error->flags))
+ continue;
+
+ /* Successfully reset engines drop out of the mask. */
+ if (i915_reset_engine(engine, msg) == 0)
+ engine_mask &= ~engine->mask;
+
+ clear_bit(I915_RESET_ENGINE + engine->id,
+ &error->flags);
+ wake_up_bit(&error->flags,
+ I915_RESET_ENGINE + engine->id);
+ }
+ }
+
+ /* Every requested engine was recovered individually. */
+ if (!engine_mask)
+ goto out;
+
+ /* Full reset needs the mutex, stop any other user trying to do so. */
+ if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) {
+ wait_event(error->reset_queue,
+ !test_bit(I915_RESET_BACKOFF, &error->flags));
+ goto out; /* piggy-back on the other reset */
+ }
+
+ /* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
+ synchronize_rcu_expedited();
+
+ /* Prevent any other reset-engine attempt. */
+ for_each_engine(engine, i915, tmp) {
+ while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+ &error->flags))
+ wait_on_bit(&error->flags,
+ I915_RESET_ENGINE + engine->id,
+ TASK_UNINTERRUPTIBLE);
+ }
+
+ i915_reset_device(i915, engine_mask, msg);
+
+ /* Release the per-engine bits taken above. */
+ for_each_engine(engine, i915, tmp) {
+ clear_bit(I915_RESET_ENGINE + engine->id,
+ &error->flags);
+ }
+
+ clear_bit(I915_RESET_BACKOFF, &error->flags);
+ wake_up_all(&error->reset_queue);
+
+out:
+ intel_runtime_pm_put(i915, wakeref);
+}
+
+/*
+ * Enter the reset_backoff_srcu read-side section, first waiting
+ * (interruptibly) for any reset marked by I915_RESET_BACKOFF to finish.
+ *
+ * Returns the SRCU index to pass to i915_reset_unlock(), or -EINTR if the
+ * wait was interrupted. May sleep.
+ */
+int i915_reset_trylock(struct drm_i915_private *i915)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+ int srcu;
+
+ might_lock(&error->reset_backoff_srcu);
+ might_sleep();
+
+ /*
+ * The flag is tested under rcu_read_lock(); the lock is dropped around
+ * the sleeping wait and retaken before re-testing.
+ */
+ rcu_read_lock();
+ while (test_bit(I915_RESET_BACKOFF, &error->flags)) {
+ rcu_read_unlock();
+
+ if (wait_event_interruptible(error->reset_queue,
+ !test_bit(I915_RESET_BACKOFF,
+ &error->flags)))
+ return -EINTR;
+
+ rcu_read_lock();
+ }
+ srcu = srcu_read_lock(&error->reset_backoff_srcu);
+ rcu_read_unlock();
+
+ return srcu;
+}
+
+/*
+ * Leave the read-side section entered by i915_reset_trylock(); @tag is the
+ * value that call returned.
+ */
+void i915_reset_unlock(struct drm_i915_private *i915, int tag)
+__releases(&i915->gpu_error.reset_backoff_srcu)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+
+ srcu_read_unlock(&error->reset_backoff_srcu, tag);
+}
+
+/*
+ * Report whether the device is wedged with no recovery in sight.
+ *
+ * Returns:
+ *   0       - not wedged (possibly after waiting for a reset to finish)
+ *   -EIO    - wedged and no reset in progress, or still wedged after one
+ *   -EAGAIN - a reset is in progress but struct_mutex is locked
+ *   -EINTR  - the wait for the reset was interrupted
+ *
+ * May sleep.
+ */
+int i915_terminally_wedged(struct drm_i915_private *i915)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+
+ might_sleep();
+
+ if (!__i915_wedged(error))
+ return 0;
+
+ /* Reset still in progress? Maybe we will recover? */
+ if (!test_bit(I915_RESET_BACKOFF, &error->flags))
+ return -EIO;
+
+ /* XXX intel_reset_finish() still takes struct_mutex!!! */
+ if (mutex_is_locked(&i915->drm.struct_mutex))
+ return -EAGAIN;
+
+ if (wait_event_interruptible(error->reset_queue,
+ !test_bit(I915_RESET_BACKOFF,
+ &error->flags)))
+ return -EINTR;
+
+ return __i915_wedged(error) ? -EIO : 0;
+}
+
+/*
+ * Flush outstanding reset-related work: cancel the hangcheck worker, flush
+ * i915->wq (after which no restart may be pending) and wait under
+ * struct_mutex for the GPU to idle.
+ *
+ * Returns true if the idle wait succeeded.
+ */
+bool i915_reset_flush(struct drm_i915_private *i915)
+{
+ int err;
+
+ cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
+ flush_workqueue(i915->wq);
+ GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
+
+ mutex_lock(&i915->drm.struct_mutex);
+ err = i915_gem_wait_for_idle(i915,
+ I915_WAIT_LOCKED |
+ I915_WAIT_FOR_IDLE_BOOST,
+ MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ return !err;
+}
+
+/*
+ * Delayed-work handler armed by __i915_init_wedge(): if it fires, the
+ * guarded section took too long, so log the section name and wedge the
+ * device.
+ */
+static void i915_wedge_me(struct work_struct *work)
+{
+ struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
+
+ dev_err(w->i915->drm.dev,
+ "%s timed out, cancelling all in-flight rendering.\n",
+ w->name);
+ i915_gem_set_wedged(w->i915);
+}
+
+/*
+ * Arm the wedge watchdog: record the device and section name in @w and
+ * schedule i915_wedge_me() to run after @timeout jiffies. @w is set up as
+ * on-stack delayed work and must be torn down with __i915_fini_wedge().
+ */
+void __i915_init_wedge(struct i915_wedge_me *w,
+ struct drm_i915_private *i915,
+ long timeout,
+ const char *name)
+{
+ w->i915 = i915;
+ w->name = name;
+
+ INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me);
+ schedule_delayed_work(&w->work, timeout);
+}
+
+/*
+ * Disarm the wedge watchdog: cancel the delayed work (waiting for it if it
+ * is already running), destroy the on-stack work and clear w->i915.
+ */
+void __i915_fini_wedge(struct i915_wedge_me *w)
+{
+ cancel_delayed_work_sync(&w->work);
+ destroy_delayed_work_on_stack(&w->work);
+ w->i915 = NULL;
+}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2018 Intel Corporation
+ */
+
+#ifndef I915_RESET_H
+#define I915_RESET_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/srcu.h>
+
+#include "gt/intel_engine_types.h"
+
+struct drm_i915_private;
+struct i915_request;
+struct intel_engine_cs;
+struct intel_guc;
+
+/*
+ * Top-level GPU error handler; @fmt and the following arguments form a
+ * printf-style reason message. Pass I915_ERROR_CAPTURE in @flags to
+ * request an error-state capture.
+ */
+__printf(4, 5)
+void i915_handle_error(struct drm_i915_private *i915,
+ intel_engine_mask_t engine_mask,
+ unsigned long flags,
+ const char *fmt, ...);
+#define I915_ERROR_CAPTURE BIT(0)
+
+void i915_clear_error_registers(struct drm_i915_private *i915);
+
+/* Full-device reset and single-engine reset respectively. */
+void i915_reset(struct drm_i915_private *i915,
+ intel_engine_mask_t stalled_mask,
+ const char *reason);
+int i915_reset_engine(struct intel_engine_cs *engine,
+ const char *reason);
+
+void i915_reset_request(struct i915_request *rq, bool guilty);
+bool i915_reset_flush(struct drm_i915_private *i915);
+
+/*
+ * i915_reset_trylock() returns a tag (or a negative error code) that must
+ * be handed back to i915_reset_unlock().
+ */
+int __must_check i915_reset_trylock(struct drm_i915_private *i915);
+void i915_reset_unlock(struct drm_i915_private *i915, int tag);
+
+int i915_terminally_wedged(struct drm_i915_private *i915);
+
+/* Capability queries for full-device and per-engine reset. */
+bool intel_has_gpu_reset(struct drm_i915_private *i915);
+bool intel_has_reset_engine(struct drm_i915_private *i915);
+
+int intel_gpu_reset(struct drm_i915_private *i915,
+ intel_engine_mask_t engine_mask);
+
+int intel_reset_guc(struct drm_i915_private *i915);
+
+/* State for the i915_wedge_on_timeout() watchdog. */
+struct i915_wedge_me {
+ struct delayed_work work; /* fires when the timeout expires */
+ struct drm_i915_private *i915; /* device to wedge */
+ const char *name; /* label for the guarded section */
+};
+
+void __i915_init_wedge(struct i915_wedge_me *w,
+ struct drm_i915_private *i915,
+ long timeout,
+ const char *name);
+void __i915_fini_wedge(struct i915_wedge_me *w);
+
+/*
+ * Run the statement that follows exactly once with a watchdog armed.
+ * The for-loop initialiser arms the watchdog via __i915_init_wedge(),
+ * labelled with the calling function's name; the increment step calls
+ * __i915_fini_wedge(), which disarms it and sets (W)->i915 to NULL so the
+ * loop condition fails on the second evaluation.
+ */
+#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \
+ for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \
+ (W)->i915; \
+ __i915_fini_wedge((W)))
+
+#endif /* I915_RESET_H */
--- /dev/null
+/*
+ * Copyright © 2008-2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Zou Nan hai <nanhai.zou@intel.com>
+ * Xiang Hai hao<haihao.xiang@intel.com>
+ *
+ */
+
+#include <linux/log2.h>
+
+#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+#include "i915_gem_render_state.h"
+#include "i915_trace.h"
+#include "intel_reset.h"
+#include "intel_workarounds.h"
+
+/* Rough estimate of the typical request size, performing a flush,
+ * set-context and then emitting the batch.
+ */
+#define LEGACY_REQUEST_SIZE 200
+
+/*
+ * Recompute the free space in @ring from its head, emit and size values,
+ * cache the result in ring->space and return it.
+ */
+unsigned int intel_ring_update_space(struct intel_ring *ring)
+{
+	const unsigned int avail =
+		__intel_ring_space(ring->head, ring->emit, ring->size);
+
+	ring->space = avail;
+
+	return avail;
+}
+
+/*
+ * Emit an MI_FLUSH based flush for @rq.
+ *
+ * EMIT_INVALIDATE adds MI_READ_FLUSH to the first command. EMIT_FLUSH
+ * additionally emits four MI_STORE_DWORD_IMM writes of zero to the scratch
+ * page. A final MI_FLUSH | MI_NO_WRITE_FLUSH always closes the sequence.
+ *
+ * Returns 0, or the error from intel_ring_begin().
+ */
+static int
+gen2_render_ring_flush(struct i915_request *rq, u32 mode)
+{
+ unsigned int num_store_dw;
+ u32 cmd, *cs;
+
+ cmd = MI_FLUSH;
+ num_store_dw = 0;
+ if (mode & EMIT_INVALIDATE)
+ cmd |= MI_READ_FLUSH;
+ if (mode & EMIT_FLUSH)
+ num_store_dw = 4;
+
+ /* Two flush dwords plus three dwords per scratch store. */
+ cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = cmd;
+ while (num_store_dw--) {
+ *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *cs++ = i915_scratch_offset(rq->i915);
+ *cs++ = 0;
+ }
+ *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+/*
+ * Emit an MI_FLUSH based flush for @rq.
+ *
+ * Without EMIT_INVALIDATE two MI_FLUSH dwords are emitted. With it, the
+ * flush command gains MI_EXE_FLUSH (and MI_INVALIDATE_ISP on g4x/gen5)
+ * and a 20-dword delay sequence is inserted between the two flushes.
+ *
+ * Returns 0, or the error from intel_ring_begin().
+ */
+static int
+gen4_render_ring_flush(struct i915_request *rq, u32 mode)
+{
+ u32 cmd, *cs;
+ int i;
+
+ /*
+ * read/write caches:
+ *
+ * I915_GEM_DOMAIN_RENDER is always invalidated, but is
+ * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
+ * also flushed at 2d versus 3d pipeline switches.
+ *
+ * read-only caches:
+ *
+ * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
+ * MI_READ_FLUSH is set, and is always flushed on 965.
+ *
+ * I915_GEM_DOMAIN_COMMAND may not exist?
+ *
+ * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
+ * invalidated when MI_EXE_FLUSH is set.
+ *
+ * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
+ * invalidated with every MI_FLUSH.
+ *
+ * TLBs:
+ *
+ * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
+ * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
+ * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
+ * are flushed at any MI_FLUSH.
+ */
+
+ cmd = MI_FLUSH;
+ if (mode & EMIT_INVALIDATE) {
+ cmd |= MI_EXE_FLUSH;
+ if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5))
+ cmd |= MI_INVALIDATE_ISP;
+ }
+
+ /* 2 dwords for the flushes, plus 4 + 12 + 4 for the delay sequence. */
+ i = 2;
+ if (mode & EMIT_INVALIDATE)
+ i += 20;
+
+ cs = intel_ring_begin(rq, i);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = cmd;
+
+ /*
+ * A random delay to let the CS invalidate take effect? Without this
+ * delay, the GPU relocation path fails as the CS does not see
+ * the updated contents. Just as important, if we apply the flushes
+ * to the EMIT_FLUSH branch (i.e. immediately after the relocation
+ * write and before the invalidate on the next batch), the relocations
+ * still fail. This implies that it is a delay following invalidation
+ * that is required to reset the caches as opposed to a delay to
+ * ensure the memory is written.
+ */
+ if (mode & EMIT_INVALIDATE) {
+ *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+ *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ for (i = 0; i < 12; i++)
+ *cs++ = MI_FLUSH;
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+ *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+ *cs++ = 0;
+ }
+
+ *cs++ = cmd;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+/*
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6. From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ * "1 of the following must also be set:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ * - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it. Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either. Notify enable is IRQs, which aren't
+ * really our business. That leaves only stall at scoreboard.
+ *
+ * Two six-dword packets are emitted: the CS-stall/stall-at-scoreboard
+ * PIPE_CONTROL first, then the QW-write PIPE_CONTROL targeting the scratch
+ * page. Returns 0, or the error from intel_ring_begin().
+ */
+static int
+gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
+{
+ u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = GFX_OP_PIPE_CONTROL(5);
+ *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+ *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0; /* low dword */
+ *cs++ = 0; /* high dword */
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = GFX_OP_PIPE_CONTROL(5);
+ *cs++ = PIPE_CONTROL_QW_WRITE;
+ *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+/*
+ * Emit a PIPE_CONTROL flush/invalidate for @rq, preceded by the
+ * post-sync-nonzero workaround sequence.
+ *
+ * @mode selects the flag set: EMIT_FLUSH adds the render-target and depth
+ * cache flushes, EMIT_INVALIDATE adds the TLB and cache invalidations
+ * together with a QW write to the scratch page; both add a CS stall.
+ *
+ * Returns 0, or the error from the workaround emission or from
+ * intel_ring_begin().
+ */
+static int
+gen6_render_ring_flush(struct i915_request *rq, u32 mode)
+{
+ u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
+ u32 *cs, flags = 0;
+ int ret;
+
+ /* Force SNB workarounds for PIPE_CONTROL flushes */
+ ret = gen6_emit_post_sync_nonzero_flush(rq);
+ if (ret)
+ return ret;
+
+ /* Just flush everything. Experiments have shown that reducing the
+ * number of bits based on the write domains has little performance
+ * impact.
+ */
+ if (mode & EMIT_FLUSH) {
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ /*
+ * Ensure that any following seqno writes only happen
+ * when the render cache is indeed flushed.
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+ }
+ if (mode & EMIT_INVALIDATE) {
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ /*
+ * TLB invalidate requires a post-sync write.
+ */
+ flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
+ }
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = GFX_OP_PIPE_CONTROL(4);
+ *cs++ = flags;
+ *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = 0;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+/*
+ * Write the gen6 render-engine breadcrumb for @rq at @cs: the post-sync
+ * nonzero workaround, a flushing PIPE_CONTROL that stores the fence seqno,
+ * a hangcheck seqno store and finally MI_USER_INTERRUPT.
+ *
+ * Records the resulting ring offset in rq->tail and returns the advanced
+ * command pointer.
+ */
+static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	const u32 stall_bits =
+		PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+	const u32 flush_bits =
+		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		PIPE_CONTROL_DC_FLUSH_ENABLE |
+		PIPE_CONTROL_QW_WRITE |
+		PIPE_CONTROL_CS_STALL;
+
+	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = stall_bits;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE;
+	*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = 0;
+
+	/* Finally we can flush and with it emit the breadcrumb */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = flush_bits;
+	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = rq->fence.seqno;
+
+	/* Hangcheck seqno, stored by index */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+
+/*
+ * Emit a single PIPE_CONTROL carrying only CS_STALL and
+ * STALL_AT_SCOREBOARD.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int
+gen7_render_ring_cs_stall_wa(struct i915_request *rq)
+{
+	u32 *out = intel_ring_begin(rq, 4);
+
+	if (IS_ERR(out))
+		return PTR_ERR(out);
+
+	out[0] = GFX_OP_PIPE_CONTROL(4);
+	out[1] = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+	out[2] = 0;
+	out[3] = 0;
+	intel_ring_advance(rq, out + 4);
+
+	return 0;
+}
+
+/*
+ * Emit the gen7 render-engine PIPE_CONTROL flush/invalidate for @rq.
+ *
+ * @mode selects EMIT_FLUSH and/or EMIT_INVALIDATE. CS_STALL is always set.
+ * When invalidating, a separate CS-stall PIPE_CONTROL is emitted first, as
+ * the state cache invalidate requires it.
+ *
+ * Returns 0 on success, or a negative error code if ring space could not be
+ * reserved for either the CS-stall workaround or the flush itself.
+ */
+static int
+gen7_render_ring_flush(struct i915_request *rq, u32 mode)
+{
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
+	u32 *cs, flags = 0;
+	int ret;
+
+	/*
+	 * Ensure that any following seqno writes only happen when the render
+	 * cache is indeed flushed.
+	 *
+	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
+	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
+	 * don't try to be clever and just set it unconditionally.
+	 */
+	flags |= PIPE_CONTROL_CS_STALL;
+
+	/*
+	 * Just flush everything. Experiments have shown that reducing the
+	 * number of bits based on the write domains has little performance
+	 * impact.
+	 */
+	if (mode & EMIT_FLUSH) {
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+	}
+	if (mode & EMIT_INVALIDATE) {
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
+		/*
+		 * TLB invalidate requires a post-sync write.
+		 */
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+
+		/*
+		 * Workaround: we must issue a pipe_control with CS-stall bit
+		 * set before a pipe_control command that has the state cache
+		 * invalidate bit set.
+		 *
+		 * If the workaround packet cannot be emitted, bail out rather
+		 * than emit the invalidate without its mandatory CS stall.
+		 */
+		ret = gen7_render_ring_cs_stall_wa(rq);
+		if (ret)
+			return ret;
+	}
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = flags;
+	*cs++ = scratch_addr;
+	*cs++ = 0;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+/*
+ * Write the gen7 render-engine breadcrumb for @rq at @cs: a flushing
+ * PIPE_CONTROL that stores the fence seqno, an indexed store of the
+ * hangcheck seqno, then MI_USER_INTERRUPT.
+ *
+ * Records the resulting ring offset in rq->tail and returns the advanced
+ * command pointer.
+ */
+static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	const u32 seqno_bits =
+		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		PIPE_CONTROL_DC_FLUSH_ENABLE |
+		PIPE_CONTROL_FLUSH_ENABLE |
+		PIPE_CONTROL_QW_WRITE |
+		PIPE_CONTROL_GLOBAL_GTT_IVB |
+		PIPE_CONTROL_CS_STALL;
+	const u32 hangcheck_bits =
+		PIPE_CONTROL_QW_WRITE |
+		PIPE_CONTROL_STORE_DATA_INDEX |
+		PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+	/* Flush and store the request's seqno */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = seqno_bits;
+	*cs++ = rq->timeline->hwsp_offset;
+	*cs++ = rq->fence.seqno;
+
+	/* Indexed store of the next hangcheck seqno */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = hangcheck_bits;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+
+/*
+ * Write the gen6 non-render breadcrumb for @rq at @cs using MI_FLUSH_DW
+ * indexed stores for the fence seqno and the hangcheck seqno, followed by
+ * MI_USER_INTERRUPT.
+ *
+ * The request's timeline must use the engine status page at the seqno slot
+ * (asserted). Records rq->tail and returns the advanced command pointer.
+ */
+static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	const u32 store_dw =
+		MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	/* Fence seqno */
+	*cs++ = store_dw;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->fence.seqno;
+
+	/* Hangcheck seqno */
+	*cs++ = store_dw;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+
+#define GEN7_XCS_WA 32
+/*
+ * Write the gen7 non-render breadcrumb for @rq at @cs. After the
+ * MI_FLUSH_DW stores of the fence and hangcheck seqnos, the fence seqno is
+ * stored a further GEN7_XCS_WA times with MI_STORE_DWORD_INDEX, followed by
+ * a plain MI_FLUSH_DW and MI_USER_INTERRUPT.
+ *
+ * Records rq->tail and returns the advanced command pointer.
+ */
+static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	const u32 store_dw =
+		MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	unsigned int repeat = GEN7_XCS_WA;
+
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	/* Fence seqno */
+	*cs++ = store_dw;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->fence.seqno;
+
+	/* Hangcheck seqno */
+	*cs++ = store_dw;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
+	/* Repeated seqno stores */
+	while (repeat--) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+		*cs++ = rq->fence.seqno;
+	}
+
+	*cs++ = MI_FLUSH_DW;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = MI_USER_INTERRUPT;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+#undef GEN7_XCS_WA
+
+/*
+ * Program the engine's hardware status page write mask with @mask, after
+ * clearing one bit for the render engine (BIT(0) on gen6+, otherwise
+ * I915_USER_INTERRUPT).
+ */
+static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
+{
+	/*
+	 * Keep the render interrupt unmasked as this papers over
+	 * lost interrupts following a reset.
+	 */
+	if (engine->class == RENDER_CLASS) {
+		const u32 keep = INTEL_GEN(engine->i915) >= 6 ?
+			BIT(0) : I915_USER_INTERRUPT;
+
+		mask &= ~keep;
+	}
+
+	intel_engine_set_hwsp_writemask(engine, mask);
+}
+
+/*
+ * Write the physical address @phys of the status page into HWS_PGA.
+ */
+static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
+{
+	/* dev_priv is referenced implicitly by I915_WRITE() */
+	struct drm_i915_private *dev_priv = engine->i915;
+	u32 addr = lower_32_bits(phys);
+
+	/* On gen4+, address bits 35:32 are folded into register bits 7:4 */
+	if (INTEL_GEN(dev_priv) >= 4)
+		addr |= (phys >> 28) & 0xf0;
+
+	I915_WRITE(HWS_PGA, addr);
+}
+
+/*
+ * Return the first page of the object backing the engine's status page.
+ * The object's pages must already be pinned (asserted).
+ */
+static struct page *status_page(struct intel_engine_cs *engine)
+{
+	struct i915_vma *vma = engine->status_page.vma;
+	struct drm_i915_gem_object *backing = vma->obj;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(backing));
+
+	return sg_page(backing->mm.pages->sgl);
+}
+
+/*
+ * Point the hardware at the status page by physical address and program
+ * the status page write mask with ~0u.
+ */
+static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+{
+	struct page *page = status_page(engine);
+	phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
+
+	set_hws_pga(engine, phys);
+	set_hwstam(engine, ~0u);
+}
+
+/*
+ * Write @offset into the engine's status page address register, then
+ * issue a posting read of that register. Which register is used depends
+ * on the GPU generation and, on gen7, on the engine id.
+ */
+static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
+{
+	/* dev_priv is referenced implicitly by I915_WRITE()/POSTING_READ() */
+	struct drm_i915_private *dev_priv = engine->i915;
+	i915_reg_t hwsp;
+
+	if (IS_GEN(dev_priv, 7)) {
+		/*
+		 * The ring status page addresses are no longer next to the
+		 * rest of the ring registers as of gen7.
+		 */
+		switch (engine->id) {
+		case RCS0:
+			hwsp = RENDER_HWS_PGA_GEN7;
+			break;
+		case BCS0:
+			hwsp = BLT_HWS_PGA_GEN7;
+			break;
+		case VCS0:
+			hwsp = BSD_HWS_PGA_GEN7;
+			break;
+		case VECS0:
+			hwsp = VEBOX_HWS_PGA_GEN7;
+			break;
+		default:
+			/*
+			 * No more rings exist on Gen7. Default case is only
+			 * to shut up gcc switch check warning; treat it as
+			 * the render engine.
+			 */
+			GEM_BUG_ON(engine->id);
+			hwsp = RENDER_HWS_PGA_GEN7;
+			break;
+		}
+	} else if (IS_GEN(dev_priv, 6)) {
+		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
+	} else {
+		hwsp = RING_HWS_PGA(engine->mmio_base);
+	}
+
+	I915_WRITE(hwsp, offset);
+	POSTING_READ(hwsp);
+}
+
+/*
+ * On gen6 and gen7, request a TLB invalidate with sync flush through
+ * RING_INSTPM and wait for the SYNC_FLUSH bit to clear. Does nothing on
+ * other generations. A timeout is only logged.
+ */
+static void flush_cs_tlb(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	const u32 request = INSTPM_TLB_INVALIDATE | INSTPM_SYNC_FLUSH;
+
+	if (!IS_GEN_RANGE(dev_priv, 6, 7))
+		return;
+
+	/* ring should be idle before issuing a sync flush */
+	WARN_ON(!(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE));
+
+	ENGINE_WRITE(engine, RING_INSTPM, _MASKED_BIT_ENABLE(request));
+
+	if (!intel_wait_for_register(engine->uncore,
+				     RING_INSTPM(engine->mmio_base),
+				     INSTPM_SYNC_FLUSH, 0,
+				     1000))
+		return;
+
+	DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+		  engine->name);
+}
+
+/*
+ * Point the hardware at the status page by GGTT offset, program the
+ * status page write mask with ~0u, then flush the CS TLB.
+ */
+static void ring_setup_status_page(struct intel_engine_cs *engine)
+{
+	const u32 hws_offset = i915_ggtt_offset(engine->status_page.vma);
+
+	set_hwsp(engine, hws_offset);
+	set_hwstam(engine, ~0u);
+
+	flush_cs_tlb(engine);
+}
+
+/*
+ * Stop the engine's ring and clear its HEAD, TAIL and CTL registers.
+ *
+ * On generations above 2, STOP_RING is first set in RING_MI_MODE and the
+ * function waits for MODE_IDLE. If that wait times out and HEAD differs
+ * from TAIL, it gives up and returns false without touching the ring
+ * registers.
+ *
+ * Returns true if the address bits of RING_HEAD read back as zero after
+ * the ring has been disabled.
+ */
+static bool stop_ring(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+
+ if (INTEL_GEN(dev_priv) > 2) {
+ ENGINE_WRITE(engine,
+ RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING));
+ if (intel_wait_for_register(engine->uncore,
+ RING_MI_MODE(engine->mmio_base),
+ MODE_IDLE,
+ MODE_IDLE,
+ 1000)) {
+ DRM_ERROR("%s : timed out trying to stop ring\n",
+ engine->name);
+
+ /*
+ * Sometimes we observe that the idle flag is not
+ * set even though the ring is empty. So double
+ * check before giving up.
+ */
+ if (ENGINE_READ(engine, RING_HEAD) !=
+ ENGINE_READ(engine, RING_TAIL))
+ return false;
+ }
+ }
+
+ /* Make HEAD equal TAIL first, then zero both registers */
+ ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL));
+
+ ENGINE_WRITE(engine, RING_HEAD, 0);
+ ENGINE_WRITE(engine, RING_TAIL, 0);
+
+ /* The ring must be empty before it is disabled */
+ ENGINE_WRITE(engine, RING_CTL, 0);
+
+ return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
+}
+
+/*
+ * (Re)initialise the engine's legacy ring registers.
+ *
+ * With FORCEWAKE_ALL held for the duration, this stops the ring (retrying
+ * once), programs the status page (physical or GGTT, depending on
+ * HWS_NEEDS_PHYSICAL), resets the breadcrumbs, programs RING_START, HEAD,
+ * TAIL and CTL, waits for RING_VALID, clears STOP_RING on generations above
+ * 2 and finally writes the real TAIL if there is work pending.
+ *
+ * Returns 0 on success, or -EIO if the ring could not be stopped or never
+ * reported RING_VALID.
+ */
+static int init_ring_common(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ struct intel_ring *ring = engine->buffer;
+ int ret = 0;
+
+ intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
+
+ if (!stop_ring(engine)) {
+ /* G45 ring initialization often fails to reset head to zero */
+ DRM_DEBUG_DRIVER("%s head not reset to zero "
+ "ctl %08x head %08x tail %08x start %08x\n",
+ engine->name,
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_HEAD),
+ ENGINE_READ(engine, RING_TAIL),
+ ENGINE_READ(engine, RING_START));
+
+ /* Second and final attempt; failure here is fatal */
+ if (!stop_ring(engine)) {
+ DRM_ERROR("failed to set %s head to zero "
+ "ctl %08x head %08x tail %08x start %08x\n",
+ engine->name,
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_HEAD),
+ ENGINE_READ(engine, RING_TAIL),
+ ENGINE_READ(engine, RING_START));
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ if (HWS_NEEDS_PHYSICAL(dev_priv))
+ ring_setup_phys_status_page(engine);
+ else
+ ring_setup_status_page(engine);
+
+ intel_engine_reset_breadcrumbs(engine);
+
+ /* Enforce ordering by reading HEAD register back */
+ ENGINE_READ(engine, RING_HEAD);
+
+ /* Initialize the ring. This must happen _after_ we've cleared the ring
+ * registers with the above sequence (the readback of the HEAD registers
+ * also enforces ordering), otherwise the hw might lose the new ring
+ * register values. */
+ ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma));
+
+ /* WaClearRingBufHeadRegAtInit:ctg,elk */
+ if (ENGINE_READ(engine, RING_HEAD))
+ DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n",
+ engine->name, ENGINE_READ(engine, RING_HEAD));
+
+ /* Check that the ring offsets point within the ring! */
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
+ intel_ring_update_space(ring);
+
+ /* First wake the ring up to an empty/idle ring */
+ ENGINE_WRITE(engine, RING_HEAD, ring->head);
+ ENGINE_WRITE(engine, RING_TAIL, ring->head);
+ ENGINE_POSTING_READ(engine, RING_TAIL);
+
+ ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID);
+
+ /* If the head is still not zero, the ring is dead */
+ if (intel_wait_for_register(engine->uncore,
+ RING_CTL(engine->mmio_base),
+ RING_VALID, RING_VALID,
+ 50)) {
+ DRM_ERROR("%s initialization failed "
+ "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
+ engine->name,
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_CTL) & RING_VALID,
+ ENGINE_READ(engine, RING_HEAD), ring->head,
+ ENGINE_READ(engine, RING_TAIL), ring->tail,
+ ENGINE_READ(engine, RING_START),
+ i915_ggtt_offset(ring->vma));
+ ret = -EIO;
+ goto out;
+ }
+
+ /* Undo the STOP_RING set by stop_ring() */
+ if (INTEL_GEN(dev_priv) > 2)
+ ENGINE_WRITE(engine,
+ RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+
+ /* Now awake, let it get started */
+ if (ring->tail != ring->head) {
+ ENGINE_WRITE(engine, RING_TAIL, ring->tail);
+ ENGINE_POSTING_READ(engine, RING_TAIL);
+ }
+
+ /* Papering over lost _interrupts_ immediately following the restart */
+ intel_engine_queue_breadcrumbs(engine);
+out:
+ /* Common exit: always drop the forcewake taken on entry */
+ intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
+
+ return ret;
+}
+
+/* Prepare the engine for reset: only the command streamer is stopped here. */
+static void reset_prepare(struct intel_engine_cs *engine)
+{
+ intel_engine_stop_cs(engine);
+}
+
+/*
+ * Rewind the engine's ring after a reset.
+ *
+ * Under the engine timeline lock, find the first request on the timeline
+ * that has not completed. If there is one, pass it and @stalled to
+ * i915_reset_request() and restart the ring from that request's head;
+ * otherwise restart from the current ring tail.
+ */
+static void reset_ring(struct intel_engine_cs *engine, bool stalled)
+{
+ struct i915_timeline *tl = &engine->timeline;
+ struct i915_request *pos, *rq;
+ unsigned long flags;
+ u32 head;
+
+ rq = NULL;
+ spin_lock_irqsave(&tl->lock, flags);
+ /* First incomplete request on the timeline, if any */
+ list_for_each_entry(pos, &tl->requests, link) {
+ if (!i915_request_completed(pos)) {
+ rq = pos;
+ break;
+ }
+ }
+
+ /*
+ * The guilty request will get skipped on a hung engine.
+ *
+ * Users of client default contexts do not rely on logical
+ * state preserved between batches so it is safe to execute
+ * queued requests following the hang. Non default contexts
+ * rely on preserved state, so skipping a batch loses the
+ * evolution of the state and it needs to be considered corrupted.
+ * Executing more queued batches on top of corrupted state is
+ * risky. But we take the risk by trying to advance through
+ * the queued requests in order to make the client behaviour
+ * more predictable around resets, by not throwing away random
+ * amount of batches it has prepared for execution. Sophisticated
+ * clients can use gem_reset_stats_ioctl and dma fence status
+ * (exported via sync_file info ioctl on explicit fences) to observe
+ * when it loses the context state and should rebuild accordingly.
+ *
+ * The context ban, and ultimately the client ban, mechanism are safety
+ * valves if client submission ends up resulting in nothing more than
+ * subsequent hangs.
+ */
+
+ if (rq) {
+ /*
+ * Try to restore the logical GPU state to match the
+ * continuation of the request queue. If we skip the
+ * context/PD restore, then the next request may try to execute
+ * assuming that its context is valid and loaded on the GPU and
+ * so may try to access invalid memory, prompting repeated GPU
+ * hangs.
+ *
+ * If the request was guilty, we still restore the logical
+ * state in case the next request requires it (e.g. the
+ * aliasing ppgtt), but skip over the hung batch.
+ *
+ * If the request was innocent, we try to replay the request
+ * with the restored context.
+ */
+ i915_reset_request(rq, stalled);
+
+ GEM_BUG_ON(rq->ring != engine->buffer);
+ head = rq->head;
+ } else {
+ /* Nothing outstanding: resume from the current tail */
+ head = engine->buffer->tail;
+ }
+ engine->buffer->head = intel_ring_wrap(engine->buffer, head);
+
+ spin_unlock_irqrestore(&tl->lock, flags);
+}
+
+/* Counterpart to reset_prepare(); intentionally does nothing. */
+static void reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+/*
+ * Emit the context workarounds followed by the render state into @rq.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int intel_rcs_ctx_init(struct i915_request *rq)
+{
+	int err;
+
+	err = intel_engine_emit_ctx_wa(rq);
+	if (err)
+		return err;
+
+	return i915_gem_render_state_emit(rq);
+}
+
+/*
+ * Apply the generation-specific render engine register setup and
+ * workarounds, then run the common ring initialisation.
+ *
+ * Returns the result of init_ring_common().
+ */
+static int init_render_ring(struct intel_engine_cs *engine)
+{
+ /* dev_priv is also referenced implicitly by I915_WRITE() */
+ struct drm_i915_private *dev_priv = engine->i915;
+
+ /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
+ if (IS_GEN_RANGE(dev_priv, 4, 6))
+ I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
+
+ /* We need to disable the AsyncFlip performance optimisations in order
+ * to use MI_WAIT_FOR_EVENT within the CS. It should already be
+ * programmed to '1' on all products.
+ *
+ * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
+ */
+ if (IS_GEN_RANGE(dev_priv, 6, 7))
+ I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
+
+ /* Required for the hardware to program scanline values for waiting */
+ /* WaEnableFlushTlbInvalidationMode:snb */
+ if (IS_GEN(dev_priv, 6))
+ I915_WRITE(GFX_MODE,
+ _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
+
+ /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
+ if (IS_GEN(dev_priv, 7))
+ I915_WRITE(GFX_MODE_GEN7,
+ _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
+ _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
+
+ if (IS_GEN(dev_priv, 6)) {
+ /* From the Sandybridge PRM, volume 1 part 3, page 24:
+ * "If this bit is set, STCunit will have LRA as replacement
+ * policy. [...] This bit must be reset. LRA replacement
+ * policy is not supported."
+ */
+ I915_WRITE(CACHE_MODE_0,
+ _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
+ }
+
+ if (IS_GEN_RANGE(dev_priv, 6, 7))
+ I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+
+ return init_ring_common(engine);
+}
+
+/*
+ * Walk every request on the engine timeline under its lock: those not yet
+ * signaled get their fence error set to -EIO, and all are marked complete.
+ */
+static void cancel_requests(struct intel_engine_cs *engine)
+{
+	struct i915_timeline *tl = &engine->timeline;
+	struct i915_request *rq;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&tl->lock, irqflags);
+
+	/* Mark all submitted requests as skipped. */
+	list_for_each_entry(rq, &tl->requests, link) {
+		if (!i915_request_signaled(rq))
+			dma_fence_set_error(&rq->fence, -EIO);
+
+		i915_request_mark_complete(rq);
+	}
+
+	/* Remaining _unready_ requests will be nop'ed when submitted */
+
+	spin_unlock_irqrestore(&tl->lock, irqflags);
+}
+
+/*
+ * Submit @request and then write its tail into the engine's RING_TAIL
+ * register.
+ */
+static void i9xx_submit_request(struct i915_request *request)
+{
+	u32 tail;
+
+	i915_request_submit(request);
+
+	tail = intel_ring_set_tail(request->ring, request->tail);
+	ENGINE_WRITE(request->engine, RING_TAIL, tail);
+}
+
+/*
+ * Write the legacy breadcrumb for @rq at @cs: MI_FLUSH, indexed stores of
+ * the fence seqno and the hangcheck seqno, then MI_USER_INTERRUPT.
+ *
+ * The request's timeline must use the engine status page at the seqno slot
+ * (asserted). Records rq->tail and returns the advanced command pointer.
+ */
+static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	*cs++ = MI_FLUSH;
+
+	/* Fence seqno */
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+	*cs++ = rq->fence.seqno;
+
+	/* Hangcheck seqno */
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
+	*cs++ = MI_USER_INTERRUPT;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+
+#define GEN5_WA_STORES 8 /* must be at least 1! */
+/*
+ * Write the gen5 breadcrumb for @rq at @cs: MI_FLUSH, an indexed store of
+ * the hangcheck seqno, GEN5_WA_STORES repeated indexed stores of the fence
+ * seqno, then MI_USER_INTERRUPT.
+ *
+ * Records rq->tail and returns the advanced command pointer.
+ */
+static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	unsigned int repeat = GEN5_WA_STORES;
+
+	BUILD_BUG_ON(GEN5_WA_STORES < 1);
+
+	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
+	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+
+	*cs++ = MI_FLUSH;
+
+	/* Hangcheck seqno */
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
+	/* Fence seqno, stored repeatedly */
+	while (repeat--) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+		*cs++ = rq->fence.seqno;
+	}
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+
+	return cs;
+}
+#undef GEN5_WA_STORES
+
+/* Enable the engine's interrupt bits in the GT interrupt mask. */
+static void
+gen5_irq_enable(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	gen5_enable_gt_irq(i915, engine->irq_enable_mask);
+}
+
+/* Disable the engine's interrupt bits in the GT interrupt mask. */
+static void
+gen5_irq_disable(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	gen5_disable_gt_irq(i915, engine->irq_enable_mask);
+}
+
+/*
+ * Clear the engine's bits in the cached interrupt mask, write the mask to
+ * GEN2_IMR and issue a posting read of that register.
+ */
+static void
+i9xx_irq_enable(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	i915->irq_mask &= ~engine->irq_enable_mask;
+	intel_uncore_write(engine->uncore, GEN2_IMR, i915->irq_mask);
+	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
+}
+
+/*
+ * Set the engine's bits in the cached interrupt mask and write the mask
+ * to GEN2_IMR.
+ */
+static void
+i9xx_irq_disable(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	i915->irq_mask |= engine->irq_enable_mask;
+	intel_uncore_write(engine->uncore, GEN2_IMR, i915->irq_mask);
+}
+
+/*
+ * 16-bit variant of the interrupt enable: clear the engine's bits in the
+ * cached interrupt mask and write it to GEN2_IMR.
+ */
+static void
+i8xx_irq_enable(struct intel_engine_cs *engine)
+{
+ /* dev_priv is also referenced implicitly by the I915_WRITE16 macros */
+ struct drm_i915_private *dev_priv = engine->i915;
+
+ dev_priv->irq_mask &= ~engine->irq_enable_mask;
+ I915_WRITE16(GEN2_IMR, dev_priv->irq_mask);
+ /*
+ * NOTE(review): the posting read targets RING_IMR while the write
+ * above targets GEN2_IMR - confirm this is intentional.
+ */
+ POSTING_READ16(RING_IMR(engine->mmio_base));
+}
+
+/*
+ * 16-bit variant of the interrupt disable: set the engine's bits in the
+ * cached interrupt mask and write it to GEN2_IMR.
+ */
+static void
+i8xx_irq_disable(struct intel_engine_cs *engine)
+{
+ /* dev_priv is also referenced implicitly by I915_WRITE16() */
+ struct drm_i915_private *dev_priv = engine->i915;
+
+ dev_priv->irq_mask |= engine->irq_enable_mask;
+ I915_WRITE16(GEN2_IMR, dev_priv->irq_mask);
+}
+
+/*
+ * Emit MI_FLUSH followed by MI_NOOP into @rq. @mode is not consulted.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int
+bsd_ring_flush(struct i915_request *rq, u32 mode)
+{
+	u32 *out = intel_ring_begin(rq, 2);
+
+	if (IS_ERR(out))
+		return PTR_ERR(out);
+
+	out[0] = MI_FLUSH;
+	out[1] = MI_NOOP;
+	intel_ring_advance(rq, out + 2);
+
+	return 0;
+}
+
+/*
+ * Unmask the engine's enable and keep bits in RING_IMR, then enable the
+ * engine's bits in the GT interrupt mask.
+ */
+static void
+gen6_irq_enable(struct intel_engine_cs *engine)
+{
+	const u32 unmasked = engine->irq_enable_mask | engine->irq_keep_mask;
+
+	ENGINE_WRITE(engine, RING_IMR, ~unmasked);
+
+	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
+	ENGINE_POSTING_READ(engine, RING_IMR);
+
+	gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
+}
+
+/*
+ * Mask everything except the keep bits in RING_IMR, then disable the
+ * engine's bits in the GT interrupt mask.
+ */
+static void
+gen6_irq_disable(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
+	gen5_disable_gt_irq(i915, engine->irq_enable_mask);
+}
+
+/*
+ * Unmask the engine's enable bits in RING_IMR, then unmask them in the PM
+ * interrupt mask.
+ */
+static void
+hsw_vebox_irq_enable(struct intel_engine_cs *engine)
+{
+	const u32 enable = engine->irq_enable_mask;
+
+	ENGINE_WRITE(engine, RING_IMR, ~enable);
+
+	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
+	ENGINE_POSTING_READ(engine, RING_IMR);
+
+	gen6_unmask_pm_irq(engine->i915, enable);
+}
+
+/*
+ * Mask every bit in RING_IMR, then mask the engine's enable bits in the
+ * PM interrupt mask.
+ */
+static void
+hsw_vebox_irq_disable(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	ENGINE_WRITE(engine, RING_IMR, ~0);
+	gen6_mask_pm_irq(i915, engine->irq_enable_mask);
+}
+
+/*
+ * Emit MI_BATCH_BUFFER_START for the batch at @offset. The non-secure bit
+ * is set unless @dispatch_flags contains I915_DISPATCH_SECURE. @length is
+ * not used.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int
+i965_emit_bb_start(struct i915_request *rq,
+		   u64 offset, u32 length,
+		   unsigned int dispatch_flags)
+{
+	u32 cmd = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+	u32 *out;
+
+	if (!(dispatch_flags & I915_DISPATCH_SECURE))
+		cmd |= MI_BATCH_NON_SECURE_I965;
+
+	out = intel_ring_begin(rq, 2);
+	if (IS_ERR(out))
+		return PTR_ERR(out);
+
+	out[0] = cmd;
+	out[1] = offset;
+	intel_ring_advance(rq, out + 2);
+
+	return 0;
+}
+
+/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
+/* Largest batch that will be copied into the scratch area */
+#define I830_BATCH_LIMIT SZ_256K
+/* Number of pages touched by the TLB-evicting blit */
+#define I830_TLB_ENTRIES (2)
+/* Minimum scratch size needed by i830_emit_bb_start() */
+#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
+/*
+ * Emit a batch buffer start with the i830 TLB workaround.
+ *
+ * A colour blit over the scratch area is always emitted first. Unless
+ * @dispatch_flags has I915_DISPATCH_PINNED, the batch (@len bytes at
+ * @offset) is then blitted into the scratch area and executed from there.
+ *
+ * Returns 0 on success, -ENOSPC if an unpinned batch is larger than
+ * I830_BATCH_LIMIT, or the error from intel_ring_begin(). Note that the
+ * eviction blit has already been emitted by the time -ENOSPC is returned.
+ */
+static int
+i830_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ unsigned int dispatch_flags)
+{
+ u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
+
+ GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Evict the invalid PTE TLBs */
+ *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
+ *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
+ *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
+ *cs++ = cs_offset;
+ *cs++ = 0xdeadbeef;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
+ if (len > I830_BATCH_LIMIT)
+ return -ENOSPC;
+
+ /* 6 dwords for the copy blit plus 2 for MI_FLUSH/MI_NOOP */
+ cs = intel_ring_begin(rq, 6 + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Blit the batch (which has now all relocs applied) to the
+ * stable batch scratch bo area (so that the CS never
+ * stumbles over its tlb invalidation bug) ...
+ */
+ *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA;
+ *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
+ *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
+ *cs++ = cs_offset;
+ *cs++ = 4096;
+ *cs++ = offset;
+
+ *cs++ = MI_FLUSH;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ /* ... and execute it. */
+ offset = cs_offset;
+ }
+
+ cs = intel_ring_begin(rq, 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+ *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
+ MI_BATCH_NON_SECURE);
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+/*
+ * Emit MI_BATCH_BUFFER_START for the batch at @offset. The non-secure bit
+ * is OR'ed into the address dword unless @dispatch_flags contains
+ * I915_DISPATCH_SECURE. @len is not used.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int
+i915_emit_bb_start(struct i915_request *rq,
+		   u64 offset, u32 len,
+		   unsigned int dispatch_flags)
+{
+	u32 addr = offset;
+	u32 *out;
+
+	if (!(dispatch_flags & I915_DISPATCH_SECURE))
+		addr |= MI_BATCH_NON_SECURE;
+
+	out = intel_ring_begin(rq, 2);
+	if (IS_ERR(out))
+		return PTR_ERR(out);
+
+	out[0] = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+	out[1] = addr;
+	intel_ring_advance(rq, out + 2);
+
+	return 0;
+}
+
+/*
+ * Pin @ring for use: pin its timeline, pin its vma into the global GTT
+ * and map the backing storage, storing the CPU address in ring->vaddr.
+ * The ring must not already be mapped (asserted).
+ *
+ * Returns 0 on success or a negative error code; on failure everything
+ * pinned so far is released again.
+ */
+int intel_ring_pin(struct intel_ring *ring)
+{
+ struct i915_vma *vma = ring->vma;
+ enum i915_map_type map = i915_coherent_map_type(vma->vm->i915);
+ unsigned int flags;
+ void *addr;
+ int ret;
+
+ GEM_BUG_ON(ring->vaddr);
+
+ ret = i915_timeline_pin(ring->timeline);
+ if (ret)
+ return ret;
+
+ flags = PIN_GLOBAL;
+
+ /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+ flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
+ if (vma->obj->stolen)
+ flags |= PIN_MAPPABLE;
+ else
+ flags |= PIN_HIGH;
+
+ ret = i915_vma_pin(vma, 0, 0, flags);
+ if (unlikely(ret))
+ goto unpin_timeline;
+
+ /* Map through the aperture when possible, otherwise map the object */
+ if (i915_vma_is_map_and_fenceable(vma))
+ addr = (void __force *)i915_vma_pin_iomap(vma);
+ else
+ addr = i915_gem_object_pin_map(vma->obj, map);
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
+ goto unpin_ring;
+ }
+
+ /* Balanced by the pin_global-- in intel_ring_unpin() */
+ vma->obj->pin_global++;
+
+ ring->vaddr = addr;
+ return 0;
+
+unpin_ring:
+ i915_vma_unpin(vma);
+unpin_timeline:
+ i915_timeline_unpin(ring->timeline);
+ return ret;
+}
+
+/*
+ * Reset @ring so that its head, tail and emit offsets all equal @tail,
+ * then recompute the available space. @tail must be a valid ring offset
+ * (asserted).
+ */
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
+
+	ring->emit = tail;
+	ring->head = tail;
+	ring->tail = tail;
+
+	intel_ring_update_space(ring);
+}
+
+/*
+ * Undo intel_ring_pin(): reset the ring to its tail, drop the CPU mapping,
+ * unpin the vma and unpin the timeline. The ring must have a vma and be
+ * mapped (asserted).
+ */
+void intel_ring_unpin(struct intel_ring *ring)
+{
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!ring->vma);
+	GEM_BUG_ON(!ring->vaddr);
+
+	/* Discard any unused bytes beyond that submitted to hw. */
+	intel_ring_reset(ring, ring->tail);
+
+	/* Release whichever mapping intel_ring_pin() created */
+	vma = ring->vma;
+	if (i915_vma_is_map_and_fenceable(vma))
+		i915_vma_unpin_iomap(vma);
+	else
+		i915_gem_object_unpin_map(vma->obj);
+	ring->vaddr = NULL;
+
+	vma->obj->pin_global--;
+	i915_vma_unpin(vma);
+
+	i915_timeline_unpin(ring->timeline);
+}
+
+/*
+ * Allocate a @size byte object for a ring (stolen memory first, falling
+ * back to an internal object) and return its vma in the global GTT.
+ *
+ * Returns the vma or an ERR_PTR; the object is released again if the vma
+ * lookup fails.
+ */
+static struct i915_vma *
+intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
+{
+	struct i915_address_space *ggtt_vm = &dev_priv->ggtt.vm;
+	struct drm_i915_gem_object *backing;
+	struct i915_vma *vma;
+
+	backing = i915_gem_object_create_stolen(dev_priv, size);
+	if (!backing)
+		backing = i915_gem_object_create_internal(dev_priv, size);
+	if (IS_ERR(backing))
+		return ERR_CAST(backing);
+
+	/*
+	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+	 * if supported by the platform's GGTT.
+	 */
+	if (ggtt_vm->has_read_only)
+		i915_gem_object_set_readonly(backing);
+
+	vma = i915_vma_instance(backing, ggtt_vm, NULL);
+	if (IS_ERR(vma))
+		i915_gem_object_put(backing);
+
+	return vma;
+}
+
+/*
+ * Allocate a ring of @size bytes for @engine, tied to @timeline.
+ *
+ * @size must be a power of two that fits the RING_CTL size field, and
+ * @timeline must not be the engine's own timeline (all asserted). The
+ * caller must hold struct_mutex. The new ring takes its own reference on
+ * @timeline, which is dropped again in intel_ring_free().
+ *
+ * Returns the ring, or an ERR_PTR on allocation failure.
+ */
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine,
+			 struct i915_timeline *timeline,
+			 int size)
+{
+	struct intel_ring *ring;
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!is_power_of_2(size));
+	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
+	GEM_BUG_ON(timeline == &engine->timeline);
+	lockdep_assert_held(&engine->i915->drm.struct_mutex);
+
+	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	if (!ring)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&ring->ref);
+	INIT_LIST_HEAD(&ring->request_list);
+	ring->timeline = i915_timeline_get(timeline);
+
+	ring->size = size;
+	/* Workaround an erratum on the i830 which causes a hang if
+	 * the TAIL pointer points to within the last 2 cachelines
+	 * of the buffer.
+	 */
+	ring->effective_size = size;
+	if (IS_I830(engine->i915) || IS_I845G(engine->i915))
+		ring->effective_size -= 2 * CACHELINE_BYTES;
+
+	intel_ring_update_space(ring);
+
+	vma = intel_ring_create_vma(engine->i915, size);
+	if (IS_ERR(vma)) {
+		/* Drop the timeline reference taken above before freeing */
+		i915_timeline_put(ring->timeline);
+		kfree(ring);
+		return ERR_CAST(vma);
+	}
+	ring->vma = vma;
+
+	return ring;
+}
+
+/*
+ * kref release callback for a ring: close its vma, release the backing
+ * object, drop the timeline reference and free the ring itself.
+ */
+void intel_ring_free(struct kref *ref)
+{
+	struct intel_ring *ring = container_of(ref, struct intel_ring, ref);
+	struct i915_vma *vma = ring->vma;
+	struct drm_i915_gem_object *backing = vma->obj;
+
+	i915_vma_close(vma);
+	__i915_gem_object_release_unless_active(backing);
+
+	i915_timeline_put(ring->timeline);
+	kfree(ring);
+}
+
+/*
+ * Drop the reference on the context state object, which must no longer
+ * be active (asserted).
+ */
+static void __ring_context_fini(struct intel_context *ce)
+{
+	struct drm_i915_gem_object *state_obj = ce->state->obj;
+
+	GEM_BUG_ON(i915_gem_object_is_active(state_obj));
+	i915_gem_object_put(state_obj);
+}
+
+/*
+ * kref release callback for a ring context: release its state object, if
+ * it has one, and free the context. The context must not be pinned
+ * (asserted).
+ */
+static void ring_context_destroy(struct kref *ref)
+{
+	struct intel_context *ce =
+		container_of(ref, struct intel_context, ref);
+
+	GEM_BUG_ON(intel_context_is_pinned(ce));
+
+	if (ce->state)
+		__ring_context_fini(ce);
+
+	intel_context_free(ce);
+}
+
+/*
+ * Pin the ppgtt used by @ctx: its own if it has one, otherwise the
+ * aliasing ppgtt.
+ *
+ * Returns 0 if there is no ppgtt at all, else the gen6_ppgtt_pin() result.
+ */
+static int __context_pin_ppgtt(struct i915_gem_context *ctx)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt;
+
+	if (!ppgtt)
+		return 0;
+
+	return gen6_ppgtt_pin(ppgtt);
+}
+
+/*
+ * Unpin the ppgtt used by @ctx (its own, otherwise the aliasing ppgtt),
+ * if there is one.
+ */
+static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt;
+
+	if (!ppgtt)
+		return;
+
+	gen6_ppgtt_unpin(ppgtt);
+}
+
+/*
+ * Pin the context state vma, if the context has one, high in the global
+ * GTT and mark its object dirty.
+ *
+ * Returns 0 on success (or when there is no state), else the
+ * i915_vma_pin() error.
+ */
+static int __context_pin(struct intel_context *ce)
+{
+	struct i915_vma *state = ce->state;
+	int ret;
+
+	if (!state)
+		return 0;
+
+	ret = i915_vma_pin(state, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (ret)
+		return ret;
+
+	/*
+	 * And mark it as a globally pinned object to let the shrinker know
+	 * it cannot reclaim the object until we release it.
+	 */
+	state->obj->pin_global++;
+	state->obj->mm.dirty = true;
+
+	return 0;
+}
+
+/*
+ * Undo __context_pin(): drop the global pin count on the state object and
+ * unpin the state vma. Does nothing if the context has no state.
+ */
+static void __context_unpin(struct intel_context *ce)
+{
+	struct i915_vma *state = ce->state;
+
+	if (!state)
+		return;
+
+	state->obj->pin_global--;
+	i915_vma_unpin(state);
+}
+
+/*
+ * intel_context_ops.unpin: release the ppgtt pin and then the context
+ * state pin, the reverse of the order used by ring_context_pin().
+ */
+static void ring_context_unpin(struct intel_context *ce)
+{
+ __context_unpin_ppgtt(ce->gem_context);
+ __context_unpin(ce);
+}
+
+/*
+ * Allocate the context state object for @engine and return its vma in the
+ * global GTT.
+ *
+ * The object is engine->context_size bytes. If the engine has a default
+ * state, that image is copied into the new object.
+ *
+ * Returns the vma or an ERR_PTR; on failure the object is released.
+ */
+static struct i915_vma *
+alloc_context_vma(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *i915 = engine->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create(i915, engine->context_size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ /*
+ * Try to make the context utilize L3 as well as LLC.
+ *
+ * On VLV we don't have L3 controls in the PTEs so we
+ * shouldn't touch the cache level, especially as that
+ * would make the object snooped which might have a
+ * negative performance impact.
+ *
+ * Snooping is required on non-llc platforms in execlist
+ * mode, but since all GGTT accesses use PAT entry 0 we
+ * get snooping anyway regardless of cache_level.
+ *
+ * This is only applicable for Ivy Bridge devices since
+ * later platforms don't have L3 control bits in the PTE.
+ */
+ if (IS_IVYBRIDGE(i915))
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
+
+ if (engine->default_state) {
+ void *defaults, *vaddr;
+
+ /* Map the new object, then the default image to copy from */
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto err_obj;
+ }
+
+ defaults = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (IS_ERR(defaults)) {
+ err = PTR_ERR(defaults);
+ goto err_map;
+ }
+
+ memcpy(vaddr, defaults, engine->context_size);
+ i915_gem_object_unpin_map(engine->default_state);
+
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+ }
+
+ vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ return vma;
+
+err_map:
+ /* Only reached while obj is still mapped */
+ i915_gem_object_unpin_map(obj);
+err_obj:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+/*
+ * intel_context_ops.pin: attach the engine's single ring to the context,
+ * allocate the context state on first use (when the engine has a context
+ * size), then pin the state and the ppgtt.
+ *
+ * Returns 0 on success or a negative error code. A state vma allocated
+ * here stays attached to the context even if a later step fails.
+ */
+static int ring_context_pin(struct intel_context *ce)
+{
+	struct intel_engine_cs *engine = ce->engine;
+	int ret;
+
+	/* One ringbuffer to rule them all */
+	GEM_BUG_ON(!engine->buffer);
+	ce->ring = engine->buffer;
+
+	if (!ce->state && engine->context_size) {
+		struct i915_vma *state = alloc_context_vma(engine);
+
+		if (IS_ERR(state))
+			return PTR_ERR(state);
+
+		ce->state = state;
+	}
+
+	ret = __context_pin(ce);
+	if (ret)
+		return ret;
+
+	ret = __context_pin_ppgtt(ce->gem_context);
+	if (ret) {
+		/* Undo the state pin taken just above */
+		__context_unpin(ce);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * .reset hook of ring_context_ops: hands the context's ring to
+ * intel_ring_reset() with 0 as the second argument.
+ */
+static void ring_context_reset(struct intel_context *ce)
+{
+	struct intel_ring *ring = ce->ring;
+
+	intel_ring_reset(ring, 0);
+}
+
+/*
+ * Context operations for legacy ringbuffer submission; installed as
+ * engine->cops by intel_ring_default_vfuncs().
+ */
+static const struct intel_context_ops ring_context_ops = {
+ .pin = ring_context_pin,
+ .unpin = ring_context_unpin,
+
+ .reset = ring_context_reset,
+ .destroy = ring_context_destroy,
+};
+
+/*
+ * Common initialisation tail shared by the per-class
+ * intel_init_*_ring_buffer() functions.
+ *
+ * Runs the common engine setup, creates a timeline on the engine's status
+ * page vma, creates and pins a ring of 32 pages, publishes it as
+ * engine->buffer and finishes with intel_engine_init_common().
+ *
+ * Returns 0 on success or a negative error code. On failure the labels at
+ * the bottom unwind the ring pin and ring reference (as far as they were
+ * taken) and call intel_engine_cleanup_common().
+ */
+static int intel_init_ring_buffer(struct intel_engine_cs *engine)
+{
+ struct i915_timeline *timeline;
+ struct intel_ring *ring;
+ int err;
+
+ err = intel_engine_setup_common(engine);
+ if (err)
+ return err;
+
+ timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
+ if (IS_ERR(timeline)) {
+ err = PTR_ERR(timeline);
+ goto err;
+ }
+ GEM_BUG_ON(timeline->has_initial_breadcrumb);
+
+ ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
+ /*
+ * Our timeline reference is dropped whether or not the ring was
+ * created; presumably the ring holds its own reference - TODO confirm.
+ */
+ i915_timeline_put(timeline);
+ if (IS_ERR(ring)) {
+ err = PTR_ERR(ring);
+ goto err;
+ }
+
+ err = intel_ring_pin(ring);
+ if (err)
+ goto err_ring;
+
+ GEM_BUG_ON(engine->buffer);
+ engine->buffer = ring;
+
+ err = intel_engine_init_common(engine);
+ if (err)
+ goto err_unpin;
+
+ GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
+
+ return 0;
+
+ /* NOTE(review): engine->buffer is not cleared on the err_unpin path. */
+err_unpin:
+ intel_ring_unpin(ring);
+err_ring:
+ intel_ring_put(ring);
+err:
+ intel_engine_cleanup_common(engine);
+ return err;
+}
+
+/*
+ * Tear down a legacy ring submission engine and free it.
+ *
+ * Warns if, on gen > 2, RING_MI_MODE does not report MODE_IDLE. Then unpins
+ * and releases engine->buffer, calls the optional engine->cleanup() hook,
+ * runs the common engine cleanup, clears the engine's slot in
+ * dev_priv->engine[] and kfree()s the engine itself, so @engine must not be
+ * used after this returns.
+ */
+void intel_engine_cleanup(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+
+ WARN_ON(INTEL_GEN(dev_priv) > 2 &&
+ (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+
+ intel_ring_unpin(engine->buffer);
+ intel_ring_put(engine->buffer);
+
+ if (engine->cleanup)
+ engine->cleanup(engine);
+
+ intel_engine_cleanup_common(engine);
+
+ dev_priv->engine[engine->id] = NULL;
+ kfree(engine);
+}
+
+/*
+ * Emit the page directory load for @ppgtt into @rq.
+ *
+ * Writes two single-register MI_LOAD_REGISTER_IMM packets (6 dwords in
+ * total): RING_PP_DIR_DCLV <- PP_DIR_DCLV_2G, then RING_PP_DIR_BASE <- the
+ * page directory's GGTT offset shifted left by 10.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int load_pd_dir(struct i915_request *rq,
+ const struct i915_hw_ppgtt *ppgtt)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
+ *cs++ = PP_DIR_DCLV_2G;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+ *cs++ = ppgtt->pd.base.ggtt_offset << 10;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+/*
+ * Emit a 4-dword MI_STORE_REGISTER_MEM that stores RING_PP_DIR_BASE to the
+ * address returned by i915_scratch_offset(), padded with an MI_NOOP. The
+ * stored value is not used; the read-back serves as a stall after
+ * load_pd_dir().
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int flush_pd_dir(struct i915_request *rq)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Stall until the page table load is complete */
+ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+ *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+ *cs++ = i915_scratch_offset(rq->i915);
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+ return 0;
+}
+
+/*
+ * Emit an MI_SET_CONTEXT switching to rq->hw_context->state, wrapped in the
+ * gen7 arbitration/PSMI workarounds where applicable.
+ *
+ * @flags is OR'ed into the context address dword, together with
+ * MI_MM_SPACE_GTT and the platform's save/restore enable bits.
+ * MI_FORCE_RESTORE is not passed on to that dword: it is stripped and
+ * instead causes an extra MI_SET_CONTEXT to engine->kernel_context (with
+ * MI_RESTORE_INHIBIT) to be emitted first. It must not be combined with
+ * MI_RESTORE_INHIBIT.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static inline int mi_set_context(struct i915_request *rq, u32 flags)
+{
+ struct drm_i915_private *i915 = rq->i915;
+ struct intel_engine_cs *engine = rq->engine;
+ enum intel_engine_id id;
+ /* Number of *other* engines to program; only non-zero on HSW GT1. */
+ const int num_engines =
+ IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+ bool force_restore = false;
+ int len;
+ u32 *cs;
+
+ flags |= MI_MM_SPACE_GTT;
+ if (IS_HASWELL(i915))
+ /* These flags are for resource streamer on HSW+ */
+ flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
+ else
+ flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
+
+ /* Base packet: MI_NOOP, MI_SET_CONTEXT, address | flags, MI_NOOP. */
+ len = 4;
+ /*
+ * gen7 adds MI_ARB_ON_OFF before and after (2 dwords). With
+ * num_engines set it further adds two LRI headers, 2 dwords per other
+ * engine in each LRI, and a 4-dword SRM delay: 4 * num_engines + 6.
+ */
+ if (IS_GEN(i915, 7))
+ len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
+ if (flags & MI_FORCE_RESTORE) {
+ GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
+ flags &= ~MI_FORCE_RESTORE;
+ force_restore = true;
+ /* Room for the extra MI_SET_CONTEXT to the kernel context. */
+ len += 2;
+ }
+
+ cs = intel_ring_begin(rq, len);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
+ if (IS_GEN(i915, 7)) {
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ if (num_engines) {
+ struct intel_engine_cs *signaller;
+
+ /* Set GEN6_PSMI_SLEEP_MSG_DISABLE on every other engine. */
+ *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ for_each_engine(signaller, i915, id) {
+ if (signaller == engine)
+ continue;
+
+ *cs++ = i915_mmio_reg_offset(
+ RING_PSMI_CTL(signaller->mmio_base));
+ *cs++ = _MASKED_BIT_ENABLE(
+ GEN6_PSMI_SLEEP_MSG_DISABLE);
+ }
+ }
+ }
+
+ if (force_restore) {
+ /*
+ * The HW doesn't handle being told to restore the current
+ * context very well. Quite often it likes to go off and
+ * sulk, especially when it is meant to be reloading PP_DIR.
+ * A very simple fix to force the reload is to simply switch
+ * away from the current context and back again.
+ *
+ * Note that the kernel_context will contain random state
+ * following the INHIBIT_RESTORE. We accept this since we
+ * never use the kernel_context state; it is merely a
+ * placeholder we use to flush other contexts.
+ */
+ *cs++ = MI_SET_CONTEXT;
+ *cs++ = i915_ggtt_offset(engine->kernel_context->state) |
+ MI_MM_SPACE_GTT |
+ MI_RESTORE_INHIBIT;
+ }
+
+ *cs++ = MI_NOOP;
+ *cs++ = MI_SET_CONTEXT;
+ *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
+ /*
+ * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
+ * WaMiSetContext_Hang:snb,ivb,vlv
+ */
+ *cs++ = MI_NOOP;
+
+ if (IS_GEN(i915, 7)) {
+ if (num_engines) {
+ struct intel_engine_cs *signaller;
+ i915_reg_t last_reg = {}; /* keep gcc quiet */
+
+ /* Clear GEN6_PSMI_SLEEP_MSG_DISABLE on the other engines again. */
+ *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ for_each_engine(signaller, i915, id) {
+ if (signaller == engine)
+ continue;
+
+ last_reg = RING_PSMI_CTL(signaller->mmio_base);
+ *cs++ = i915_mmio_reg_offset(last_reg);
+ *cs++ = _MASKED_BIT_DISABLE(
+ GEN6_PSMI_SLEEP_MSG_DISABLE);
+ }
+
+ /* Insert a delay before the next switch! */
+ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+ *cs++ = i915_mmio_reg_offset(last_reg);
+ *cs++ = i915_scratch_offset(rq->i915);
+ *cs++ = MI_NOOP;
+ }
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ }
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+/*
+ * Emit one MI_LOAD_REGISTER_IMM that programs every GEN7_L3LOG register of
+ * @slice from the remap table stored in i915->l3_parity.remap_info[slice].
+ *
+ * Returns 0 when there is no table for the slice or on success, otherwise
+ * the error from intel_ring_begin().
+ */
+static int remap_l3(struct i915_request *rq, int slice)
+{
+	const int nregs = GEN7_L3LOG_SIZE / 4;
+	const u32 *remap = rq->i915->l3_parity.remap_info[slice];
+	u32 *cs;
+	int n;
+
+	if (!remap)
+		return 0;
+
+	/* (offset, value) per register, plus the LRI header and a NOOP. */
+	cs = intel_ring_begin(rq, nregs * 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * The concurrent register cacheline hang is not a concern here:
+	 * nothing else is expected to touch these registers outside of
+	 * initialization time.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(nregs);
+	for (n = 0; n < nregs; n++) {
+		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, n));
+		*cs++ = remap[n];
+	}
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+/*
+ * Emit everything needed to switch the engine to the context of @rq:
+ * page directory load(s), MI_SET_CONTEXT when the HW context has a state
+ * image, the TLB invalidate/flush sequence for the new ppgtt, and any
+ * pending L3 remapping.
+ *
+ * The ppgtt used is the context's own, falling back to the aliasing ppgtt.
+ * If this engine's bit is set in ppgtt->pd_dirty_engines it is cleared here
+ * and MI_FORCE_RESTORE is requested; the bit is restored if a later step
+ * fails. Requires struct_mutex and a non-execlists platform.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int switch_context(struct i915_request *rq)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ struct i915_gem_context *ctx = rq->gem_context;
+ struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
+ /* Dirty bit cleared below; re-set on the err_mm path. */
+ unsigned int unwind_mm = 0;
+ u32 hw_flags = 0;
+ int ret, i;
+
+ lockdep_assert_held(&rq->i915->drm.struct_mutex);
+ GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+
+ if (ppgtt) {
+ int loops;
+
+ /*
+ * Baytrail takes a little more convincing that it really needs
+ * to reload the PD between contexts. It is not just a little
+ * longer, as adding more stalls after the load_pd_dir (i.e.
+ * adding a long loop around flush_pd_dir) is not as effective
+ * as reloading the PD umpteen times. 32 is derived from
+ * experimentation (gem_exec_parallel/fds) and has no good
+ * explanation.
+ */
+ loops = 1;
+ if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
+ loops = 32;
+
+ do {
+ ret = load_pd_dir(rq, ppgtt);
+ if (ret)
+ goto err;
+ } while (--loops);
+
+ if (ppgtt->pd_dirty_engines & engine->mask) {
+ unwind_mm = engine->mask;
+ ppgtt->pd_dirty_engines &= ~unwind_mm;
+ hw_flags = MI_FORCE_RESTORE;
+ }
+ }
+
+ if (rq->hw_context->state) {
+ GEM_BUG_ON(engine->id != RCS0);
+
+ /*
+ * The kernel context(s) is treated as pure scratch and is not
+ * expected to retain any state (as we sacrifice it during
+ * suspend and on resume it may be corrupted). This is ok,
+ * as nothing actually executes using the kernel context; it
+ * is purely used for flushing user contexts.
+ */
+ /* Note: this replaces, rather than adds to, MI_FORCE_RESTORE. */
+ if (i915_gem_context_is_kernel(ctx))
+ hw_flags = MI_RESTORE_INHIBIT;
+
+ ret = mi_set_context(rq, hw_flags);
+ if (ret)
+ goto err_mm;
+ }
+
+ if (ppgtt) {
+ ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (ret)
+ goto err_mm;
+
+ ret = flush_pd_dir(rq);
+ if (ret)
+ goto err_mm;
+
+ /*
+ * Not only do we need a full barrier (post-sync write) after
+ * invalidating the TLBs, but we need to wait a little bit
+ * longer. Whether this is merely delaying us, or the
+ * subsequent flush is a key part of serialising with the
+ * post-sync op, this extra pass appears vital before a
+ * mm switch!
+ */
+ ret = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (ret)
+ goto err_mm;
+
+ ret = engine->emit_flush(rq, EMIT_FLUSH);
+ if (ret)
+ goto err_mm;
+ }
+
+ if (ctx->remap_slice) {
+ for (i = 0; i < MAX_L3_SLICES; i++) {
+ if (!(ctx->remap_slice & BIT(i)))
+ continue;
+
+ ret = remap_l3(rq, i);
+ if (ret)
+ goto err_mm;
+ }
+
+ /* All requested slices were emitted; nothing left pending. */
+ ctx->remap_slice = 0;
+ }
+
+ return 0;
+
+err_mm:
+ if (unwind_mm)
+ ppgtt->pd_dirty_engines |= unwind_mm;
+err:
+ return ret;
+}
+
+/*
+ * engine->request_alloc hook for legacy ring submission.
+ *
+ * Temporarily grows request->reserved_space by LEGACY_REQUEST_SIZE, emits
+ * the context switch followed by an unconditional EMIT_INVALIDATE flush,
+ * then takes the extra reservation back.
+ *
+ * Returns 0 on success or a negative error code. On the error returns the
+ * extra LEGACY_REQUEST_SIZE is left added to reserved_space.
+ */
+static int ring_request_alloc(struct i915_request *request)
+{
+ int ret;
+
+ GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+ GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
+
+ /*
+ * Flush enough space to reduce the likelihood of waiting after
+ * we start building the request - in which case we will just
+ * have to repeat work.
+ */
+ request->reserved_space += LEGACY_REQUEST_SIZE;
+
+ ret = switch_context(request);
+ if (ret)
+ return ret;
+
+ /* Unconditionally invalidate GPU caches and TLBs. */
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+ if (ret)
+ return ret;
+
+ request->reserved_space -= LEGACY_REQUEST_SIZE;
+ return 0;
+}
+
+/*
+ * Make at least @bytes of space available in @ring, waiting if necessary.
+ *
+ * If the ring is short on space, finds the first request on
+ * ring->request_list whose completion would free enough room, waits for it
+ * (interruptibly, with struct_mutex held) and retires everything up to and
+ * including it.
+ *
+ * Returns 0 on success, -ENOSPC if no request on the list would free enough
+ * space, or the negative error returned by i915_request_wait().
+ */
+static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
+{
+ struct i915_request *target;
+ long timeout;
+
+ lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex);
+
+ if (intel_ring_update_space(ring) >= bytes)
+ return 0;
+
+ GEM_BUG_ON(list_empty(&ring->request_list));
+ list_for_each_entry(target, &ring->request_list, ring_link) {
+ /* Would completion of this request free enough space? */
+ if (bytes <= __intel_ring_space(target->postfix,
+ ring->emit, ring->size))
+ break;
+ }
+
+ /* The loop ran off the end of the list without finding a candidate. */
+ if (WARN_ON(&target->ring_link == &ring->request_list))
+ return -ENOSPC;
+
+ timeout = i915_request_wait(target,
+ I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0)
+ return timeout;
+
+ i915_request_retire_upto(target);
+
+ intel_ring_update_space(ring);
+ GEM_BUG_ON(ring->space < bytes);
+ return 0;
+}
+
+/*
+ * Reserve @num_dwords dwords in the ring of @rq for command emission.
+ *
+ * @num_dwords must be even. The space check also accounts for
+ * rq->reserved_space. If the packet does not fit before the end of the
+ * usable ring area, the remainder of the ring is filled with MI_NOOP (zero)
+ * and emission wraps to offset 0. If the ring lacks space, this waits via
+ * wait_for_space().
+ *
+ * On success ring->emit and ring->space are advanced by the reserved bytes
+ * and a pointer to the first dword is returned; on failure an ERR_PTR()
+ * carrying the error from wait_for_space() is returned.
+ */
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
+{
+ struct intel_ring *ring = rq->ring;
+ const unsigned int remain_usable = ring->effective_size - ring->emit;
+ const unsigned int bytes = num_dwords * sizeof(u32);
+ unsigned int need_wrap = 0;
+ unsigned int total_bytes;
+ u32 *cs;
+
+ /* Packets must be qword aligned. */
+ GEM_BUG_ON(num_dwords & 1);
+
+ total_bytes = bytes + rq->reserved_space;
+ GEM_BUG_ON(total_bytes > ring->effective_size);
+
+ if (unlikely(total_bytes > remain_usable)) {
+ const int remain_actual = ring->size - ring->emit;
+
+ if (bytes > remain_usable) {
+ /*
+ * Not enough space for the basic request. So need to
+ * flush out the remainder and then wait for
+ * base + reserved.
+ */
+ total_bytes += remain_actual;
+ /*
+ * Bit 0 doubles as a "wrap needed" flag and is masked
+ * off again before use; presumably this keeps
+ * need_wrap non-zero even if remain_actual is 0.
+ */
+ need_wrap = remain_actual | 1;
+ } else {
+ /*
+ * The base request will fit but the reserved space
+ * falls off the end. So we don't need an immediate
+ * wrap and only need to effectively wait for the
+ * reserved size from the start of ringbuffer.
+ */
+ total_bytes = rq->reserved_space + remain_actual;
+ }
+ }
+
+ if (unlikely(total_bytes > ring->space)) {
+ int ret;
+
+ /*
+ * Space is reserved in the ringbuffer for finalising the
+ * request, as that cannot be allowed to fail. During request
+ * finalisation, reserved_space is set to 0 to stop the
+ * overallocation and the assumption is that then we never need
+ * to wait (which has the risk of failing with EINTR).
+ *
+ * See also i915_request_alloc() and i915_request_add().
+ */
+ GEM_BUG_ON(!rq->reserved_space);
+
+ ret = wait_for_space(ring, total_bytes);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(need_wrap)) {
+ /* Strip the flag bit to recover the byte count to pad. */
+ need_wrap &= ~1;
+ GEM_BUG_ON(need_wrap > ring->space);
+ GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+ GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
+
+ /* Fill the tail with MI_NOOP */
+ memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
+ ring->space -= need_wrap;
+ ring->emit = 0;
+ }
+
+ GEM_BUG_ON(ring->emit > ring->size - bytes);
+ GEM_BUG_ON(ring->space < bytes);
+ cs = ring->vaddr + ring->emit;
+ /* Debug builds poison the reserved dwords before handing them out. */
+ GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
+ ring->emit += bytes;
+ ring->space -= bytes;
+
+ return cs;
+}
+
+/*
+ * Pad the ring with MI_NOOPs so that the emit offset of @rq's ring lands on
+ * a cacheline boundary.
+ *
+ * Returns 0 if already aligned or once padded, otherwise the error from
+ * intel_ring_begin().
+ */
+int intel_ring_cacheline_align(struct i915_request *rq)
+{
+	int used, pad;
+	void *cs;
+
+	/* Dwords already consumed in the current cacheline. */
+	used = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
+	if (!used)
+		return 0;
+
+	pad = CACHELINE_DWORDS - used;
+	GEM_BUG_ON(pad & 1);
+
+	cs = intel_ring_begin(rq, pad);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/* Write the padding two MI_NOOP dwords at a time. */
+	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, pad / 2);
+	intel_ring_advance(rq, cs);
+
+	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
+	return 0;
+}
+
+/*
+ * submit_request hook installed by gen6_bsd_set_default_submission().
+ *
+ * Wraps i9xx_submit_request() in the wake-up sequence needed around every
+ * tail update: with all forcewake domains held, disable the ring's IDLE
+ * notification, clear GEN6_BSD_RNCID, wait up to 1000us for the sleep
+ * indicator to clear, submit, and finally re-enable IDLE notifications.
+ * A wait timeout is only logged; submission proceeds regardless.
+ */
+static void gen6_bsd_submit_request(struct i915_request *request)
+{
+ struct intel_uncore *uncore = request->engine->uncore;
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ /* Every tail move must follow the sequence below */
+
+ /* Disable notification that the ring is IDLE. The GT
+ * will then assume that it is busy and bring it out of rc6.
+ */
+ intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
+ _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
+
+ /* Clear the context id. Here be magic! */
+ intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
+
+ /* Wait for the ring not to be idle, i.e. for it to wake up. */
+ if (__intel_wait_for_register_fw(uncore,
+ GEN6_BSD_SLEEP_PSMI_CONTROL,
+ GEN6_BSD_SLEEP_INDICATOR,
+ 0,
+ 1000, 0, NULL))
+ DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
+
+ /* Now that the ring is fully powered up, update the tail */
+ i9xx_submit_request(request);
+
+ /* Let the ring send IDLE messages to the GT again,
+ * and so let it sleep to conserve power when idle.
+ */
+ intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
+ _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+}
+
+/*
+ * Emit a 4-dword MI_FLUSH_DW with a post-sync dword store to the HWS scratch
+ * address; @flags carries any additional MI_FLUSH_DW bits (for example the
+ * invalidate bits) to OR into the command dword.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int mi_flush_dw(struct i915_request *rq, u32 flags)
+{
+	/*
+	 * The store-dword post-sync op is always requested: it acts as a
+	 * command barrier so that later commands, such as breadcrumb
+	 * interrupts, are strictly ordered wrt the write cache contents
+	 * being flushed to memory (and thus coherent from the CPU).
+	 *
+	 * It is also what makes a TLB invalidate in @flags valid. Bspec
+	 * vol 1c.3 - blitter engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
+	const u32 cmd = MI_FLUSH_DW |
+			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW |
+			flags;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	cs[0] = cmd;
+	cs[1] = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	cs[2] = 0;
+	cs[3] = MI_NOOP;
+
+	intel_ring_advance(rq, cs + 4);
+
+	return 0;
+}
+
+/*
+ * Emit an MI_FLUSH_DW, adding @invflags only when @mode requests
+ * EMIT_INVALIDATE.
+ */
+static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
+{
+	u32 flags = 0;
+
+	if (mode & EMIT_INVALIDATE)
+		flags = invflags;
+
+	return mi_flush_dw(rq, flags);
+}
+
+/* emit_flush hook for the gen6+ BSD ring: invalidate covers TLB and BSD. */
+static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
+{
+	const u32 invalidate = MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+
+	return gen6_flush_dw(rq, mode, invalidate);
+}
+
+/*
+ * Haswell emit_bb_start hook: emit a 2-dword MI_BATCH_BUFFER_START for the
+ * batch at @offset. Batches dispatched without I915_DISPATCH_SECURE get the
+ * HSW PPGTT and non-secure bits. @len is unused.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int
+hsw_emit_bb_start(struct i915_request *rq,
+		  u64 offset, u32 len,
+		  unsigned int dispatch_flags)
+{
+	u32 cmd = MI_BATCH_BUFFER_START;
+	u32 *cs;
+
+	if (!(dispatch_flags & I915_DISPATCH_SECURE))
+		cmd |= MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW;
+
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/* bit0-7 is the length on GEN6+ */
+	cs[0] = cmd;
+	cs[1] = offset;
+	intel_ring_advance(rq, cs + 2);
+
+	return 0;
+}
+
+/*
+ * Default gen6+ emit_bb_start hook: emit a 2-dword MI_BATCH_BUFFER_START for
+ * the batch at @offset. Batches dispatched without I915_DISPATCH_SECURE get
+ * MI_BATCH_NON_SECURE_I965. @len is unused.
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int
+gen6_emit_bb_start(struct i915_request *rq,
+		   u64 offset, u32 len,
+		   unsigned int dispatch_flags)
+{
+	u32 cmd = MI_BATCH_BUFFER_START;
+	u32 *cs;
+
+	if (!(dispatch_flags & I915_DISPATCH_SECURE))
+		cmd |= MI_BATCH_NON_SECURE_I965;
+
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/* bit0-7 is the length on GEN6+ */
+	cs[0] = cmd;
+	cs[1] = offset;
+	intel_ring_advance(rq, cs + 2);
+
+	return 0;
+}
+
+/* Blitter support (SandyBridge+) */
+
+/* emit_flush hook used by the blitter and vebox rings: invalidate TLBs only. */
+static int gen6_ring_flush(struct i915_request *rq, u32 mode)
+{
+	const u32 invalidate = MI_INVALIDATE_TLB;
+
+	return gen6_flush_dw(rq, mode, invalidate);
+}
+
+/*
+ * Select the irq_enable/irq_disable pair matching the device generation:
+ * i8xx below gen3, i9xx for gen3-4, gen5 for gen5 and gen6 for gen6+.
+ */
+static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
+				struct intel_engine_cs *engine)
+{
+	const int gen = INTEL_GEN(dev_priv);
+
+	if (gen < 3) {
+		engine->irq_enable = i8xx_irq_enable;
+		engine->irq_disable = i8xx_irq_disable;
+	} else if (gen < 5) {
+		engine->irq_enable = i9xx_irq_enable;
+		engine->irq_disable = i9xx_irq_disable;
+	} else if (gen < 6) {
+		engine->irq_enable = gen5_irq_enable;
+		engine->irq_disable = gen5_irq_disable;
+	} else {
+		engine->irq_enable = gen6_irq_enable;
+		engine->irq_disable = gen6_irq_disable;
+	}
+}
+
+/*
+ * Default set_default_submission hook for ring submission: installs the
+ * i9xx submit and cancel callbacks and leaves park/unpark unset.
+ */
+static void i9xx_set_default_submission(struct intel_engine_cs *engine)
+{
+	/* No park/unpark hooks are installed for ring submission. */
+	engine->unpark = NULL;
+	engine->park = NULL;
+
+	engine->cancel_requests = cancel_requests;
+	engine->submit_request = i9xx_submit_request;
+}
+
+/*
+ * set_default_submission hook for the gen6 BSD ring: same as the i9xx
+ * defaults, but submit_request is replaced afterwards with
+ * gen6_bsd_submit_request().
+ */
+static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
+{
+ i9xx_set_default_submission(engine);
+ /* Must come after the defaults so that it overrides them. */
+ engine->submit_request = gen6_bsd_submit_request;
+}
+
+/*
+ * Install the vfuncs shared by all legacy (pre-gen8) ring submission
+ * engines. The per-class intel_init_*_ring_buffer() functions override
+ * individual hooks after calling this.
+ */
+static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
+				      struct intel_engine_cs *engine)
+{
+	/* gen8+ are only supported with execlists */
+	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
+
+	intel_ring_init_irq(dev_priv, engine);
+
+	engine->cops = &ring_context_ops;
+	engine->request_alloc = ring_request_alloc;
+	engine->set_default_submission = i9xx_set_default_submission;
+
+	engine->init_hw = init_ring_common;
+	engine->reset.prepare = reset_prepare;
+	engine->reset.reset = reset_ring;
+	engine->reset.finish = reset_finish;
+
+	/*
+	 * With a global execution timeline, the previous request's final
+	 * breadcrumb doubles as the next request's initial breadcrumb, so
+	 * engine->emit_init_breadcrumb() is left unset.
+	 */
+	if (IS_GEN(dev_priv, 5))
+		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
+	else
+		engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
+
+	if (INTEL_GEN(dev_priv) < 4) {
+		if (IS_I830(dev_priv) || IS_I845G(dev_priv))
+			engine->emit_bb_start = i830_emit_bb_start;
+		else
+			engine->emit_bb_start = i915_emit_bb_start;
+	} else if (INTEL_GEN(dev_priv) < 6) {
+		engine->emit_bb_start = i965_emit_bb_start;
+	} else {
+		engine->emit_bb_start = gen6_emit_bb_start;
+	}
+}
+
+/*
+ * Set up the render engine for legacy ring submission: install the default
+ * vfuncs, apply the render-specific interrupt masks and per-generation
+ * flush/breadcrumb/context-init hooks, then create the ring.
+ *
+ * Returns 0 on success or the error from intel_init_ring_buffer().
+ */
+int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	intel_ring_default_vfuncs(dev_priv, engine);
+
+	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
+	if (HAS_L3_DPF(dev_priv))
+		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+
+	if (INTEL_GEN(dev_priv) >= 7) {
+		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
+		engine->emit_flush = gen7_render_ring_flush;
+		engine->init_context = intel_rcs_ctx_init;
+	} else if (IS_GEN(dev_priv, 6)) {
+		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
+		engine->emit_flush = gen6_render_ring_flush;
+		engine->init_context = intel_rcs_ctx_init;
+	} else if (IS_GEN(dev_priv, 5)) {
+		engine->emit_flush = gen4_render_ring_flush;
+	} else {
+		/* Replaces the GT_RENDER_USER_INTERRUPT mask set above. */
+		engine->irq_enable_mask = I915_USER_INTERRUPT;
+		if (INTEL_GEN(dev_priv) >= 4)
+			engine->emit_flush = gen4_render_ring_flush;
+		else
+			engine->emit_flush = gen2_render_ring_flush;
+	}
+
+	/* These replace the values installed by the default vfuncs. */
+	engine->init_hw = init_render_ring;
+	if (IS_HASWELL(dev_priv))
+		engine->emit_bb_start = hsw_emit_bb_start;
+
+	return intel_init_ring_buffer(engine);
+}
+
+/*
+ * Set up the BSD engine for legacy ring submission: install the default
+ * vfuncs, pick the flush hook, user interrupt mask and (gen6+) breadcrumb
+ * emitter for the generation, then create the ring.
+ *
+ * Returns 0 on success or the error from intel_init_ring_buffer().
+ */
+int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	intel_ring_default_vfuncs(dev_priv, engine);
+
+	if (INTEL_GEN(dev_priv) < 6) {
+		engine->emit_flush = bsd_ring_flush;
+		engine->irq_enable_mask = IS_GEN(dev_priv, 5) ?
+			ILK_BSD_USER_INTERRUPT : I915_BSD_USER_INTERRUPT;
+
+		return intel_init_ring_buffer(engine);
+	}
+
+	engine->emit_flush = gen6_bsd_ring_flush;
+	engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+
+	if (IS_GEN(dev_priv, 6)) {
+		/* gen6 bsd needs a special wa for tail updates */
+		engine->set_default_submission = gen6_bsd_set_default_submission;
+		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+	} else {
+		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+	}
+
+	return intel_init_ring_buffer(engine);
+}
+
+/*
+ * Set up the blitter engine (gen6+ only) for legacy ring submission.
+ *
+ * Returns 0 on success or the error from intel_init_ring_buffer().
+ */
+int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
+
+	intel_ring_default_vfuncs(dev_priv, engine);
+
+	if (!IS_GEN(dev_priv, 6))
+		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+	else
+		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
+
+	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
+	engine->emit_flush = gen6_ring_flush;
+
+	return intel_init_ring_buffer(engine);
+}
+
+/*
+ * Set up the vebox engine (gen7+ only) for legacy ring submission; it uses
+ * its own hsw_vebox irq enable/disable pair instead of the defaults.
+ *
+ * Returns 0 on success or the error from intel_init_ring_buffer().
+ */
+int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
+
+	intel_ring_default_vfuncs(dev_priv, engine);
+
+	/* Replace the irq hooks chosen by the default vfuncs. */
+	engine->irq_enable = hsw_vebox_irq_enable;
+	engine->irq_disable = hsw_vebox_irq_disable;
+	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
+
+	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+	engine->emit_flush = gen6_ring_flush;
+
+	return intel_init_ring_buffer(engine);
+}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_lrc_reg.h"
+#include "intel_sseu.h"
+
+/*
+ * Build the RPCS value describing the slice/subslice/EU powergating
+ * configuration requested in @req_sseu.
+ *
+ * Returns 0 before Gen9. While an i915/perf OA exclusive stream is active
+ * @req_sseu is ignored and a configuration derived from the device info is
+ * used instead (reduced to a fixed subset on Gen11). Which fields are
+ * populated depends on the device's has_slice_pg / has_subslice_pg /
+ * has_eu_pg capabilities.
+ */
+u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
+ const struct intel_sseu *req_sseu)
+{
+ const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+ bool subslice_pg = sseu->has_subslice_pg;
+ struct intel_sseu ctx_sseu;
+ u8 slices, subslices;
+ u32 rpcs = 0;
+
+ /*
+ * No explicit RPCS request is needed to ensure full
+ * slice/subslice/EU enablement prior to Gen9.
+ */
+ if (INTEL_GEN(i915) < 9)
+ return 0;
+
+ /*
+ * If i915/perf is active, we want a stable powergating configuration
+ * on the system.
+ *
+ * We could choose full enablement, but on ICL we know there are use
+ * cases which disable slices for functional, as well as performance,
+ * reasons. So in this case we select a known stable subset.
+ */
+ if (!i915->perf.oa.exclusive_stream) {
+ ctx_sseu = *req_sseu;
+ } else {
+ ctx_sseu = intel_sseu_from_device_info(sseu);
+
+ if (IS_GEN(i915, 11)) {
+ /*
+ * We only need subslice count so it doesn't matter
+ * which ones we select - just turn off low bits in the
+ * amount of half of all available subslices per slice.
+ */
+ ctx_sseu.subslice_mask =
+ ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
+ ctx_sseu.slice_mask = 0x1;
+ }
+ }
+
+ /* Only the number of set bits matters from here on. */
+ slices = hweight8(ctx_sseu.slice_mask);
+ subslices = hweight8(ctx_sseu.subslice_mask);
+
+ /*
+ * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
+ * wide and Icelake has up to eight subslices, special programming is
+ * needed in order to correctly enable all subslices.
+ *
+ * According to documentation software must consider the configuration
+ * as 2x4x8 and hardware will translate this to 1x8x8.
+ *
+ * Furthermore, even though SScount is three bits, maximum documented
+ * value for it is four. From this some rules/restrictions follow:
+ *
+ * 1.
+ * If enabled subslice count is greater than four, two whole slices must
+ * be enabled instead.
+ *
+ * 2.
+ * When more than one slice is enabled, hardware ignores the subslice
+ * count altogether.
+ *
+ * From these restrictions it follows that it is not possible to enable
+ * a count of subslices between the SScount maximum of four restriction,
+ * and the maximum available number on a particular SKU. Either all
+ * subslices are enabled, or a count between one and four on the first
+ * slice.
+ */
+ if (IS_GEN(i915, 11) &&
+ slices == 1 &&
+ subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
+ GEM_BUG_ON(subslices & 1);
+
+ /* Rule 1: request two slices and drop the subslice count. */
+ subslice_pg = false;
+ slices *= 2;
+ }
+
+ /*
+ * Starting in Gen9, render power gating can leave
+ * slice/subslice/EU in a partially enabled state. We
+ * must make an explicit request through RPCS for full
+ * enablement.
+ */
+ if (sseu->has_slice_pg) {
+ u32 mask, val = slices;
+
+ /* The slice count field moved on Gen11. */
+ if (INTEL_GEN(i915) >= 11) {
+ mask = GEN11_RPCS_S_CNT_MASK;
+ val <<= GEN11_RPCS_S_CNT_SHIFT;
+ } else {
+ mask = GEN8_RPCS_S_CNT_MASK;
+ val <<= GEN8_RPCS_S_CNT_SHIFT;
+ }
+
+ GEM_BUG_ON(val & ~mask);
+ val &= mask;
+
+ rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
+ }
+
+ if (subslice_pg) {
+ u32 val = subslices;
+
+ val <<= GEN8_RPCS_SS_CNT_SHIFT;
+
+ GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
+ val &= GEN8_RPCS_SS_CNT_MASK;
+
+ rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
+ }
+
+ if (sseu->has_eu_pg) {
+ u32 val;
+
+ val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+ GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
+ val &= GEN8_RPCS_EU_MIN_MASK;
+
+ rpcs |= val;
+
+ val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+ GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
+ val &= GEN8_RPCS_EU_MAX_MASK;
+
+ rpcs |= val;
+
+ rpcs |= GEN8_RPCS_ENABLE;
+ }
+
+ return rpcs;
+}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_SSEU_H__
+#define __INTEL_SSEU_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+
+#define GEN_MAX_SLICES (6) /* CNL upper bound */
+#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
+
+/*
+ * Slice/subslice/EU description of the device, read by
+ * intel_sseu_make_rpcs() and intel_sseu_from_device_info().
+ */
+struct sseu_dev_info {
+ u8 slice_mask;
+ /* One subslice bitmask per slice. */
+ u8 subslice_mask[GEN_MAX_SLICES];
+ u16 eu_total;
+ u8 eu_per_subslice;
+ u8 min_eu_in_pool;
+ /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
+ u8 subslice_7eu[3];
+ /*
+ * Powergating capabilities; intel_sseu_make_rpcs() only programs the
+ * slice count, subslice count and EU min/max fields when the matching
+ * flag is set.
+ */
+ u8 has_slice_pg:1;
+ u8 has_subslice_pg:1;
+ u8 has_eu_pg:1;
+
+ /* Topology fields */
+ u8 max_slices;
+ u8 max_subslices;
+ u8 max_eus_per_subslice;
+
+ /* We don't have more than 8 eus per subslice at the moment and as we
+ * store eus enabled using bits, no need to multiply by eus per
+ * subslice.
+ */
+ u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
+};
+
+/*
+ * Powergating configuration for a particular (context,engine).
+ *
+ * intel_sseu_make_rpcs() uses the number of bits set in slice_mask and
+ * subslice_mask as the slice and subslice counts, and shifts the min/max
+ * EU values into the RPCS EU_MIN/EU_MAX fields.
+ */
+struct intel_sseu {
+ u8 slice_mask;
+ u8 subslice_mask;
+ u8 min_eus_per_subslice;
+ u8 max_eus_per_subslice;
+};
+
+/*
+ * Derive a powergating configuration from the device description: the
+ * device's slice mask, the subslice mask of slice 0, and the device's
+ * max_eus_per_subslice for both the minimum and the maximum EU count.
+ */
+static inline struct intel_sseu
+intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
+{
+	struct intel_sseu cfg;
+
+	cfg.slice_mask = sseu->slice_mask;
+	cfg.subslice_mask = sseu->subslice_mask[0];
+
+	/* Both bounds are pinned to the device maximum. */
+	cfg.min_eus_per_subslice = sseu->max_eus_per_subslice;
+	cfg.max_eus_per_subslice = sseu->max_eus_per_subslice;
+
+	return cfg;
+}
+
+/*
+ * Build the RPCS value for the powergating configuration in @req_sseu;
+ * returns 0 on platforms before Gen9.
+ */
+u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
+ const struct intel_sseu *req_sseu);
+
+#endif /* __INTEL_SSEU_H__ */
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_workarounds.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * This file is intended as a central place to implement most [1]_ of the
+ * required workarounds for hardware to work as originally intended. They fall
+ * in five basic categories depending on how/when they are applied:
+ *
+ * - Workarounds that touch registers that are saved/restored to/from the HW
+ * context image. The list is emitted (via Load Register Immediate commands)
+ * every time a new context is created.
+ * - GT workarounds. The list of these WAs is applied whenever these registers
+ * revert to default values (on GPU reset, suspend/resume [2]_, etc..).
+ * - Display workarounds. The list is applied during display clock-gating
+ * initialization.
+ * - Workarounds that whitelist a privileged register, so that UMDs can manage
+ * them directly. This is just a special case of an MMIO workaround (as we
+ * write the list of these to-be-whitelisted registers to some special HW
+ * registers).
+ * - Workaround batchbuffers, that get executed automatically by the hardware
+ * on every HW context restore.
+ *
+ * .. [1] Please notice that there are other WAs that, due to their nature,
+ * cannot be applied from a central place. Those are peppered around the rest
+ * of the code, as needed.
+ *
+ * .. [2] Technically, some registers are powercontext saved & restored, so they
+ * survive a suspend/resume. In practice, writing them again is not too
+ * costly and simplifies things. We can revisit this in the future.
+ *
+ * Layout
+ * ''''''
+ *
+ * Keep things in this file ordered by WA type, as per the above (context, GT,
+ * display, register whitelist, batchbuffer). Then, inside each type, keep the
+ * following order:
+ *
+ * - Infrastructure functions and macros
+ * - WAs per platform in standard gen/chrono order
+ * - Public functions to init or apply the given workaround type.
+ */
+
+/*
+ * Begin building a workaround list: records @name, which wa_init_finish()
+ * prints in its debug message. The pointer is stored, not copied.
+ */
+static void wa_init_start(struct i915_wa_list *wal, const char *name)
+{
+ wal->name = name;
+}
+
+#define WA_LIST_CHUNK (1 << 4)
+
+/*
+ * Finish building a workaround list: shrink the storage to exactly
+ * wal->count entries (best effort - the oversized buffer is kept if the
+ * copy cannot be allocated) and log how many workarounds were recorded.
+ */
+static void wa_init_finish(struct i915_wa_list *wal)
+{
+	/* A count on a chunk boundary has no unused tail to trim. */
+	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
+		struct i915_wa *trimmed;
+
+		trimmed = kmemdup(wal->list,
+				  wal->count * sizeof(*trimmed),
+				  GFP_KERNEL);
+		if (trimmed) {
+			kfree(wal->list);
+			wal->list = trimmed;
+		}
+	}
+
+	if (wal->count)
+		DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
+				 wal->wa_count, wal->name);
+}
+
+/*
+ * Add @wa to @wal, keeping wal->list sorted by ascending register offset.
+ *
+ * Storage grows in WA_LIST_CHUNK-sized steps; when it grows, the existing
+ * entries are copied over and the old array is freed.
+ *
+ * If an entry for the same register already exists, @wa is merged into it:
+ * value, mask and read-back mask are OR'ed together. When the new mask is
+ * entirely contained in the existing one, an error is logged and the
+ * overlapping bits of the old value are cleared first, so the new value
+ * wins for those bits.
+ *
+ * wal->wa_count counts every workaround added (including merged ones),
+ * wal->count the number of distinct registers. On allocation failure the
+ * workaround is dropped after logging an error.
+ */
+static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
+{
+	unsigned int addr = i915_mmio_reg_offset(wa->reg);
+	unsigned int start = 0, end = wal->count;
+	const unsigned int grow = WA_LIST_CHUNK;
+	struct i915_wa *wa_;
+
+	GEM_BUG_ON(!is_power_of_2(grow));
+
+	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
+		struct i915_wa *list;
+
+		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
+				     GFP_KERNEL);
+		if (!list) {
+			DRM_ERROR("No space for workaround init!\n");
+			return;
+		}
+
+		if (wal->list) {
+			memcpy(list, wal->list, sizeof(*wa) * wal->count);
+			/* The old array is no longer referenced; don't leak it. */
+			kfree(wal->list);
+		}
+
+		wal->list = list;
+	}
+
+	/* Binary search for an existing entry covering the same register. */
+	while (start < end) {
+		unsigned int mid = start + (end - start) / 2;
+
+		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
+			start = mid + 1;
+		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
+			end = mid;
+		} else {
+			wa_ = &wal->list[mid];
+
+			if ((wa->mask & ~wa_->mask) == 0) {
+				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
+					  i915_mmio_reg_offset(wa_->reg),
+					  wa_->mask, wa_->val);
+
+				wa_->val &= ~wa->mask;
+			}
+
+			wal->wa_count++;
+			wa_->val |= wa->val;
+			wa_->mask |= wa->mask;
+			wa_->read |= wa->read;
+			return;
+		}
+	}
+
+	/* New register: append, then bubble it down into sorted position. */
+	wal->wa_count++;
+	wa_ = &wal->list[wal->count++];
+	*wa_ = *wa;
+
+	while (wa_-- > wal->list) {
+		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
+			   i915_mmio_reg_offset(wa_[1].reg));
+		if (i915_mmio_reg_offset(wa_[1].reg) >
+		    i915_mmio_reg_offset(wa_[0].reg))
+			break;
+
+		swap(wa_[1], wa_[0]);
+	}
+}
+
+/*
+ * Record a workaround that sets the bits of @reg selected by @mask to @val.
+ * The same @mask is used as the read-back mask, so all written bits are
+ * subject to verification.
+ */
+static void
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+		   u32 val)
+{
+	struct i915_wa entry = {};
+
+	entry.reg = reg;
+	entry.mask = mask;
+	entry.val = val;
+	entry.read = mask;
+
+	_wa_add(wal, &entry);
+}
+
+/*
+ * Queue a masked-register enable: @val is used as the mask and
+ * _MASKED_BIT_ENABLE(@val) as the value to write.
+ */
+static void
+wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+	const u32 enable = _MASKED_BIT_ENABLE(val);
+
+	wa_write_masked_or(wal, reg, val, enable);
+}
+
+/* Queue a workaround that covers every bit of @reg (mask ~0) with @val. */
+static void
+wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+	const u32 every_bit = ~0;
+
+	wa_write_masked_or(wal, reg, every_bit, val);
+}
+
+/* Queue a workaround that sets the bits of @val in @reg; @val is also the mask. */
+static void
+wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+	const u32 bits = val;
+
+	wa_write_masked_or(wal, reg, bits, bits);
+}
+
+/*
+ * Like wa_write_masked_or(), but the read-back mask is left at zero so that
+ * wa_verify() never reports a mismatch for this entry.
+ */
+static void
+ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
+{
+	const struct i915_wa unverified = {
+		.val = val,
+		.mask = mask,
+		.reg = reg,
+		/* Bonkers HW, skip verifying */
+	};
+
+	_wa_add(wal, &unverified);
+}
+
+#define WA_SET_BIT_MASKED(addr, mask) \
+ wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) /* needs a local 'wal' at the call site */
+
+#define WA_CLR_BIT_MASKED(addr, mask) \
+ wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) /* needs a local 'wal' at the call site */
+
+#define WA_SET_FIELD_MASKED(addr, mask, value) \
+ wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) /* needs a local 'wal' at the call site */
+
+static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /*
+ * Context workarounds shared by Broadwell and Cherryview (called from
+ * bdw_ctx_workarounds_init() and chv_ctx_workarounds_init()). Each
+ * WA_* macro queues one entry on engine->ctx_wa_list via 'wal'.
+ */
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+
+ /* WaDisableAsyncFlipPerfMode:bdw,chv */
+ WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+
+ /* WaDisablePartialInstShootdown:bdw,chv */
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+ PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+ /* Use Force Non-Coherent whenever executing a 3D context. This is a
+ * workaround for a possible hang in the unlikely event a TLB
+ * invalidation occurs during a PSD flush.
+ */
+ /* WaForceEnableNonCoherent:bdw,chv */
+ /* WaHdcDisableFetchWhenMasked:bdw,chv */
+ WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ HDC_DONOT_FETCH_MEM_WHEN_MASKED |
+ HDC_FORCE_NON_COHERENT);
+
+ /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
+ * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
+ * polygons in the same 8x4 pixel/sample area to be processed without
+ * stalling waiting for the earlier ones to write to Hierarchical Z
+ * buffer."
+ *
+ * This optimization is off by default for BDW and CHV; turn it on.
+ */
+ WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+
+ /* Wa4x4STCOptimizationDisable:bdw,chv */
+ WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+ GEN6_WIZ_HASHING_MASK,
+ GEN6_WIZ_HASHING_16x4);
+}
+
+static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /* Broadwell: the shared gen8 context list plus the BDW-only entries below. */
+ struct drm_i915_private *i915 = engine->i915;
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ gen8_ctx_workarounds_init(engine);
+
+ /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+ /* WaDisableDopClockGating:bdw
+ *
+ * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
+ * to disable EUTC clock gating.
+ */
+ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+ DOP_CLOCK_GATING_DISABLE);
+
+ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+ GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+ WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ /* WaForceContextSaveRestoreNonCoherent:bdw */
+ HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+ /* WaDisableFenceDestinationToSLM:bdw (pre-prod); GT3 only */
+ (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+}
+
+static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /* Cherryview: the shared gen8 context list plus the CHV-only entries below. */
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ gen8_ctx_workarounds_init(engine);
+
+ /* WaDisableThreadStallDopClockGating:chv */
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+ /* Improve HiZ throughput on CHV. */
+ WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
+}
+
+static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /*
+ * Context workarounds shared by the gen9 platforms; called first by the
+ * skl/bxt/kbl/glk/cfl context init functions. Entries are queued on
+ * engine->ctx_wa_list via 'wal'; repeated writes to one register are
+ * merged by _wa_add().
+ */
+ struct drm_i915_private *i915 = engine->i915;
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ if (HAS_LLC(i915)) {
+ /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+ *
+ * Must match Display Engine. See
+ * WaCompressedResourceDisplayNewHashMode.
+ */
+ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN9_PBE_COMPRESSED_HASH_SELECTION);
+ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+ GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+ }
+
+ /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
+ /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+ FLOW_CONTROL_ENABLE |
+ PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+ /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
+ if (!IS_COFFEELAKE(i915))
+ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+ GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
+
+ /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
+ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
+ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+ GEN9_ENABLE_YV12_BUGFIX |
+ GEN9_ENABLE_GPGPU_PREEMPTION);
+
+ /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
+ /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
+ WA_SET_BIT_MASKED(CACHE_MODE_1,
+ GEN8_4x4_STC_OPTIMIZATION_DISABLE |
+ GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
+
+ /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
+ WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
+ GEN9_CCS_TLB_PREFETCH_ENABLE);
+
+ /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
+ WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+ HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
+
+ /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
+ * both tied to WaForceContextSaveRestoreNonCoherent
+ * in some hsds for skl. We keep the tie for all gen9. The
+ * documentation is a bit hazy and so we want to get common behaviour,
+ * even though there is no clear evidence we would need both on kbl/bxt.
+ * This area has been a source of system hangs so we play it safe
+ * and mimic the skl regardless of what bspec says.
+ *
+ * Use Force Non-Coherent whenever executing a 3D context. This
+ * is a workaround for a possible hang in the unlikely event
+ * a TLB invalidation occurs during a PSD flush.
+ */
+
+ /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
+ WA_SET_BIT_MASKED(HDC_CHICKEN0,
+ HDC_FORCE_NON_COHERENT);
+
+ /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
+ if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
+ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+ GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+ /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
+ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
+
+ /*
+ * Supporting preemption with fine-granularity requires changes in the
+ * batch buffer programming. Since we can't break old userspace, we
+ * need to set our default preemption level to a safe value. Userspace is
+ * still able to use more fine-grained preemption levels, since in
+ * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
+ * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
+ * not real HW workarounds, but merely a way to start using preemption
+ * while maintaining the old contract with userspace.
+ */
+
+ /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+ WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+ /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+ GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+ /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
+ if (IS_GEN9_LP(i915))
+ WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
+}
+
+/*
+ * Program the per-slice IZ hashing fields of GEN7_GT_MODE from the 7-EU
+ * subslice information. A slice contributes only when exactly one of its
+ * subslices has 7 EUs; if no slice yields a non-zero value nothing is
+ * queued.
+ */
+static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
+{
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(engine->i915)->sseu;
+	struct i915_wa_list *wal = &engine->ctx_wa_list;
+	u8 vals[3] = { 0, 0, 0 };
+	unsigned int slice;
+
+	for (slice = 0; slice < 3; slice++) {
+		u8 ss;
+
+		/*
+		 * is_power_of_2() accepts only masks with a single bit set,
+		 * i.e. slices where one, and only one, subslice has 7 EUs.
+		 */
+		if (is_power_of_2(sseu->subslice_7eu[slice])) {
+			/*
+			 * The mask is non-zero here and at most 4 subslices
+			 * exist per slice, so 0 <= ss <= 3.
+			 */
+			ss = ffs(sseu->subslice_7eu[slice]) - 1;
+			vals[slice] = 3 - ss;
+		}
+	}
+
+	if (!(vals[0] || vals[1] || vals[2]))
+		return;
+
+	/* Tune IZ hashing. See intel_device_info_runtime_init() */
+	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+			    GEN9_IZ_HASHING_MASK(2) |
+			    GEN9_IZ_HASHING_MASK(1) |
+			    GEN9_IZ_HASHING_MASK(0),
+			    GEN9_IZ_HASHING(2, vals[2]) |
+			    GEN9_IZ_HASHING(1, vals[1]) |
+			    GEN9_IZ_HASHING(0, vals[0]));
+}
+
+static void skl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /* Skylake: the shared gen9 context list, then the IZ hashing tuning. */
+ gen9_ctx_workarounds_init(engine);
+ skl_tune_iz_hashing(engine);
+}
+
+static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /* Broxton: the shared gen9 context list plus the BXT-only entries below. */
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ gen9_ctx_workarounds_init(engine);
+
+ /* WaDisableThreadStallDopClockGating:bxt */
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+ STALL_DOP_GATING_DISABLE);
+
+ /* WaToEnableHwFixForPushConstHWBug:bxt */
+ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+}
+
+static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /* Kabylake: the shared gen9 context list plus the KBL-only entries below. */
+ struct drm_i915_private *i915 = engine->i915;
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ gen9_ctx_workarounds_init(engine);
+
+ /* WaToEnableHwFixForPushConstHWBug:kbl (revision C0 and later) */
+ if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
+ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+ /* WaDisableSbeCacheDispatchPortSharing:kbl */
+ WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+}
+
+static void glk_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /* Geminilake: the shared gen9 context list plus one GLK entry. */
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ gen9_ctx_workarounds_init(engine);
+
+ /* WaToEnableHwFixForPushConstHWBug:glk */
+ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+}
+
+static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /* Coffeelake: the shared gen9 context list plus the CFL-only entries below. */
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ gen9_ctx_workarounds_init(engine);
+
+ /* WaToEnableHwFixForPushConstHWBug:cfl */
+ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+ /* WaDisableSbeCacheDispatchPortSharing:cfl */
+ WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+}
+
+static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /*
+ * Cannonlake context workarounds. No shared helper is called here; the
+ * whole list is built below, with some entries gated on the stepping.
+ */
+ struct drm_i915_private *i915 = engine->i915;
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ /* WaForceContextSaveRestoreNonCoherent:cnl */
+ WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
+ HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
+
+ /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
+ if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
+
+ /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
+ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+ /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
+ if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
+ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
+
+ /* WaPushConstantDereferenceHoldDisable:cnl */
+ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
+
+ /* FtrEnableFastAnisoL1BankingFix:cnl */
+ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
+
+ /* WaDisable3DMidCmdPreemption:cnl */
+ WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+ /* WaDisableGPGPUMidCmdPreemption:cnl */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+ GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+ /* WaDisableEarlyEOT:cnl */
+ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
+}
+
+static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{ /*
+ * Icelake (gen11) context workarounds. Entries marked pre-prod are
+ * gated on the stepping via IS_ICL_REVID().
+ */
+ struct drm_i915_private *i915 = engine->i915;
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ /* Wa_1604370585:icl (pre-prod)
+ * Formerly known as WaPushConstantDereferenceHoldDisable
+ */
+ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
+ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+ PUSH_CONSTANT_DEREF_DISABLE);
+
+ /* WaForceEnableNonCoherent:icl
+ * This is not the same workaround as in early Gen9 platforms, where
+ * lacking this could cause system hangs, but coherency performance
+ * overhead is high and only a few compute workloads really need it
+ * (the register is whitelisted in hardware now, so UMDs can opt in
+ * for coherency if they have a good reason).
+ */
+ WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
+
+ /* Wa_2006611047:icl (pre-prod)
+ * Formerly known as WaDisableImprovedTdlClkGating
+ */
+ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
+ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+ GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
+
+ /* WaEnableStateCacheRedirectToCS:icl */
+ WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
+ GEN11_STATE_CACHE_REDIRECT_TO_CS);
+
+ /* Wa_2006665173:icl (pre-prod) */
+ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
+ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
+ GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
+
+ /* WaEnableFloatBlendOptimization:icl
+ * A zero mask also gives a zero read-back mask, so wa_verify()
+ * never flags this entry.
+ */
+ wa_write_masked_or(wal,
+ GEN10_CACHE_MODE_SS,
+ 0, /* write-only, so skip validation */
+ _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
+
+ /* WaDisableGPGPUMidThreadPreemption:icl */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+ GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
+}
+
+void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
+{ /*
+ * Build engine->ctx_wa_list by dispatching to the platform-specific
+ * context workaround function. Platforms older than gen8 return early,
+ * after wa_init_start() but without calling wa_init_finish(). An
+ * unrecognised newer platform hits MISSING_CASE() and still finishes
+ * the (empty) list.
+ */
+ struct drm_i915_private *i915 = engine->i915;
+ struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+ wa_init_start(wal, "context");
+
+ if (IS_GEN(i915, 11))
+ icl_ctx_workarounds_init(engine);
+ else if (IS_CANNONLAKE(i915))
+ cnl_ctx_workarounds_init(engine);
+ else if (IS_COFFEELAKE(i915))
+ cfl_ctx_workarounds_init(engine);
+ else if (IS_GEMINILAKE(i915))
+ glk_ctx_workarounds_init(engine);
+ else if (IS_KABYLAKE(i915))
+ kbl_ctx_workarounds_init(engine);
+ else if (IS_BROXTON(i915))
+ bxt_ctx_workarounds_init(engine);
+ else if (IS_SKYLAKE(i915))
+ skl_ctx_workarounds_init(engine);
+ else if (IS_CHERRYVIEW(i915))
+ chv_ctx_workarounds_init(engine);
+ else if (IS_BROADWELL(i915))
+ bdw_ctx_workarounds_init(engine);
+ else if (INTEL_GEN(i915) < 8)
+ return;
+ else
+ MISSING_CASE(INTEL_GEN(i915));
+
+ wa_init_finish(wal);
+}
+
+/*
+ * Emit the engine's context workaround list into @rq as a single
+ * MI_LOAD_REGISTER_IMM packet, bracketed by EMIT_BARRIER flushes.
+ *
+ * Returns 0 on success (or when the list is empty), otherwise the error
+ * from the flush or from reserving ring space.
+ */
+int intel_engine_emit_ctx_wa(struct i915_request *rq)
+{
+	const struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
+	const struct i915_wa *wa, *last;
+	u32 *cs;
+	int err;
+
+	if (!wal->count)
+		return 0;
+
+	err = rq->engine->emit_flush(rq, EMIT_BARRIER);
+	if (err)
+		return err;
+
+	/* One header dword, a (reg, value) pair per entry, one NOOP. */
+	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
+
+	last = wal->list + wal->count;
+	for (wa = wal->list; wa != last; wa++) {
+		*cs++ = i915_mmio_reg_offset(wa->reg);
+		*cs++ = wa->val;
+	}
+
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	return rq->engine->emit_flush(rq, EMIT_BARRIER);
+}
+
+static void
+gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /*
+ * GT workarounds shared by the gen9 platforms. The two GAM_ECOCHK
+ * writes below target the same register and are merged into one
+ * list entry by _wa_add().
+ */
+ /* WaDisableKillLogic:bxt,skl,kbl */
+ if (!IS_COFFEELAKE(i915))
+ wa_write_or(wal,
+ GAM_ECOCHK,
+ ECOCHK_DIS_TLB);
+
+ if (HAS_LLC(i915)) {
+ /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+ *
+ * Must match Display Engine. See
+ * WaCompressedResourceDisplayNewHashMode.
+ */
+ wa_write_or(wal,
+ MMCD_MISC_CTRL,
+ MMCD_PCLA | MMCD_HOTSPOT_EN);
+ }
+
+ /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
+ wa_write_or(wal,
+ GAM_ECOCHK,
+ BDW_DISABLE_HDC_INVALIDATION);
+}
+
+static void
+skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /* Skylake: the shared gen9 GT list plus the SKL-only entries below. */
+ gen9_gt_workarounds_init(i915, wal);
+
+ /* WaDisableGafsUnitClkGating:skl */
+ wa_write_or(wal,
+ GEN7_UCGCTL4,
+ GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+ /* WaInPlaceDecompressionHang:skl (revision H0 and later) */
+ if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
+ wa_write_or(wal,
+ GEN9_GAMT_ECO_REG_RW_IA,
+ GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /* Broxton: the shared gen9 GT list plus one BXT entry. */
+ gen9_gt_workarounds_init(i915, wal);
+
+ /* WaInPlaceDecompressionHang:bxt */
+ wa_write_or(wal,
+ GEN9_GAMT_ECO_REG_RW_IA,
+ GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /* Kabylake: the shared gen9 GT list plus the KBL-only entries below. */
+ gen9_gt_workarounds_init(i915, wal);
+
+ /* WaDisableDynamicCreditSharing:kbl (revisions up to B0) */
+ if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
+ wa_write_or(wal,
+ GAMT_CHKN_BIT_REG,
+ GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+
+ /* WaDisableGafsUnitClkGating:kbl */
+ wa_write_or(wal,
+ GEN7_UCGCTL4,
+ GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+ /* WaInPlaceDecompressionHang:kbl */
+ wa_write_or(wal,
+ GEN9_GAMT_ECO_REG_RW_IA,
+ GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /* Geminilake adds nothing beyond the shared gen9 GT list. */
+ gen9_gt_workarounds_init(i915, wal);
+}
+
+static void
+cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /* Coffeelake: the shared gen9 GT list plus the CFL-only entries below. */
+ gen9_gt_workarounds_init(i915, wal);
+
+ /* WaDisableGafsUnitClkGating:cfl */
+ wa_write_or(wal,
+ GEN7_UCGCTL4,
+ GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+ /* WaInPlaceDecompressionHang:cfl */
+ wa_write_or(wal,
+ GEN9_GAMT_ECO_REG_RW_IA,
+ GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /*
+ * Queue the GEN8_MCR_SELECTOR programming that steers slice/subslice
+ * specific MMIO reads, after sanity-checking the L3 bank fuses on
+ * single-slice gen10+ parts. Called from the cnl and icl GT
+ * workaround functions.
+ */
+ const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+ u32 mcr_slice_subslice_mask;
+
+ /*
+ * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
+ * L3Banks could be fused off in single slice scenario. If that is
+ * the case, we might need to program MCR select to a valid L3Bank
+ * by default, to make sure we correctly read certain registers
+ * later on (in the range 0xB100 - 0xB3FF).
+ * This might be incompatible with
+ * WaProgramMgsrForCorrectSliceSpecificMmioReads.
+ * Fortunately, this should not happen in production hardware, so
+ * we only assert that this is the case (instead of implementing
+ * something more complex that requires checking the range of every
+ * MMIO read).
+ */
+ if (INTEL_GEN(i915) >= 10 &&
+ is_power_of_2(sseu->slice_mask)) {
+ /*
+ * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
+ * enabled subslice, no need to redirect MCR packet
+ */
+ u32 slice = fls(sseu->slice_mask); /* NOTE(review): fls() is
+ * 1-based, yet the result indexes subslice_mask[] below;
+ * confirm whether 'fls(...) - 1' was intended.
+ */
+ u32 fuse3 =
+ intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
+ u8 ss_mask = sseu->subslice_mask[slice];
+
+ u8 enabled_mask = (ss_mask | ss_mask >>
+ GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
+ u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
+
+ /*
+ * Production silicon should have matched L3Bank and
+ * subslice enabled
+ */
+ WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
+ }
+
+ if (INTEL_GEN(i915) >= 11)
+ mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
+ GEN11_MCR_SUBSLICE_MASK;
+ else
+ mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
+ GEN8_MCR_SUBSLICE_MASK;
+ /*
+ * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
+ * Before any MMIO read into slice/subslice specific registers, MCR
+ * packet control register needs to be programmed to point to any
+ * enabled s/ss pair. Otherwise, incorrect values will be returned.
+ * This means each subsequent MMIO read will be forwarded to a
+ * specific s/ss combination, but this is OK since these registers
+ * are consistent across s/ss in almost all cases. In the rare
+ * occasions, such as INSTDONE, where this value is dependent
+ * on s/ss combo, the read should be done with read_subslice_reg.
+ */
+ wa_write_masked_or(wal,
+ GEN8_MCR_SELECTOR,
+ mcr_slice_subslice_mask,
+ intel_calculate_mcr_s_ss_select(i915));
+}
+
+static void
+cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /* Cannonlake GT workarounds: MCR steering first, then the entries below. */
+ wa_init_mcr(i915, wal);
+
+ /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
+ if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
+ wa_write_or(wal,
+ GAMT_CHKN_BIT_REG,
+ GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
+
+ /* WaInPlaceDecompressionHang:cnl */
+ wa_write_or(wal,
+ GEN9_GAMT_ECO_REG_RW_IA,
+ GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /*
+ * Icelake GT workarounds: MCR steering first, then the entries below.
+ * Entries marked pre-prod are gated on the stepping.
+ */
+ wa_init_mcr(i915, wal);
+
+ /* WaInPlaceDecompressionHang:icl */
+ wa_write_or(wal,
+ GEN9_GAMT_ECO_REG_RW_IA,
+ GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+ /* WaModifyGamTlbPartitioning:icl */
+ wa_write_masked_or(wal,
+ GEN11_GACB_PERF_CTRL,
+ GEN11_HASH_CTRL_MASK,
+ GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
+
+ /* Wa_1405766107:icl
+ * Formerly known as WaCL2SFHalfMaxAlloc
+ */
+ wa_write_or(wal,
+ GEN11_LSN_UNSLCVC,
+ GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
+ GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
+
+ /* Wa_220166154:icl
+ * Formerly known as WaDisCtxReload
+ */
+ wa_write_or(wal,
+ GEN8_GAMW_ECO_DEV_RW_IA,
+ GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
+
+ /* Wa_1405779004:icl (pre-prod) */
+ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ MSCUNIT_CLKGATE_DIS);
+
+ /* Wa_1406680159:icl */
+ wa_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE,
+ GWUNIT_CLKGATE_DIS);
+
+ /* Wa_1406838659:icl (pre-prod) */
+ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
+ wa_write_or(wal,
+ INF_UNIT_LEVEL_CLKGATE,
+ CGPSF_CLKGATE_DIS);
+
+ /* Wa_1406463099:icl
+ * Formerly known as WaGamTlbPendError
+ */
+ wa_write_or(wal,
+ GAMT_CHKN_BIT_REG,
+ GAMT_CHKN_DISABLE_L3_COH_PIPE);
+}
+
+static void
+gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{ /*
+ * Dispatch to the platform-specific GT workaround function. Gen8 and
+ * older add nothing; an unrecognised newer platform is reported via
+ * MISSING_CASE().
+ */
+ if (IS_GEN(i915, 11))
+ icl_gt_workarounds_init(i915, wal);
+ else if (IS_CANNONLAKE(i915))
+ cnl_gt_workarounds_init(i915, wal);
+ else if (IS_COFFEELAKE(i915))
+ cfl_gt_workarounds_init(i915, wal);
+ else if (IS_GEMINILAKE(i915))
+ glk_gt_workarounds_init(i915, wal);
+ else if (IS_KABYLAKE(i915))
+ kbl_gt_workarounds_init(i915, wal);
+ else if (IS_BROXTON(i915))
+ bxt_gt_workarounds_init(i915, wal);
+ else if (IS_SKYLAKE(i915))
+ skl_gt_workarounds_init(i915, wal);
+ else if (INTEL_GEN(i915) <= 8)
+ return;
+ else
+ MISSING_CASE(INTEL_GEN(i915));
+}
+
+void intel_gt_init_workarounds(struct drm_i915_private *i915)
+{ /* Build the device-wide "GT" workaround list stored in i915->gt_wa_list. */
+ struct i915_wa_list *wal = &i915->gt_wa_list;
+
+ wa_init_start(wal, "GT");
+ gt_init_workarounds(i915, wal);
+ wa_init_finish(wal);
+}
+
+/*
+ * Collect the union of forcewake domains needed to both read and write
+ * every register in @wal.
+ */
+static enum forcewake_domains
+wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
+{
+	const struct i915_wa *wa = wal->list;
+	enum forcewake_domains domains = 0;
+	unsigned int remaining = wal->count;
+
+	while (remaining--) {
+		domains |= intel_uncore_forcewake_for_reg(uncore,
+							  wa->reg,
+							  FW_REG_READ |
+							  FW_REG_WRITE);
+		wa++;
+	}
+
+	return domains;
+}
+
+/*
+ * Compare the register value @cur against the expected value of @wa under
+ * its read-back mask. Logs an error tagged with @name and @from and returns
+ * false on mismatch; returns true otherwise.
+ */
+static bool
+wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
+{
+	const u32 mismatch = (cur ^ wa->val) & wa->read;
+
+	if (!mismatch)
+		return true;
+
+	DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
+		  name, from, i915_mmio_reg_offset(wa->reg),
+		  cur, cur & wa->read,
+		  wa->val, wa->mask);
+
+	return false;
+}
+
+/*
+ * Apply every entry of @wal with a read-modify-write over MMIO, holding the
+ * uncore lock and the required forcewake domains for the whole pass. With
+ * CONFIG_DRM_I915_DEBUG_GEM each write is read back and checked immediately.
+ */
+static void
+wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
+{
+	const struct i915_wa *wa, *last;
+	enum forcewake_domains fw;
+	unsigned long flags;
+
+	if (wal->count == 0)
+		return;
+
+	fw = wal_get_fw_for_rmw(uncore, wal);
+
+	spin_lock_irqsave(&uncore->lock, flags);
+	intel_uncore_forcewake_get__locked(uncore, fw);
+
+	last = wal->list + wal->count;
+	for (wa = wal->list; wa != last; wa++) {
+		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
+
+		if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+			continue;
+
+		wa_verify(wa,
+			  intel_uncore_read_fw(uncore, wa->reg),
+			  wal->name, "application");
+	}
+
+	intel_uncore_forcewake_put__locked(uncore, fw);
+	spin_unlock_irqrestore(&uncore->lock, flags);
+}
+
+void intel_gt_apply_workarounds(struct drm_i915_private *i915)
+{ /* Write the previously built i915->gt_wa_list to the hardware. */
+ wa_list_apply(&i915->uncore, &i915->gt_wa_list);
+}
+
+/*
+ * Read back every register in @wal over MMIO and check it with wa_verify().
+ * All entries are always checked, so each lost workaround is logged.
+ *
+ * Returns true only if every entry verified.
+ */
+static bool wa_list_verify(struct intel_uncore *uncore,
+			   const struct i915_wa_list *wal,
+			   const char *from)
+{
+	const struct i915_wa *wa = wal->list;
+	const struct i915_wa *last = wa + wal->count;
+	bool all_ok = true;
+
+	for (; wa != last; wa++) {
+		u32 cur = intel_uncore_read(uncore, wa->reg);
+
+		if (!wa_verify(wa, cur, wal->name, from))
+			all_ok = false;
+	}
+
+	return all_ok;
+}
+
+bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
+ const char *from)
+{ /* Returns true if every GT workaround still reads back as expected; @from tags the log message. */
+ return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
+}
+
+/*
+ * Add @reg to the whitelist @wal. Only the register is recorded; mask, value
+ * and read-back mask stay zero. A debug warning fires, and the register is
+ * skipped, once RING_MAX_NONPRIV_SLOTS entries are already present.
+ */
+static void
+whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
+{
+	struct i915_wa entry = {};
+
+	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
+		return;
+
+	entry.reg = reg;
+	_wa_add(wal, &entry);
+}
+
+static void gen9_whitelist_build(struct i915_wa_list *w)
+{ /* Whitelist entries shared by the gen9 platforms. */
+ /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
+ whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
+
+ /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
+ whitelist_reg(w, GEN8_CS_CHICKEN1);
+
+ /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
+ whitelist_reg(w, GEN8_HDC_CHICKEN1);
+}
+
+static void skl_whitelist_build(struct i915_wa_list *w)
+{ /* Skylake: the shared gen9 whitelist plus GEN8_L3SQCREG4. */
+ gen9_whitelist_build(w);
+
+ /* WaDisableLSQCROPERFforOCL:skl */
+ whitelist_reg(w, GEN8_L3SQCREG4);
+}
+
+static void bxt_whitelist_build(struct i915_wa_list *w)
+{ /* Broxton uses the shared gen9 whitelist unchanged. */
+ gen9_whitelist_build(w);
+}
+
+static void kbl_whitelist_build(struct i915_wa_list *w)
+{ /* Kabylake: the shared gen9 whitelist plus GEN8_L3SQCREG4. */
+ gen9_whitelist_build(w);
+
+ /* WaDisableLSQCROPERFforOCL:kbl */
+ whitelist_reg(w, GEN8_L3SQCREG4);
+}
+
+static void glk_whitelist_build(struct i915_wa_list *w)
+{ /* Geminilake: the shared gen9 whitelist plus GEN9_SLICE_COMMON_ECO_CHICKEN1. */
+ gen9_whitelist_build(w);
+
+ /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+}
+
+static void cfl_whitelist_build(struct i915_wa_list *w)
+{ /* Coffeelake uses the shared gen9 whitelist unchanged. */
+ gen9_whitelist_build(w);
+}
+
+static void cnl_whitelist_build(struct i915_wa_list *w)
+{ /* Cannonlake whitelists a single register; no shared list is used. */
+ /* WaEnablePreemptionGranularityControlByUMD:cnl */
+ whitelist_reg(w, GEN8_CS_CHICKEN1);
+}
+
+static void icl_whitelist_build(struct i915_wa_list *w)
+{ /* Icelake whitelist: two registers, no shared list is used. */
+ /* WaAllowUMDToModifyHalfSliceChicken7:icl */
+ whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
+
+ /* WaAllowUMDToModifySamplerMode:icl */
+ whitelist_reg(w, GEN10_SAMPLER_MODE);
+}
+
+void intel_engine_init_whitelist(struct intel_engine_cs *engine)
+{ /*
+ * Build engine->whitelist for the platform. Asserts that @engine is
+ * RCS0. Gen8 and older return early, after wa_init_start() but without
+ * calling wa_init_finish(); an unrecognised newer platform is reported
+ * via MISSING_CASE() and still finishes the (empty) list.
+ */
+ struct drm_i915_private *i915 = engine->i915;
+ struct i915_wa_list *w = &engine->whitelist;
+
+ GEM_BUG_ON(engine->id != RCS0);
+
+ wa_init_start(w, "whitelist");
+
+ if (IS_GEN(i915, 11))
+ icl_whitelist_build(w);
+ else if (IS_CANNONLAKE(i915))
+ cnl_whitelist_build(w);
+ else if (IS_COFFEELAKE(i915))
+ cfl_whitelist_build(w);
+ else if (IS_GEMINILAKE(i915))
+ glk_whitelist_build(w);
+ else if (IS_KABYLAKE(i915))
+ kbl_whitelist_build(w);
+ else if (IS_BROXTON(i915))
+ bxt_whitelist_build(w);
+ else if (IS_SKYLAKE(i915))
+ skl_whitelist_build(w);
+ else if (INTEL_GEN(i915) <= 8)
+ return;
+ else
+ MISSING_CASE(INTEL_GEN(i915));
+
+ wa_init_finish(w);
+}
+
+/*
+ * Program the engine's RING_FORCE_TO_NONPRIV slots from engine->whitelist.
+ * Slots beyond the end of the list are pointed at RING_NOPID so that no
+ * stale entry survives. Nothing is written when the whitelist is empty.
+ */
+void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
+{
+	const struct i915_wa_list *wal = &engine->whitelist;
+	struct intel_uncore *uncore = engine->uncore;
+	const u32 base = engine->mmio_base;
+	unsigned int slot = 0;
+
+	if (wal->count == 0)
+		return;
+
+	while (slot < wal->count) {
+		const struct i915_wa *wa = &wal->list[slot];
+
+		intel_uncore_write(uncore,
+				   RING_FORCE_TO_NONPRIV(base, slot),
+				   i915_mmio_reg_offset(wa->reg));
+		slot++;
+	}
+
+	/* And clear the rest just in case of garbage */
+	while (slot < RING_MAX_NONPRIV_SLOTS) {
+		intel_uncore_write(uncore,
+				   RING_FORCE_TO_NONPRIV(base, slot),
+				   i915_mmio_reg_offset(RING_NOPID(base)));
+		slot++;
+	}
+}
+
+static void
+rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{ /*
+ * Engine workarounds for the render engine (the caller selects this
+ * function for RCS0), grouped by platform/generation. Several entries
+ * target the same register (e.g. GEN8_GARBCNTL, GEN8_L3SQCREG4) and
+ * are merged by _wa_add().
+ */
+ struct drm_i915_private *i915 = engine->i915;
+
+ if (IS_GEN(i915, 11)) {
+ /* This is not a Wa. Enable for better image quality */
+ wa_masked_en(wal,
+ _3D_CHICKEN3,
+ _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
+
+ /* WaPipelineFlushCoherentLines:icl */
+ ignore_wa_write_or(wal,
+ GEN8_L3SQCREG4,
+ GEN8_LQSC_FLUSH_COHERENT_LINES,
+ GEN8_LQSC_FLUSH_COHERENT_LINES);
+
+ /*
+ * Wa_1405543622:icl
+ * Formerly known as WaGAPZPriorityScheme
+ */
+ wa_write_or(wal,
+ GEN8_GARBCNTL,
+ GEN11_ARBITRATION_PRIO_ORDER_MASK);
+
+ /*
+ * Wa_1604223664:icl
+ * Formerly known as WaL3BankAddressHashing
+ */
+ wa_write_masked_or(wal,
+ GEN8_GARBCNTL,
+ GEN11_HASH_CTRL_EXCL_MASK,
+ GEN11_HASH_CTRL_EXCL_BIT0);
+ wa_write_masked_or(wal,
+ GEN11_GLBLINVL,
+ GEN11_BANK_HASH_ADDR_EXCL_MASK,
+ GEN11_BANK_HASH_ADDR_EXCL_BIT0);
+
+ /*
+ * Wa_1405733216:icl
+ * Formerly known as WaDisableCleanEvicts
+ */
+ ignore_wa_write_or(wal,
+ GEN8_L3SQCREG4,
+ GEN11_LQSC_CLEAN_EVICT_DISABLE,
+ GEN11_LQSC_CLEAN_EVICT_DISABLE);
+
+ /* WaForwardProgressSoftReset:icl */
+ wa_write_or(wal,
+ GEN10_SCRATCH_LNCF2,
+ PMFLUSHDONE_LNICRSDROP |
+ PMFLUSH_GAPL3UNBLOCK |
+ PMFLUSHDONE_LNEBLK);
+
+ /* Wa_1406609255:icl (pre-prod) */
+ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
+ wa_write_or(wal,
+ GEN7_SARCHKMD,
+ GEN7_DISABLE_DEMAND_PREFETCH |
+ GEN7_DISABLE_SAMPLER_PREFETCH);
+ }
+
+ if (IS_GEN_RANGE(i915, 9, 11)) {
+ /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
+ wa_masked_en(wal,
+ GEN7_FF_SLICE_CS_CHICKEN1,
+ GEN9_FFSC_PERCTX_PREEMPT_CTRL);
+ }
+
+ if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
+ /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
+ wa_write_or(wal,
+ GEN8_GARBCNTL,
+ GEN9_GAPS_TSV_CREDIT_DISABLE);
+ }
+
+ if (IS_BROXTON(i915)) {
+ /* WaDisablePooledEuLoadBalancingFix:bxt */
+ wa_masked_en(wal,
+ FF_SLICE_CS_CHICKEN2,
+ GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
+ }
+
+ if (IS_GEN(i915, 9)) {
+ /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
+ wa_masked_en(wal,
+ GEN9_CSFE_CHICKEN1_RCS,
+ GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
+
+ /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
+ wa_write_or(wal,
+ BDW_SCRATCH1,
+ GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+
+ /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
+ if (IS_GEN9_LP(i915))
+ wa_write_masked_or(wal,
+ GEN8_L3SQCREG1,
+ L3_PRIO_CREDITS_MASK,
+ L3_GENERAL_PRIO_CREDITS(62) |
+ L3_HIGH_PRIO_CREDITS(2));
+
+ /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
+ wa_write_or(wal,
+ GEN8_L3SQCREG4,
+ GEN8_LQSC_FLUSH_COHERENT_LINES);
+ }
+}
+
+static void
+xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{ /* Engine workarounds for every engine other than RCS0 (see engine_init_workarounds()). */
+ struct drm_i915_private *i915 = engine->i915;
+
+ /* WaKBLVECSSemaphoreWaitPoll:kbl (revisions A0 through E0) */
+ if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
+ wa_write(wal,
+ RING_SEMA_WAIT_POLL(engine->mmio_base),
+ 1);
+ }
+}
+
+static void
+engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{ /* Fill @wal with the render-engine list for RCS0, or the generic list for any other engine. */
+ if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
+ return;
+
+ if (engine->id == RCS0)
+ rcs_engine_wa_init(engine, wal);
+ else
+ xcs_engine_wa_init(engine, wal);
+}
+
+void intel_engine_init_workarounds(struct intel_engine_cs *engine)
+{ /*
+ * Build engine->wa_list, named after the engine. Warns and does
+ * nothing on platforms older than gen8.
+ */
+ struct i915_wa_list *wal = &engine->wa_list;
+
+ if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
+ return;
+
+ wa_init_start(wal, engine->name);
+ engine_init_workarounds(engine, wal);
+ wa_init_finish(wal);
+}
+
+void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
+{ /* Write the previously built engine->wa_list to the hardware. */
+ wa_list_apply(engine->uncore, &engine->wa_list);
+}
+
+/*
+ * Allocate and pin a scratch buffer able to hold @count u32 values, rounded
+ * up to whole pages, in address space @vm.
+ *
+ * Returns the pinned vma, or an ERR_PTR() on failure; the backing object is
+ * released on every failure path.
+ */
+static struct i915_vma *
+create_scratch(struct i915_address_space *vm, int count)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	unsigned int size;
+	int err;
+
+	size = round_up(count * sizeof(u32), PAGE_SIZE);
+
+	obj = i915_gem_object_create_internal(vm->i915, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto put_obj;
+	}
+
+	/* Global GTT vmas are pinned globally, everything else per-user. */
+	if (i915_vma_is_ggtt(vma))
+		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	else
+		err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (!err)
+		return vma;
+
+put_obj:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+/*
+ * Emit one MI_STORE_REGISTER_MEM per entry of @wal into @rq, storing the
+ * register of entry i into the i-th u32 of @vma (addressed via the GGTT).
+ *
+ * Returns 0 on success or the error from intel_ring_begin().
+ */
+static int
+wa_list_srm(struct i915_request *rq,
+	    const struct i915_wa_list *wal,
+	    struct i915_vma *vma)
+{
+	u32 cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+	unsigned int n;
+	u32 *cs;
+
+	/*
+	 * NOTE(review): presumably bumps the command length field for the
+	 * wider gen8+ address - confirm against the command definition.
+	 */
+	if (INTEL_GEN(rq->i915) >= 8)
+		cmd++;
+
+	/* Four dwords are written per entry below. */
+	cs = intel_ring_begin(rq, 4 * wal->count);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	for (n = 0; n < wal->count; n++) {
+		const struct i915_wa *wa = &wal->list[n];
+
+		*cs++ = cmd;
+		*cs++ = i915_mmio_reg_offset(wa->reg);
+		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * n;
+		*cs++ = 0;
+	}
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+/*
+ * Read back every register in @wal from the GPU and check it with
+ * wa_verify().
+ *
+ * A scratch buffer is allocated in the GGTT, a kernel-context request on
+ * @engine stores each register into it, and once the request has completed
+ * the stored values are compared entry by entry. @from is passed through to
+ * wa_verify() together with the list name.
+ *
+ * Returns 0 if the list is empty or every entry verified, -ENXIO if any
+ * entry failed verification, -ETIME if the request did not complete within
+ * HZ / 5, or the error from an allocation/emission step.
+ */
+static int engine_wa_list_verify(struct intel_engine_cs *engine,
+				 const struct i915_wa_list * const wal,
+				 const char *from)
+{
+	const struct i915_wa *wa;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	unsigned int i;
+	u32 *results;
+	int err;
+
+	if (!wal->count)
+		return 0;
+
+	vma = create_scratch(&engine->i915->ggtt.vm, wal->count);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	rq = i915_request_alloc(engine, engine->kernel_context->gem_context);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_vma;
+	}
+
+	/*
+	 * Once allocated, the request has to be handed to i915_request_add()
+	 * even if we failed to emit our payload, otherwise it is leaked.
+	 */
+	err = wa_list_srm(rq, wal, vma);
+	i915_request_add(rq);
+	if (err)
+		goto err_vma;
+
+	if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+		err = -ETIME;
+		goto err_vma;
+	}
+
+	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(results)) {
+		err = PTR_ERR(results);
+		goto err_vma;
+	}
+
+	/* Check every entry so that all mismatches get reported. */
+	err = 0;
+	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+		if (!wa_verify(wa, results[i], wal->name, from))
+			err = -ENXIO;
+
+	i915_gem_object_unpin_map(vma->obj);
+
+err_vma:
+	i915_vma_unpin(vma);
+	i915_vma_put(vma);
+	return err;
+}
+
+/*
+ * Verify the engine's own workaround list; see engine_wa_list_verify().
+ */
+int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
+				    const char *from)
+{
+	const struct i915_wa_list *wal = &engine->wa_list;
+
+	return engine_wa_list_verify(engine, wal, from);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_workarounds.c"
+#endif
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef _INTEL_WORKAROUNDS_H_
+#define _INTEL_WORKAROUNDS_H_
+
+#include <linux/slab.h>
+
+#include "intel_workarounds_types.h"
+
+struct drm_i915_private;
+struct i915_request;
+struct intel_engine_cs;
+
+/*
+ * Release the entry array of @wal and reset the whole list to zero.
+ */
+static inline void intel_wa_list_free(struct i915_wa_list *wal)
+{
+	struct i915_wa *entries = wal->list;
+
+	memset(wal, 0, sizeof(*wal));
+	kfree(entries);
+}
+
+void intel_engine_init_ctx_wa(struct intel_engine_cs *engine);
+int intel_engine_emit_ctx_wa(struct i915_request *rq);
+
+void intel_gt_init_workarounds(struct drm_i915_private *i915);
+void intel_gt_apply_workarounds(struct drm_i915_private *i915);
+bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
+ const char *from);
+
+void intel_engine_init_whitelist(struct intel_engine_cs *engine);
+void intel_engine_apply_whitelist(struct intel_engine_cs *engine);
+
+void intel_engine_init_workarounds(struct intel_engine_cs *engine);
+void intel_engine_apply_workarounds(struct intel_engine_cs *engine);
+int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
+ const char *from);
+
+#endif
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef __INTEL_WORKAROUNDS_TYPES_H__
+#define __INTEL_WORKAROUNDS_TYPES_H__
+
+#include <linux/types.h>
+
+#include "i915_reg.h"
+
+/*
+ * A single workaround entry, describing one register.
+ *
+ * NOTE(review): the meaning of mask/val/read is not visible from this
+ * header; presumably the bits covered, the value to program and the bits
+ * checked on readback - confirm against the users of the list.
+ */
+struct i915_wa {
+ i915_reg_t reg;
+ u32 mask;
+ u32 val;
+ u32 read;
+};
+
+/*
+ * A named array of workaround entries.
+ *
+ * @list holds @count entries and is released with kfree() by
+ * intel_wa_list_free().
+ *
+ * NOTE(review): how wa_count differs from count is not visible from this
+ * header - confirm against the code that builds the list.
+ */
+struct i915_wa_list {
+ const char *name;
+ struct i915_wa *list;
+ unsigned int count;
+ unsigned int wa_count;
+};
+
+#endif /* __INTEL_WORKAROUNDS_TYPES_H__ */
--- /dev/null
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_context.h"
+
+#include "mock_engine.h"
+#include "selftests/mock_request.h"
+
+/*
+ * A fake ring: the intel_ring handed to the driver plus the timeline it
+ * points at. The ring's backing storage is allocated directly behind this
+ * struct.
+ */
+struct mock_ring {
+ struct intel_ring base;
+ struct i915_timeline timeline;
+};
+
+/* Take one pin on the mock timeline @tl. */
+static void mock_timeline_pin(struct i915_timeline *tl)
+{
+	tl->pin_count += 1;
+}
+
+/* Drop one pin on the mock timeline @tl; it must currently be pinned. */
+static void mock_timeline_unpin(struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	tl->pin_count -= 1;
+}
+
+/*
+ * Allocate a mock ring of half a page for @engine.
+ *
+ * The ring contents live in the same allocation, immediately after the
+ * struct mock_ring. Returns the embedded intel_ring, or NULL on failure.
+ */
+static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
+{
+	const unsigned long sz = PAGE_SIZE / 2;
+	struct intel_ring *base;
+	struct mock_ring *ring;
+
+	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
+	if (!ring)
+		return NULL;
+
+	if (i915_timeline_init(engine->i915, &ring->timeline, NULL)) {
+		kfree(ring);
+		return NULL;
+	}
+
+	base = &ring->base;
+	kref_init(&base->ref);
+	base->size = sz;
+	base->effective_size = sz;
+	base->vaddr = (void *)(ring + 1); /* storage follows the struct */
+	base->timeline = &ring->timeline;
+
+	INIT_LIST_HEAD(&base->request_list);
+	intel_ring_update_space(base);
+
+	return base;
+}
+
+/* Tear down the timeline of a mock ring and free the whole allocation. */
+static void mock_ring_free(struct intel_ring *base)
+{
+	struct mock_ring *ring = container_of(base, struct mock_ring, base);
+
+	i915_timeline_fini(&ring->timeline);
+	kfree(ring);
+}
+
+/* Return the request at the head of the fake hw queue, or NULL if empty. */
+static struct i915_request *first_request(struct mock_engine *engine)
+{
+	struct list_head *queue = &engine->hw_queue;
+
+	return list_first_entry_or_null(queue, struct i915_request, mock.link);
+}
+
+/*
+ * Complete @request on the fake hardware: unlink it from the hw queue, mark
+ * it complete and queue breadcrumb processing on its engine.
+ */
+static void advance(struct i915_request *request)
+{
+ list_del_init(&request->mock.link);
+ i915_request_mark_complete(request);
+ GEM_BUG_ON(!i915_request_completed(request));
+
+ intel_engine_queue_breadcrumbs(request->engine);
+}
+
+/*
+ * Timer callback for the fake hardware delay.
+ *
+ * Completes the request at the head of the queue, then keeps completing
+ * requests that have no delay. The first request found with a delay re-arms
+ * the timer and stops the scan.
+ */
+static void hw_delay_complete(struct timer_list *t)
+{
+	struct mock_engine *mock = from_timer(mock, t, hw_delay);
+	struct i915_request *rq;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&mock->hw_lock, irqflags);
+
+	/* Timer fired, first request is complete */
+	rq = first_request(mock);
+	if (rq)
+		advance(rq);
+
+	/*
+	 * Also immediately signal any subsequent 0-delay requests, but
+	 * requeue the timer for the next delayed request.
+	 */
+	for (rq = first_request(mock); rq; rq = first_request(mock)) {
+		if (!rq->mock.delay) {
+			advance(rq);
+			continue;
+		}
+
+		mod_timer(&mock->hw_delay, jiffies + rq->mock.delay);
+		break;
+	}
+
+	spin_unlock_irqrestore(&mock->hw_lock, irqflags);
+}
+
+/* Context unpin hook: drop the pin held on the context ring's timeline. */
+static void mock_context_unpin(struct intel_context *ce)
+{
+	struct i915_timeline *tl = ce->ring->timeline;
+
+	mock_timeline_unpin(tl);
+}
+
+/*
+ * Context destroy hook, invoked through the context's kref: free the mock
+ * ring if one was created, then the context itself. The context must no
+ * longer be pinned.
+ */
+static void mock_context_destroy(struct kref *ref)
+{
+	struct intel_context *ce =
+		container_of(ref, struct intel_context, ref);
+	struct intel_ring *ring;
+
+	GEM_BUG_ON(intel_context_is_pinned(ce));
+
+	ring = ce->ring;
+	if (ring)
+		mock_ring_free(ring);
+
+	intel_context_free(ce);
+}
+
+/*
+ * Context pin hook: create the mock ring on first use and pin its timeline.
+ *
+ * Returns 0 on success or -ENOMEM if the ring could not be allocated.
+ */
+static int mock_context_pin(struct intel_context *ce)
+{
+	struct intel_ring *ring = ce->ring;
+
+	if (!ring) {
+		ring = mock_ring(ce->engine);
+		if (!ring)
+			return -ENOMEM;
+
+		ce->ring = ring;
+	}
+
+	mock_timeline_pin(ring->timeline);
+	return 0;
+}
+
+/* Context operations installed on every mock engine (see mock_engine()). */
+static const struct intel_context_ops mock_context_ops = {
+ .pin = mock_context_pin,
+ .unpin = mock_context_unpin,
+
+ .destroy = mock_context_destroy,
+};
+
+/*
+ * request_alloc hook: reset the mock-specific state of a new request, i.e.
+ * no delay and not linked on any hw queue. Always succeeds.
+ */
+static int mock_request_alloc(struct i915_request *request)
+{
+	request->mock.delay = 0;
+	INIT_LIST_HEAD(&request->mock.link);
+
+	return 0;
+}
+
+/* emit_flush hook: the mock engine emits nothing and always succeeds. */
+static int mock_emit_flush(struct i915_request *request,
+ unsigned int flags)
+{
+ return 0;
+}
+
+/* emit_fini_breadcrumb hook: writes nothing and returns @cs unchanged. */
+static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)
+{
+ return cs;
+}
+
+/*
+ * submit_request hook: submit @request and append it to the fake hw queue.
+ *
+ * If it is the only queued request it is either completed right away (no
+ * delay) or the delay timer is armed for it. Otherwise it waits its turn
+ * behind the requests already queued.
+ */
+static void mock_submit_request(struct i915_request *request)
+{
+	struct mock_engine *mock =
+		container_of(request->engine, struct mock_engine, base);
+	unsigned long irqflags;
+
+	i915_request_submit(request);
+
+	spin_lock_irqsave(&mock->hw_lock, irqflags);
+	list_add_tail(&request->mock.link, &mock->hw_queue);
+
+	if (!list_is_first(&request->mock.link, &mock->hw_queue))
+		goto unlock;
+
+	if (request->mock.delay)
+		mod_timer(&mock->hw_delay, jiffies + request->mock.delay);
+	else
+		advance(request);
+
+unlock:
+	spin_unlock_irqrestore(&mock->hw_lock, irqflags);
+}
+
+/* reset.prepare hook: nothing to do for the mock engine. */
+static void mock_reset_prepare(struct intel_engine_cs *engine)
+{
+}
+
+/* reset.reset hook: does nothing, but asserts that @stalled is false. */
+static void mock_reset(struct intel_engine_cs *engine, bool stalled)
+{
+ GEM_BUG_ON(stalled);
+}
+
+/* reset.finish hook: nothing to do for the mock engine. */
+static void mock_reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+/*
+ * cancel_requests hook: under the engine timeline lock, flag every request
+ * on the engine timeline that has not signaled with -EIO and mark all of
+ * them complete.
+ */
+static void mock_cancel_requests(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&engine->timeline.lock, irqflags);
+
+	/* Mark all submitted requests as skipped. */
+	list_for_each_entry(rq, &engine->timeline.requests, sched.link) {
+		if (!i915_request_signaled(rq))
+			dma_fence_set_error(&rq->fence, -EIO);
+
+		i915_request_mark_complete(rq);
+	}
+
+	spin_unlock_irqrestore(&engine->timeline.lock, irqflags);
+}
+
+/*
+ * Allocate and minimally initialise a mock engine called @name with id @id.
+ *
+ * The allocation carries one extra page directly behind the struct, which
+ * serves as the status page. The engine gets the mock hooks, its own
+ * timeline and breadcrumbs, an empty fake hw queue with its delay timer, and
+ * a pinned kernel context.
+ *
+ * Returns the embedded intel_engine_cs, or NULL on any failure.
+ */
+struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
+				    const char *name,
+				    int id)
+{
+	struct intel_engine_cs *base;
+	struct mock_engine *engine;
+
+	GEM_BUG_ON(id >= I915_NUM_ENGINES);
+
+	engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL);
+	if (!engine)
+		return NULL;
+
+	base = &engine->base;
+
+	/* minimal engine setup for requests */
+	base->i915 = i915;
+	snprintf(base->name, sizeof(base->name), "%s", name);
+	base->id = id;
+	base->mask = BIT(id);
+	base->status_page.addr = (void *)(engine + 1);
+
+	base->cops = &mock_context_ops;
+	base->request_alloc = mock_request_alloc;
+	base->emit_flush = mock_emit_flush;
+	base->emit_fini_breadcrumb = mock_emit_breadcrumb;
+	base->submit_request = mock_submit_request;
+
+	base->reset.prepare = mock_reset_prepare;
+	base->reset.reset = mock_reset;
+	base->reset.finish = mock_reset_finish;
+	base->cancel_requests = mock_cancel_requests;
+
+	if (i915_timeline_init(i915, &base->timeline, NULL))
+		goto err_free;
+	i915_timeline_set_subclass(&base->timeline, TIMELINE_ENGINE);
+
+	intel_engine_init_breadcrumbs(base);
+
+	/* fake hw queue */
+	spin_lock_init(&engine->hw_lock);
+	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
+	INIT_LIST_HEAD(&engine->hw_queue);
+
+	base->kernel_context = intel_context_pin(i915->kernel_context, base);
+	if (IS_ERR(base->kernel_context))
+		goto err_breadcrumbs;
+
+	return base;
+
+err_breadcrumbs:
+	intel_engine_fini_breadcrumbs(base);
+	i915_timeline_fini(&base->timeline);
+err_free:
+	kfree(engine);
+	return NULL;
+}
+
+/*
+ * Stop the delay timer and complete every request still sitting on the fake
+ * hw queue of @engine.
+ */
+void mock_engine_flush(struct intel_engine_cs *engine)
+{
+	struct mock_engine *mock =
+		container_of(engine, struct mock_engine, base);
+	struct i915_request *rq, *next;
+
+	del_timer_sync(&mock->hw_delay);
+
+	spin_lock_irq(&mock->hw_lock);
+	list_for_each_entry_safe(rq, next, &mock->hw_queue, mock.link)
+		advance(rq);
+	spin_unlock_irq(&mock->hw_lock);
+}
+
+/* Reset entry point for mock engines; intentionally empty. */
+void mock_engine_reset(struct intel_engine_cs *engine)
+{
+}
+
+/*
+ * Release a mock engine created by mock_engine().
+ *
+ * The delay timer must no longer be pending. Unpins the last retired context
+ * (if any) and the kernel context, tears down breadcrumbs and the engine
+ * timeline, then frees the allocation.
+ */
+void mock_engine_free(struct intel_engine_cs *engine)
+{
+	struct mock_engine *mock =
+		container_of(engine, struct mock_engine, base);
+	struct intel_context *last;
+
+	GEM_BUG_ON(timer_pending(&mock->hw_delay));
+
+	last = fetch_and_zero(&engine->last_retired_context);
+	if (last)
+		intel_context_unpin(last);
+
+	intel_context_unpin(engine->kernel_context);
+
+	intel_engine_fini_breadcrumbs(engine);
+	i915_timeline_fini(&engine->timeline);
+
+	kfree(engine);
+}
--- /dev/null
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MOCK_ENGINE_H__
+#define __MOCK_ENGINE_H__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+
+#include "gt/intel_engine.h"
+
+/*
+ * An engine with a software-only submission backend, for selftests.
+ *
+ * @base: the engine as seen by the rest of the driver
+ * @hw_lock: protects @hw_queue
+ * @hw_queue: requests submitted but not yet completed, in submission order
+ * @hw_delay: timer used to complete a request after its mock delay
+ */
+struct mock_engine {
+ struct intel_engine_cs base;
+
+ spinlock_t hw_lock;
+ struct list_head hw_queue;
+ struct timer_list hw_delay;
+};
+
+struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
+ const char *name,
+ int id);
+void mock_engine_flush(struct intel_engine_cs *engine);
+void mock_engine_reset(struct intel_engine_cs *engine);
+void mock_engine_free(struct intel_engine_cs *engine);
+
+#endif /* !__MOCK_ENGINE_H__ */
--- /dev/null
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+
+/*
+ * Sanity check the mmio_bases table of every entry in intel_engines[].
+ *
+ * Within one engine the gen values must be strictly decreasing, a gen of 0
+ * terminates the table, and every entry before that must have a non-zero
+ * base. Returns 0 if all tables are well formed, -EINVAL otherwise.
+ */
+static int intel_mmio_bases_check(void *arg)
+{
+	int e;
+
+	for (e = 0; e < ARRAY_SIZE(intel_engines); e++) {
+		const struct engine_info *info = &intel_engines[e];
+		char name[INTEL_ENGINE_CS_MAX_NAME];
+		u8 last_gen = U8_MAX;
+		int slot;
+
+		__sprint_engine_name(name, info);
+
+		for (slot = 0; slot < MAX_MMIO_BASES; slot++) {
+			const u8 gen = info->mmio_bases[slot].gen;
+			const u32 base = info->mmio_bases[slot].base;
+
+			/* Entries must be sorted from newest gen to oldest. */
+			if (gen >= last_gen) {
+				pr_err("%s: %s: mmio base for gen %x "
+				       "is before the one for gen %x\n",
+				       __func__, name, last_gen, gen);
+				return -EINVAL;
+			}
+
+			/* gen 0 marks the end of the table */
+			if (gen == 0)
+				break;
+
+			if (!base) {
+				pr_err("%s: %s: invalid mmio base (%x) "
+				       "for gen %x at entry %u\n",
+				       __func__, name, base, gen, slot);
+				return -EINVAL;
+			}
+
+			last_gen = gen;
+		}
+
+		pr_info("%s: min gen supported for %s = %d\n",
+			__func__, name, last_gen);
+	}
+
+	return 0;
+}
+
+/*
+ * Entry point for the engine_cs mock selftests: runs the subtests listed
+ * below with no test data and returns the result of i915_subtests().
+ */
+int intel_engine_cs_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(intel_mmio_bases_check),
+ };
+
+ return i915_subtests(tests, NULL);
+}
--- /dev/null
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kthread.h>
+
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_wedge_me.h"
+
+#include "selftests/mock_context.h"
+#include "selftests/mock_drm.h"
+
+#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
+
+/*
+ * State for building spinning ("hanging") batches; set up by hang_init()
+ * and released by hang_fini().
+ *
+ * @hws: one-page object the batches write their seqno into
+ * @obj: one-page object holding the batch itself
+ * @ctx: context the hanging requests are created in
+ * @seqno: CPU mapping of @hws
+ * @batch: CPU mapping of @obj
+ */
+struct hang {
+ struct drm_i915_private *i915;
+ struct drm_i915_gem_object *hws;
+ struct drm_i915_gem_object *obj;
+ struct i915_gem_context *ctx;
+ u32 *seqno;
+ u32 *batch;
+};
+
+/*
+ * Prepare @h for use: create a kernel context, the seqno page (filled with
+ * 0xff) and the batch page, and map both for CPU access.
+ *
+ * Returns 0 on success. On failure everything acquired so far is released
+ * and a negative error code is returned.
+ */
+static int hang_init(struct hang *h, struct drm_i915_private *i915)
+{
+	void *map;
+	int err;
+
+	memset(h, 0, sizeof(*h));
+	h->i915 = i915;
+
+	h->ctx = kernel_context(i915);
+	if (IS_ERR(h->ctx))
+		return PTR_ERR(h->ctx);
+
+	GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx));
+
+	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(h->hws)) {
+		err = PTR_ERR(h->hws);
+		goto err_ctx;
+	}
+	i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC);
+
+	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(h->obj)) {
+		err = PTR_ERR(h->obj);
+		goto err_hws;
+	}
+
+	map = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto err_obj;
+	}
+	memset(map, 0xff, PAGE_SIZE);
+	h->seqno = map;
+
+	map = i915_gem_object_pin_map(h->obj, i915_coherent_map_type(i915));
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto err_unpin_hws;
+	}
+	h->batch = map;
+
+	return 0;
+
+err_unpin_hws:
+	i915_gem_object_unpin_map(h->hws);
+err_obj:
+	i915_gem_object_put(h->obj);
+err_hws:
+	i915_gem_object_put(h->hws);
+err_ctx:
+	kernel_context_close(h->ctx);
+	return err;
+}
+
+/*
+ * GPU address of the seqno slot used by @rq: one u32 per fence context,
+ * wrapped to stay within the single page of @hws.
+ */
+static u64 hws_address(const struct i915_vma *hws,
+		       const struct i915_request *rq)
+{
+	const u64 slot = offset_in_page(sizeof(u32)*rq->fence.context);
+
+	return hws->node.start + slot;
+}
+
+/*
+ * Mark @vma as active for @rq and make sure its object holds an active
+ * reference, taking one if it does not already.
+ *
+ * Returns 0 on success or the error from i915_vma_move_to_active().
+ */
+static int move_to_active(struct i915_vma *vma,
+			  struct i915_request *rq,
+			  unsigned int flags)
+{
+	int err = i915_vma_move_to_active(vma, rq, flags);
+
+	if (err)
+		return err;
+
+	if (i915_gem_object_has_active_reference(vma->obj))
+		return 0;
+
+	i915_gem_object_get(vma->obj);
+	i915_gem_object_set_active_reference(vma->obj);
+
+	return 0;
+}
+
+/*
+ * Build a request on @engine whose batch never terminates by itself.
+ *
+ * The batch (h->obj) stores rq->fence.seqno into the request's slot of the
+ * seqno page and then branches back to its own start, so it keeps running
+ * until the first dword of the batch is overwritten from the CPU (as
+ * hang_fini() and igt_hang_sanitycheck() do with MI_BATCH_BUFFER_END).
+ *
+ * On success the request is returned without having been added; the caller
+ * must call i915_request_add(). On failure an ERR_PTR is returned; if the
+ * request had already been allocated it is skipped and added here.
+ */
+static struct i915_request *
+hang_create_request(struct hang *h, struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *i915 = h->i915;
+ struct i915_address_space *vm =
+ h->ctx->ppgtt ? &h->ctx->ppgtt->vm : &i915->ggtt.vm;
+ struct i915_request *rq = NULL;
+ struct i915_vma *hws, *vma;
+ unsigned int flags;
+ u32 *batch;
+ int err;
+
+ /*
+ * The previous batch object is still in use: switch to a fresh object
+ * and mapping rather than rewriting it.
+ */
+ if (i915_gem_object_is_active(h->obj)) {
+ struct drm_i915_gem_object *obj;
+ void *vaddr;
+
+ obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vaddr = i915_gem_object_pin_map(obj,
+ i915_coherent_map_type(h->i915));
+ if (IS_ERR(vaddr)) {
+ i915_gem_object_put(obj);
+ return ERR_CAST(vaddr);
+ }
+
+ i915_gem_object_unpin_map(h->obj);
+ i915_gem_object_put(h->obj);
+
+ h->obj = obj;
+ h->batch = vaddr;
+ }
+
+ vma = i915_vma_instance(h->obj, vm, NULL);
+ if (IS_ERR(vma))
+ return ERR_CAST(vma);
+
+ hws = i915_vma_instance(h->hws, vm, NULL);
+ if (IS_ERR(hws))
+ return ERR_CAST(hws);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ return ERR_PTR(err);
+
+ err = i915_vma_pin(hws, 0, 0, PIN_USER);
+ if (err)
+ goto unpin_vma;
+
+ rq = i915_request_alloc(engine, h->ctx);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto unpin_hws;
+ }
+
+ err = move_to_active(vma, rq, 0);
+ if (err)
+ goto cancel_rq;
+
+ err = move_to_active(hws, rq, 0);
+ if (err)
+ goto cancel_rq;
+
+ /*
+ * Each variant below has the same shape: store the seqno to the hws
+ * slot, 1024 zeroed bytes bracketed by MI_ARB_CHECK, then a batch
+ * buffer start pointing back at the beginning of this batch. Only the
+ * command encodings differ per generation.
+ */
+ batch = h->batch;
+ if (INTEL_GEN(i915) >= 8) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = lower_32_bits(hws_address(hws, rq));
+ *batch++ = upper_32_bits(hws_address(hws, rq));
+ *batch++ = rq->fence.seqno;
+ *batch++ = MI_ARB_CHECK;
+
+ memset(batch, 0, 1024);
+ batch += 1024 / sizeof(*batch);
+
+ *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+ *batch++ = lower_32_bits(vma->node.start);
+ *batch++ = upper_32_bits(vma->node.start);
+ } else if (INTEL_GEN(i915) >= 6) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = 0;
+ *batch++ = lower_32_bits(hws_address(hws, rq));
+ *batch++ = rq->fence.seqno;
+ *batch++ = MI_ARB_CHECK;
+
+ memset(batch, 0, 1024);
+ batch += 1024 / sizeof(*batch);
+
+ *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_BATCH_BUFFER_START | 1 << 8;
+ *batch++ = lower_32_bits(vma->node.start);
+ } else if (INTEL_GEN(i915) >= 4) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *batch++ = 0;
+ *batch++ = lower_32_bits(hws_address(hws, rq));
+ *batch++ = rq->fence.seqno;
+ *batch++ = MI_ARB_CHECK;
+
+ memset(batch, 0, 1024);
+ batch += 1024 / sizeof(*batch);
+
+ *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_BATCH_BUFFER_START | 2 << 6;
+ *batch++ = lower_32_bits(vma->node.start);
+ } else {
+ *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *batch++ = lower_32_bits(hws_address(hws, rq));
+ *batch++ = rq->fence.seqno;
+ *batch++ = MI_ARB_CHECK;
+
+ memset(batch, 0, 1024);
+ batch += 1024 / sizeof(*batch);
+
+ *batch++ = MI_ARB_CHECK;
+ *batch++ = MI_BATCH_BUFFER_START | 2 << 6;
+ *batch++ = lower_32_bits(vma->node.start);
+ }
+ *batch++ = MI_BATCH_BUFFER_END; /* not reached */
+ i915_gem_chipset_flush(h->i915);
+
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto cancel_rq;
+ }
+
+ flags = 0;
+ if (INTEL_GEN(vm->i915) <= 5)
+ flags |= I915_DISPATCH_SECURE;
+
+ err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
+
+ /*
+ * The labels below are also reached on success (err == 0): the pins
+ * are always dropped, while the request is only skipped and added
+ * here when something failed.
+ */
+cancel_rq:
+ if (err) {
+ i915_request_skip(rq, err);
+ i915_request_add(rq);
+ }
+unpin_hws:
+ i915_vma_unpin(hws);
+unpin_vma:
+ i915_vma_unpin(vma);
+ return err ? ERR_PTR(err) : rq;
+}
+
+/*
+ * Read the value currently stored in the seqno-page slot belonging to @rq
+ * (one u32 per fence context, wrapped to the page).
+ */
+static u32 hws_seqno(const struct hang *h, const struct i915_request *rq)
+{
+	const unsigned int slot =
+		rq->fence.context % (PAGE_SIZE/sizeof(u32));
+
+	return READ_ONCE(h->seqno[slot]);
+}
+
+/*
+ * Undo hang_init(): terminate the current batch, release both objects and
+ * the context, then flush outstanding work.
+ */
+static void hang_fini(struct hang *h)
+{
+ /* Overwrite the start of the batch so a still-spinning batch ends. */
+ *h->batch = MI_BATCH_BUFFER_END;
+ i915_gem_chipset_flush(h->i915);
+
+ i915_gem_object_unpin_map(h->obj);
+ i915_gem_object_put(h->obj);
+
+ i915_gem_object_unpin_map(h->hws);
+ i915_gem_object_put(h->hws);
+
+ kernel_context_close(h->ctx);
+
+ igt_flush_test(h->i915, I915_WAIT_LOCKED);
+}
+
+/*
+ * Wait for the batch of @rq to report its seqno in the seqno page: first a
+ * short wait_for_us() poll of 10, then a wait_for() of up to 1000.
+ *
+ * Returns true once the seqno has been seen, false if both waits time out.
+ */
+static bool wait_until_running(struct hang *h, struct i915_request *rq)
+{
+	if (!wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
+					   rq->fence.seqno),
+			 10))
+		return true;
+
+	return !wait_for(i915_seqno_passed(hws_seqno(h, rq),
+					   rq->fence.seqno),
+			 1000);
+}
+
+/*
+ * Selftest: for every engine that can store a dword, build a hanging
+ * request, terminate its batch before submission and check that the request
+ * then completes.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int igt_hang_sanitycheck(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_request *rq;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct hang h;
+ int err;
+
+ /* Basic check that we can execute our hanging batch */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ err = hang_init(&h, i915);
+ if (err)
+ goto unlock;
+
+ for_each_engine(engine, i915, id) {
+ struct igt_wedge_me w;
+ long timeout;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ rq = hang_create_request(&h, engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ pr_err("Failed to create request for %s, err=%d\n",
+ engine->name, err);
+ goto fini;
+ }
+
+ /* Keep our own reference for the wait after submission. */
+ i915_request_get(rq);
+
+ /* End the batch immediately so that it does not actually spin. */
+ *h.batch = MI_BATCH_BUFFER_END;
+ i915_gem_chipset_flush(i915);
+
+ i915_request_add(rq);
+
+ timeout = 0;
+ igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/)
+ timeout = i915_request_wait(rq,
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+ if (i915_reset_failed(i915))
+ timeout = -EIO;
+
+ i915_request_put(rq);
+
+ if (timeout < 0) {
+ err = timeout;
+ pr_err("Wait for request failed on %s, err=%d\n",
+ engine->name, err);
+ goto fini;
+ }
+ }
+
+fini:
+ hang_fini(&h);
+unlock:
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+/*
+ * Selftest: check that we can issue a global GPU reset and that it is
+ * recorded in the reset count.
+ *
+ * Returns 0 on success, -EINVAL if no reset was recorded, or -EIO if the
+ * device reports a failed reset afterwards.
+ */
+static int igt_global_reset(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	unsigned int before;
+	int err = 0;
+
+	igt_global_reset_lock(i915);
+
+	before = i915_reset_count(&i915->gpu_error);
+	i915_reset(i915, ALL_ENGINES, NULL);
+
+	if (i915_reset_count(&i915->gpu_error) == before) {
+		pr_err("No GPU reset recorded!\n");
+		err = -EINVAL;
+	}
+
+	igt_global_reset_unlock(i915);
+
+	return i915_reset_failed(i915) ? -EIO : err;
+}
+
+/*
+ * Selftest: check that we can recover a wedged device with a GPU reset.
+ *
+ * Returns 0 if the device is usable again after the reset, -EIO otherwise.
+ */
+static int igt_wedged_reset(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	intel_wakeref_t wakeref;
+
+	igt_global_reset_lock(i915);
+	wakeref = intel_runtime_pm_get(i915);
+
+	/* Wedge on purpose, then ask for a full reset to undo it. */
+	i915_gem_set_wedged(i915);
+
+	GEM_BUG_ON(!i915_reset_failed(i915));
+	i915_reset(i915, ALL_ENGINES, NULL);
+
+	intel_runtime_pm_put(i915, wakeref);
+	igt_global_reset_unlock(i915);
+
+	if (i915_reset_failed(i915))
+		return -EIO;
+
+	return 0;
+}
+
+/* Wait up to IGT_IDLE_TIMEOUT for @engine to idle; true if it did. */
+static bool wait_for_idle(struct intel_engine_cs *engine)
+{
+	return !wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT);
+}
+
+/*
+ * Selftest: check that we can reset during non-user portions of requests.
+ *
+ * Until the selftest timeout expires, queue 16 empty requests on every
+ * engine and then perform a full GPU reset, checking that each reset is
+ * counted and that the GPU idles again afterwards.
+ *
+ * Returns 0 on success or a negative error code. The first error seen is
+ * reported; it is not replaced by the result of the final flush.
+ */
+static int igt_reset_nop(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	unsigned int reset_count, count;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	struct drm_file *file;
+	IGT_TIMEOUT(end_time);
+	int flush_err;
+	int err = 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx = live_context(i915, file);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out;
+	}
+
+	i915_gem_context_clear_bannable(ctx);
+	wakeref = intel_runtime_pm_get(i915);
+	reset_count = i915_reset_count(&i915->gpu_error);
+	count = 0;
+	do {
+		mutex_lock(&i915->drm.struct_mutex);
+		for_each_engine(engine, i915, id) {
+			int i;
+
+			for (i = 0; i < 16; i++) {
+				struct i915_request *rq;
+
+				rq = i915_request_alloc(engine, ctx);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					break;
+				}
+
+				i915_request_add(rq);
+			}
+			if (err)
+				break;
+		}
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		/* Report the allocation failure instead of losing it below. */
+		if (err)
+			break;
+
+		igt_global_reset_lock(i915);
+		i915_reset(i915, ALL_ENGINES, NULL);
+		igt_global_reset_unlock(i915);
+		if (i915_reset_failed(i915)) {
+			err = -EIO;
+			break;
+		}
+
+		if (i915_reset_count(&i915->gpu_error) !=
+		    reset_count + ++count) {
+			pr_err("Full GPU reset not recorded!\n");
+			err = -EINVAL;
+			break;
+		}
+
+		if (!i915_reset_flush(i915)) {
+			struct drm_printer p =
+				drm_info_printer(i915->drm.dev);
+
+			/*
+			 * The engine iterator is no longer valid after the
+			 * submission loop above, and this failure is not tied
+			 * to one engine anyway: dump all of them.
+			 */
+			pr_err("%s: failed to idle after reset\n", __func__);
+			for_each_engine(engine, i915, id)
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
+
+			err = -EIO;
+			break;
+		}
+
+		err = igt_flush_test(i915, 0);
+		if (err)
+			break;
+	} while (time_before(jiffies, end_time));
+	pr_info("%s: %d resets\n", __func__, count);
+
+	/* Always flush, but keep the first error if there was one. */
+	mutex_lock(&i915->drm.struct_mutex);
+	flush_err = igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (!err)
+		err = flush_err;
+
+	intel_runtime_pm_put(i915, wakeref);
+
+out:
+	mock_file_free(i915, file);
+	if (i915_reset_failed(i915))
+		err = -EIO;
+	return err;
+}
+
+/*
+ * Selftest: check that we can engine-reset during non-user portions of
+ * requests.
+ *
+ * For each engine, until the selftest timeout expires: wait for the engine
+ * to idle, queue 16 empty requests on it and reset just that engine,
+ * checking that an engine reset (and no full GPU reset) is counted and that
+ * the GPU idles again afterwards. Skipped (returns 0) when per-engine reset
+ * is not available.
+ *
+ * Returns 0 on success or a negative error code. The first error seen is
+ * reported; it is not replaced by the result of the final flush.
+ */
+static int igt_reset_nop_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	struct drm_file *file;
+	int flush_err;
+	int err = 0;
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx = live_context(i915, file);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out;
+	}
+
+	i915_gem_context_clear_bannable(ctx);
+	wakeref = intel_runtime_pm_get(i915);
+	for_each_engine(engine, i915, id) {
+		unsigned int reset_count, reset_engine_count;
+		unsigned int count;
+		IGT_TIMEOUT(end_time);
+
+		reset_count = i915_reset_count(&i915->gpu_error);
+		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
+							     engine);
+		count = 0;
+
+		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		do {
+			int i;
+
+			if (!wait_for_idle(engine)) {
+				pr_err("%s failed to idle before reset\n",
+				       engine->name);
+				err = -EIO;
+				break;
+			}
+
+			mutex_lock(&i915->drm.struct_mutex);
+			for (i = 0; i < 16; i++) {
+				struct i915_request *rq;
+
+				rq = i915_request_alloc(engine, ctx);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					break;
+				}
+
+				i915_request_add(rq);
+			}
+			mutex_unlock(&i915->drm.struct_mutex);
+
+			/*
+			 * Report the allocation failure; otherwise the reset
+			 * result below would overwrite it.
+			 */
+			if (err)
+				break;
+
+			err = i915_reset_engine(engine, NULL);
+			if (err) {
+				pr_err("i915_reset_engine failed\n");
+				break;
+			}
+
+			if (i915_reset_count(&i915->gpu_error) != reset_count) {
+				pr_err("Full GPU reset recorded! (engine reset expected)\n");
+				err = -EINVAL;
+				break;
+			}
+
+			if (i915_reset_engine_count(&i915->gpu_error, engine) !=
+			    reset_engine_count + ++count) {
+				pr_err("%s engine reset not recorded!\n",
+				       engine->name);
+				err = -EINVAL;
+				break;
+			}
+
+			if (!i915_reset_flush(i915)) {
+				struct drm_printer p =
+					drm_info_printer(i915->drm.dev);
+
+				pr_err("%s failed to idle after reset\n",
+				       engine->name);
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
+
+				err = -EIO;
+				break;
+			}
+		} while (time_before(jiffies, end_time));
+		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
+
+		if (err)
+			break;
+
+		err = igt_flush_test(i915, 0);
+		if (err)
+			break;
+	}
+
+	/* Always flush, but keep the first error if there was one. */
+	mutex_lock(&i915->drm.struct_mutex);
+	flush_err = igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (!err)
+		err = flush_err;
+
+	intel_runtime_pm_put(i915, wakeref);
+out:
+	mock_file_free(i915, file);
+	if (i915_reset_failed(i915))
+		err = -EIO;
+	return err;
+}
+
+/*
+ * Common body of the idle/active engine reset selftests.
+ *
+ * For each engine, until the selftest timeout expires, reset that engine
+ * and check that exactly one engine reset (and no full GPU reset) was
+ * recorded and that the GPU idles afterwards. With @active set, a hanging
+ * request is submitted and confirmed to be running before each reset;
+ * engines that cannot store a dword are skipped in that mode. Without it
+ * the engine is reset while idle.
+ *
+ * Returns 0 on success (or when per-engine reset is unavailable) or a
+ * negative error code.
+ */
+static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct hang h;
+ int err = 0;
+
+ /* Check that we can issue an engine reset on an idle engine (no-op) */
+
+ if (!intel_has_reset_engine(i915))
+ return 0;
+
+ /* The hang state is only needed (and only torn down) when active. */
+ if (active) {
+ mutex_lock(&i915->drm.struct_mutex);
+ err = hang_init(&h, i915);
+ mutex_unlock(&i915->drm.struct_mutex);
+ if (err)
+ return err;
+ }
+
+ for_each_engine(engine, i915, id) {
+ unsigned int reset_count, reset_engine_count;
+ IGT_TIMEOUT(end_time);
+
+ if (active && !intel_engine_can_store_dword(engine))
+ continue;
+
+ if (!wait_for_idle(engine)) {
+ pr_err("%s failed to idle before reset\n",
+ engine->name);
+ err = -EIO;
+ break;
+ }
+
+ reset_count = i915_reset_count(&i915->gpu_error);
+ reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
+ engine);
+
+ set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+ do {
+ if (active) {
+ struct i915_request *rq;
+
+ mutex_lock(&i915->drm.struct_mutex);
+ rq = hang_create_request(&h, engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ mutex_unlock(&i915->drm.struct_mutex);
+ break;
+ }
+
+ /* Hold a reference across the unlocked wait below. */
+ i915_request_get(rq);
+ i915_request_add(rq);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ if (!wait_until_running(&h, rq)) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+ pr_err("%s: Failed to start request %llx, at %x\n",
+ __func__, rq->fence.seqno, hws_seqno(&h, rq));
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ i915_request_put(rq);
+ err = -EIO;
+ break;
+ }
+
+ i915_request_put(rq);
+ }
+
+ err = i915_reset_engine(engine, NULL);
+ if (err) {
+ pr_err("i915_reset_engine failed\n");
+ break;
+ }
+
+ if (i915_reset_count(&i915->gpu_error) != reset_count) {
+ pr_err("Full GPU reset recorded! (engine reset expected)\n");
+ err = -EINVAL;
+ break;
+ }
+
+ /* The expected count advances by one per loop iteration. */
+ if (i915_reset_engine_count(&i915->gpu_error, engine) !=
+ ++reset_engine_count) {
+ pr_err("%s engine reset not recorded!\n",
+ engine->name);
+ err = -EINVAL;
+ break;
+ }
+
+ if (!i915_reset_flush(i915)) {
+ struct drm_printer p =
+ drm_info_printer(i915->drm.dev);
+
+ pr_err("%s failed to idle after reset\n",
+ engine->name);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ err = -EIO;
+ break;
+ }
+ } while (time_before(jiffies, end_time));
+ clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+
+ if (err)
+ break;
+
+ err = igt_flush_test(i915, 0);
+ if (err)
+ break;
+ }
+
+ if (i915_reset_failed(i915))
+ err = -EIO;
+
+ if (active) {
+ mutex_lock(&i915->drm.struct_mutex);
+ hang_fini(&h);
+ mutex_unlock(&i915->drm.struct_mutex);
+ }
+
+ return err;
+}
+
+/* Subtest entry point: run the per-engine reset loop without any hanging work. */
+static int igt_reset_idle_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+
+	return __igt_reset_engine(i915, false);
+}
+
+/* Subtest entry point: run the per-engine reset loop with a hanging request queued. */
+static int igt_reset_active_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+
+	return __igt_reset_engine(i915, true);
+}
+
+struct active_engine { /* per-engine state for the active_engine() kthreads */
+ struct task_struct *task; /* kthread handle; left NULL when no thread was spawned */
+ struct intel_engine_cs *engine; /* engine the kthread submits requests to */
+ unsigned long resets; /* engine reset count sampled before the reset loop */
+ unsigned int flags; /* TEST_* flags copied from the caller */
+};
+
+#define TEST_ACTIVE BIT(0) /* queue a hanging request on the engine before each reset */
+#define TEST_OTHERS BIT(1) /* run active_engine() kthreads on the other engines */
+#define TEST_SELF BIT(2) /* with TEST_OTHERS: also run a kthread on the engine being reset */
+#define TEST_PRIORITY BIT(3) /* randomise the kthreads' context priorities and raise the hang context's */
+
+/*
+ * Wait up to 5 seconds for @rq to complete, then drop the caller's reference.
+ *
+ * A NULL @rq is accepted and treated as success. If the wait fails, the trace
+ * is dumped, the device is declared wedged and -EIO is returned; the
+ * reference is dropped in either case.
+ */
+static int active_request_put(struct i915_request *rq)
+{
+	int status = 0;
+	long remaining;
+
+	if (rq == NULL)
+		return 0;
+
+	remaining = i915_request_wait(rq, 0, 5 * HZ);
+	if (remaining < 0) {
+		GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n",
+			  rq->engine->name,
+			  rq->fence.context,
+			  rq->fence.seqno);
+		GEM_TRACE_DUMP();
+
+		i915_gem_set_wedged(rq->i915);
+		status = -EIO;
+	}
+
+	i915_request_put(rq);
+
+	return status;
+}
+
+/*
+ * Kthread body: keep one engine busy with a stream of requests.
+ * @data: the struct active_engine describing the engine and TEST_* flags
+ *
+ * One context is created per slot of rq[] on a private mock file. Until
+ * kthread_should_stop(), a new request is allocated on the next context in
+ * round-robin order and the request previously held in that slot is waited
+ * upon and released. With TEST_PRIORITY the context is given a random
+ * priority in [0, 512) before each submission.
+ *
+ * Returns 0 on success, or the first error encountered (which is what
+ * kthread_stop() reports to the spawner).
+ */
+static int active_engine(void *data)
+{
+	I915_RND_STATE(prng);
+	struct active_engine *arg = data;
+	struct intel_engine_cs *engine = arg->engine;
+	struct i915_request *rq[8] = {};
+	struct i915_gem_context *ctx[ARRAY_SIZE(rq)];
+	struct drm_file *file;
+	unsigned long count = 0;
+	int err = 0;
+
+	file = mock_file(engine->i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	for (count = 0; count < ARRAY_SIZE(ctx); count++) {
+		mutex_lock(&engine->i915->drm.struct_mutex);
+		ctx[count] = live_context(engine->i915, file);
+		mutex_unlock(&engine->i915->drm.struct_mutex);
+		if (IS_ERR(ctx[count])) {
+			err = PTR_ERR(ctx[count]);
+			/*
+			 * Unwind the contexts in [0, count). Post-decrement
+			 * matters: count is unsigned, so "--count" would wrap
+			 * when the very first allocation fails and would
+			 * also never reach slot 0.
+			 */
+			while (count--)
+				i915_gem_context_put(ctx[count]);
+			goto err_file;
+		}
+	}
+
+	/* count keeps growing from ARRAY_SIZE(ctx); only its low bits pick the slot. */
+	while (!kthread_should_stop()) {
+		unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
+		struct i915_request *old = rq[idx];
+		struct i915_request *new;
+
+		mutex_lock(&engine->i915->drm.struct_mutex);
+		new = i915_request_alloc(engine, ctx[idx]);
+		if (IS_ERR(new)) {
+			mutex_unlock(&engine->i915->drm.struct_mutex);
+			err = PTR_ERR(new);
+			break;
+		}
+
+		if (arg->flags & TEST_PRIORITY)
+			ctx[idx]->sched.priority =
+				i915_prandom_u32_max_state(512, &prng);
+
+		rq[idx] = i915_request_get(new);
+		i915_request_add(new);
+		mutex_unlock(&engine->i915->drm.struct_mutex);
+
+		/* Retire the request that previously occupied this slot. */
+		err = active_request_put(old);
+		if (err)
+			break;
+
+		cond_resched();
+	}
+
+	/* Drain every slot, even after an error, so no reference is left behind. */
+	for (count = 0; count < ARRAY_SIZE(rq); count++) {
+		int err__ = active_request_put(rq[count]);
+
+		/* Keep the first error */
+		if (!err)
+			err = err__;
+	}
+
+err_file:
+	mock_file_free(engine->i915, file);
+	return err;
+}
+
+/*
+ * Reset each engine in turn and check that no other engine is disturbed.
+ * @i915: device under test
+ * @test_name: phase name, used only in log messages
+ * @flags: TEST_* mask selecting what runs alongside the resets
+ *
+ * For every engine: optionally spawn active_engine() kthreads on the other
+ * engines (TEST_OTHERS) and on the engine itself (TEST_SELF), then reset the
+ * engine repeatedly until the IGT_TIMEOUT expires, optionally with a hanging
+ * request queued first (TEST_ACTIVE). Afterwards the number of resets issued
+ * must match the engine's reset counter, no other engine's counter may have
+ * moved, and no global reset may have been recorded.
+ *
+ * Returns 0 on success (or when engine resets are unsupported) and a negative
+ * errno otherwise.
+ */
+static int __igt_reset_engines(struct drm_i915_private *i915,
+			       const char *test_name,
+			       unsigned int flags)
+{
+	struct intel_engine_cs *engine, *other;
+	enum intel_engine_id id, tmp;
+	struct hang h;
+	int err = 0;
+
+	/* Check that issuing a reset on one engine does not interfere
+	 * with any other engine.
+	 */
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	if (flags & TEST_ACTIVE) {
+		mutex_lock(&i915->drm.struct_mutex);
+		err = hang_init(&h, i915);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err)
+			return err;
+
+		if (flags & TEST_PRIORITY)
+			h.ctx->sched.priority = 1024;
+	}
+
+	for_each_engine(engine, i915, id) {
+		/* Zero-initialised afresh on every iteration of the engine loop. */
+		struct active_engine threads[I915_NUM_ENGINES] = {};
+		unsigned long global = i915_reset_count(&i915->gpu_error);
+		unsigned long count = 0, reported;
+		IGT_TIMEOUT(end_time);
+
+		if (flags & TEST_ACTIVE &&
+		    !intel_engine_can_store_dword(engine))
+			continue;
+
+		if (!wait_for_idle(engine)) {
+			pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n",
+			       engine->name, test_name);
+			err = -EIO;
+			break;
+		}
+
+		for_each_engine(other, i915, tmp) {
+			struct task_struct *tsk;
+
+			/* Baseline for every engine, whether or not a thread runs on it. */
+			threads[tmp].resets =
+				i915_reset_engine_count(&i915->gpu_error,
+							other);
+
+			if (!(flags & TEST_OTHERS))
+				continue;
+
+			if (other == engine && !(flags & TEST_SELF))
+				continue;
+
+			threads[tmp].engine = other;
+			threads[tmp].flags = flags;
+
+			tsk = kthread_run(active_engine, &threads[tmp],
+					  "igt/%s", other->name);
+			if (IS_ERR(tsk)) {
+				err = PTR_ERR(tsk);
+				goto unwind;
+			}
+
+			threads[tmp].task = tsk;
+			get_task_struct(tsk);
+		}
+
+		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		do {
+			struct i915_request *rq = NULL;
+
+			if (flags & TEST_ACTIVE) {
+				mutex_lock(&i915->drm.struct_mutex);
+				rq = hang_create_request(&h, engine);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					mutex_unlock(&i915->drm.struct_mutex);
+					break;
+				}
+
+				i915_request_get(rq);
+				i915_request_add(rq);
+				mutex_unlock(&i915->drm.struct_mutex);
+
+				if (!wait_until_running(&h, rq)) {
+					struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+					pr_err("%s: Failed to start request %llx, at %x\n",
+					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
+					intel_engine_dump(engine, &p,
+							  "%s\n", engine->name);
+
+					i915_request_put(rq);
+					err = -EIO;
+					break;
+				}
+			}
+
+			err = i915_reset_engine(engine, NULL);
+			if (err) {
+				pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
+				       engine->name, test_name, err);
+				/* Do not leak our reference on the hanging request. */
+				if (rq)
+					i915_request_put(rq);
+				break;
+			}
+
+			count++;
+
+			if (rq) {
+				if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+					struct drm_printer p =
+						drm_info_printer(i915->drm.dev);
+
+					pr_err("i915_reset_engine(%s:%s):"
+					       " failed to complete request after reset\n",
+					       engine->name, test_name);
+					intel_engine_dump(engine, &p,
+							  "%s\n", engine->name);
+					i915_request_put(rq);
+
+					GEM_TRACE_DUMP();
+					i915_gem_set_wedged(i915);
+					err = -EIO;
+					break;
+				}
+
+				i915_request_put(rq);
+			}
+
+			/* With TEST_SELF a kthread keeps this engine busy, so skip the idle check. */
+			if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
+				struct drm_printer p =
+					drm_info_printer(i915->drm.dev);
+
+				pr_err("i915_reset_engine(%s:%s):"
+				       " failed to idle after reset\n",
+				       engine->name, test_name);
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
+
+				err = -EIO;
+				break;
+			}
+		} while (time_before(jiffies, end_time));
+		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
+			engine->name, test_name, count);
+
+		/* The engine's counter must have grown by exactly the resets we issued. */
+		reported = i915_reset_engine_count(&i915->gpu_error, engine);
+		reported -= threads[engine->id].resets;
+		if (reported != count) {
+			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
+			       engine->name, test_name, count, reported);
+			if (!err)
+				err = -EINVAL;
+		}
+
+unwind:
+		for_each_engine(other, i915, tmp) {
+			int ret;
+
+			if (!threads[tmp].task)
+				continue;
+
+			/* kthread_stop() hands back active_engine()'s return value. */
+			ret = kthread_stop(threads[tmp].task);
+			if (ret) {
+				pr_err("kthread for other engine %s failed, err=%d\n",
+				       other->name, ret);
+				if (!err)
+					err = ret;
+			}
+			put_task_struct(threads[tmp].task);
+
+			if (other != engine &&
+			    threads[tmp].resets !=
+			    i915_reset_engine_count(&i915->gpu_error, other)) {
+				pr_err("Innocent engine %s was reset (count=%ld)\n",
+				       other->name,
+				       i915_reset_engine_count(&i915->gpu_error,
+							       other) -
+				       threads[tmp].resets);
+				if (!err)
+					err = -EINVAL;
+			}
+		}
+
+		if (global != i915_reset_count(&i915->gpu_error)) {
+			pr_err("Global reset (count=%ld)!\n",
+			       i915_reset_count(&i915->gpu_error) - global);
+			if (!err)
+				err = -EINVAL;
+		}
+
+		if (err)
+			break;
+
+		err = igt_flush_test(i915, 0);
+		if (err)
+			break;
+	}
+
+	/* A failed reset overrides whatever status was collected above. */
+	if (i915_reset_failed(i915))
+		err = -EIO;
+
+	if (flags & TEST_ACTIVE) {
+		mutex_lock(&i915->drm.struct_mutex);
+		hang_fini(&h);
+		mutex_unlock(&i915->drm.struct_mutex);
+	}
+
+	return err;
+}
+
+/*
+ * Subtest: walk the table of engine-reset phases in order, stopping at the
+ * first failure. Phases that use TEST_PRIORITY are skipped when the scheduler
+ * does not advertise I915_SCHEDULER_CAP_PRIORITY.
+ */
+static int igt_reset_engines(void *arg)
+{
+	static const struct {
+		const char *name;
+		unsigned int flags;
+	} phases[] = {
+		{ "idle", 0 },
+		{ "active", TEST_ACTIVE },
+		{ "others-idle", TEST_OTHERS },
+		{ "others-active", TEST_OTHERS | TEST_ACTIVE },
+		{
+			"others-priority",
+			TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY
+		},
+		{
+			"self-priority",
+			TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
+		},
+		{ }
+	};
+	struct drm_i915_private *i915 = arg;
+	typeof(*phases) *phase;
+
+	/* The table ends with an entry whose name is NULL. */
+	for (phase = phases; phase->name; phase++) {
+		int ret;
+
+		if ((phase->flags & TEST_PRIORITY) &&
+		    !(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+			continue;
+
+		ret = __igt_reset_engines(i915, phase->name, phase->flags);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Reset the engines in @mask via i915_reset() and return the global reset
+ * count as it was sampled just before the reset was issued.
+ */
+static u32 fake_hangcheck(struct drm_i915_private *i915,
+			  intel_engine_mask_t mask)
+{
+	const u32 before = i915_reset_count(&i915->gpu_error);
+
+	i915_reset(i915, mask, NULL);
+
+	return before;
+}
+
+/*
+ * Subtest: a waiter stuck behind a hanging request must be released by a
+ * reset.
+ *
+ * A hanging request is started on RCS0, all engines are reset through
+ * fake_hangcheck(), and the request must then complete within the short wait
+ * below with the global reset count having moved. Returns 0 without testing
+ * when RCS0 cannot store a dword.
+ */
+static int igt_reset_wait(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *rcs = i915->engine[RCS0];
+	unsigned int resets_before;
+	struct i915_request *rq;
+	struct hang h;
+	long remaining;
+	int err;
+
+	if (!intel_engine_can_store_dword(rcs))
+		return 0;
+
+	/* Check that we detect a stuck waiter and issue a reset */
+
+	igt_global_reset_lock(i915);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	err = hang_init(&h, i915);
+	if (err)
+		goto out_unlock;
+
+	rq = hang_create_request(&h, rcs);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_hang;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	if (!wait_until_running(&h, rq)) {
+		struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+		pr_err("%s: Failed to start request %llx, at %x\n",
+		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
+		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
+
+		i915_gem_set_wedged(i915);
+
+		err = -EIO;
+		goto out_put;
+	}
+
+	resets_before = fake_hangcheck(i915, ALL_ENGINES);
+
+	/* err is still 0 here; it only changes if one of the checks trips. */
+	remaining = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
+	if (remaining < 0) {
+		pr_err("i915_request_wait failed on a stuck request: err=%ld\n",
+		       remaining);
+		err = remaining;
+	} else if (i915_reset_count(&i915->gpu_error) == resets_before) {
+		pr_err("No GPU reset recorded!\n");
+		err = -EINVAL;
+	}
+
+out_put:
+	i915_request_put(rq);
+out_hang:
+	hang_fini(&h);
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	igt_global_reset_unlock(i915);
+
+	/* A failed reset takes precedence over any other status. */
+	return i915_reset_failed(i915) ? -EIO : err;
+}
+
+struct evict_vma { /* argument block handed to the evict_vma()/evict_fence() kthreads */
+ struct completion completion; /* completed by the kthread as soon as it starts */
+ struct i915_vma *vma; /* the vma the kthread operates on */
+};
+
+/*
+ * Kthread body: announce that we are running, then try to evict whatever
+ * occupies the vma's node from its address space under struct_mutex.
+ * @data: the struct evict_vma supplied by the spawner
+ *
+ * Returns the result of i915_gem_evict_for_node().
+ */
+static int evict_vma(void *data)
+{
+	struct evict_vma *ev = data;
+	struct i915_address_space *vm = ev->vma->vm;
+	struct drm_i915_private *i915 = vm->i915;
+	/* Work on a private copy of the node, taken before we signal the spawner. */
+	struct drm_mm_node target = ev->vma->node;
+	int ret;
+
+	complete(&ev->completion);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ret = i915_gem_evict_for_node(vm, &target, 0);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return ret;
+}
+
+/*
+ * Kthread body: announce that we are running, then switch the object to
+ * Y-tiling and pin/unpin a fence for the vma under struct_mutex.
+ * @data: the struct evict_vma supplied by the spawner
+ *
+ * Returns 0 on success or the error from the tiling change or fence pin.
+ */
+static int evict_fence(void *data)
+{
+	struct evict_vma *ev = data;
+	struct drm_i915_private *i915 = ev->vma->vm->i915;
+	int ret;
+
+	complete(&ev->completion);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	/* Mark the fence register as dirty to force the mmio update. */
+	ret = i915_gem_object_set_tiling(ev->vma->obj, I915_TILING_Y, 512);
+	if (ret) {
+		pr_err("Invalid Y-tiling settings; err:%d\n", ret);
+	} else {
+		ret = i915_vma_pin_fence(ev->vma);
+		if (ret)
+			pr_err("Unable to pin Y-tiled fence; err:%d\n", ret);
+		else
+			i915_vma_unpin_fence(ev->vma);
+	}
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return ret;
+}
+
+static int __igt_reset_evict_vma(struct drm_i915_private *i915,
+ struct i915_address_space *vm,
+ int (*fn)(void *),
+ unsigned int flags)
+{
+ struct drm_i915_gem_object *obj;
+ struct task_struct *tsk = NULL;
+ struct i915_request *rq;
+ struct evict_vma arg;
+ struct hang h;
+ int err;
+
+ if (!intel_engine_can_store_dword(i915->engine[RCS0]))
+ return 0;
+
+ /*
+ * Check that we can recover an unbind stuck on a hanging request.
+ *
+ * A 1M internal object is bound into @vm and marked active on a
+ * hanging request on RCS0. @fn is then run in a kthread, the engine is
+ * reset, and the kthread is expected to finish before the wedge
+ * timeout below. @flags is passed to i915_vma_move_to_active();
+ * EXEC_OBJECT_NEEDS_FENCE additionally makes the object X-tiled and
+ * pins a fence while the vma is being marked active.
+ */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ err = hang_init(&h, i915);
+ if (err)
+ goto unlock;
+
+ obj = i915_gem_object_create_internal(i915, SZ_1M);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto fini;
+ }
+
+ if (flags & EXEC_OBJECT_NEEDS_FENCE) {
+ err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512);
+ if (err) {
+ pr_err("Invalid X-tiling settings; err:%d\n", err);
+ goto out_obj;
+ }
+ }
+
+ arg.vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(arg.vma)) {
+ err = PTR_ERR(arg.vma);
+ goto out_obj;
+ }
+
+ rq = hang_create_request(&h, i915->engine[RCS0]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_obj;
+ }
+ /* From here on every exit path must i915_request_add() the request. */
+ err = i915_vma_pin(arg.vma, 0, 0,
+ i915_vma_is_ggtt(arg.vma) ?
+ PIN_GLOBAL | PIN_MAPPABLE :
+ PIN_USER);
+ if (err) {
+ i915_request_add(rq);
+ goto out_obj;
+ }
+
+ if (flags & EXEC_OBJECT_NEEDS_FENCE) {
+ err = i915_vma_pin_fence(arg.vma);
+ if (err) {
+ pr_err("Unable to pin X-tiled fence; err:%d\n", err);
+ i915_vma_unpin(arg.vma);
+ i915_request_add(rq);
+ goto out_obj;
+ }
+ }
+
+ err = i915_vma_move_to_active(arg.vma, rq, flags);
+ /* The pins are dropped either way; err is only checked once rq is queued. */
+ if (flags & EXEC_OBJECT_NEEDS_FENCE)
+ i915_vma_unpin_fence(arg.vma);
+ i915_vma_unpin(arg.vma);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (err)
+ goto out_rq;
+ /* struct_mutex is released until just before out_rq; the kthreads in this file take it themselves. */
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ if (!wait_until_running(&h, rq)) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+ pr_err("%s: Failed to start request %llx, at %x\n",
+ __func__, rq->fence.seqno, hws_seqno(&h, rq));
+ intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
+
+ i915_gem_set_wedged(i915);
+ goto out_reset;
+ }
+
+ init_completion(&arg.completion);
+
+ tsk = kthread_run(fn, &arg, "igt/evict_vma");
+ if (IS_ERR(tsk)) {
+ err = PTR_ERR(tsk);
+ tsk = NULL;
+ goto out_reset;
+ }
+ get_task_struct(tsk);
+
+ wait_for_completion(&arg.completion);
+ /* Poll for a callback on rq's fence, taken as the sign that the kthread is now waiting on it. */
+ if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+ pr_err("igt/evict_vma kthread did not wait\n");
+ intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
+
+ i915_gem_set_wedged(i915);
+ goto out_reset;
+ }
+ /*
+ * NOTE(review): the "failed to start" and "did not wait" paths jump
+ * here without setting err, so they are only reported through
+ * i915_reset_failed() or the kthread's exit code - confirm intended.
+ */
+out_reset:
+ igt_global_reset_lock(i915);
+ fake_hangcheck(rq->i915, rq->engine->mask);
+ igt_global_reset_unlock(i915);
+
+ if (tsk) {
+ struct igt_wedge_me w;
+
+ /* The reset, even indirectly, should take less than 10ms. */
+ igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/)
+ err = kthread_stop(tsk);
+
+ put_task_struct(tsk);
+ }
+
+ mutex_lock(&i915->drm.struct_mutex);
+out_rq:
+ i915_request_put(rq);
+out_obj:
+ i915_gem_object_put(obj);
+fini:
+ hang_fini(&h);
+unlock:
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ if (i915_reset_failed(i915))
+ return -EIO;
+
+ return err;
+}
+
+/* Subtest: stuck-eviction recovery in the global GTT, using evict_vma(). */
+static int igt_reset_evict_ggtt(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_address_space *vm = &i915->ggtt.vm;
+
+	return __igt_reset_evict_vma(i915, vm, evict_vma, EXEC_OBJECT_WRITE);
+}
+
+/*
+ * Subtest: stuck-eviction recovery in a context's ppgtt, using evict_vma().
+ *
+ * A context is created on a mock file; if it has no ppgtt of its own the test
+ * is a no-op and returns 0. The mock file is always released before returning.
+ */
+static int igt_reset_evict_ppgtt(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx;
+	struct drm_file *file;
+	int err = 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx = live_context(i915, file);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (IS_ERR(ctx))
+		err = PTR_ERR(ctx);
+	else if (ctx->ppgtt) /* aliasing == global gtt locking, covered above */
+		err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm,
+					    evict_vma, EXEC_OBJECT_WRITE);
+
+	mock_file_free(i915, file);
+	return err;
+}
+
+/* Subtest: stuck-fence recovery in the global GTT, using evict_fence(). */
+static int igt_reset_evict_fence(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_address_space *vm = &i915->ggtt.vm;
+
+	return __igt_reset_evict_vma(i915, vm, evict_fence,
+				     EXEC_OBJECT_NEEDS_FENCE);
+}
+
+/*
+ * Wait for every engine other than @exclude to idle.
+ *
+ * Returns 0 once all of them have idled, or -EIO as soon as one fails to.
+ */
+static int wait_for_others(struct drm_i915_private *i915,
+			   struct intel_engine_cs *exclude)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		if (engine != exclude && !wait_for_idle(engine))
+			return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Subtest: requests queued behind a hang must survive the reset.
+ *
+ * For each engine that can store a dword, a hanging request (prev) is kept at
+ * the head of the queue with a second hanging request (rq) behind it. Once
+ * prev is running the engine is reset; prev must then carry -EIO in its
+ * fence, rq must be untouched, and the global reset count must have moved.
+ * rq then becomes prev and the cycle repeats until the IGT_TIMEOUT expires.
+ *
+ * Returns 0 on success and a negative errno otherwise; -EIO if the device is
+ * left with a failed reset.
+ */
+static int igt_reset_queue(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct hang h;
+	int err;
+
+	/* Check that we replay pending requests following a hang */
+
+	igt_global_reset_lock(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = hang_init(&h, i915);
+	if (err)
+		goto unlock;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *prev;
+		IGT_TIMEOUT(end_time);
+		unsigned int count;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		prev = hang_create_request(&h, engine);
+		if (IS_ERR(prev)) {
+			err = PTR_ERR(prev);
+			goto fini;
+		}
+
+		i915_request_get(prev);
+		i915_request_add(prev);
+
+		count = 0;
+		do {
+			struct i915_request *rq;
+			unsigned int reset_count;
+
+			rq = hang_create_request(&h, engine);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				/* We still hold a reference on prev; do not leak it. */
+				i915_request_put(prev);
+				goto fini;
+			}
+
+			i915_request_get(rq);
+			i915_request_add(rq);
+
+			/*
+			 * XXX We don't handle resetting the kernel context
+			 * very well. If we trigger a device reset twice in
+			 * quick succession while the kernel context is
+			 * executing, we may end up skipping the breadcrumb.
+			 * This is really only a problem for the selftest as
+			 * normally there is a large interlude between resets
+			 * (hangcheck), or we focus on resetting just one
+			 * engine and so avoid repeatedly resetting innocents.
+			 */
+			err = wait_for_others(i915, engine);
+			if (err) {
+				pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
+				       __func__, engine->name);
+				i915_request_put(rq);
+				i915_request_put(prev);
+
+				GEM_TRACE_DUMP();
+				i915_gem_set_wedged(i915);
+				goto fini;
+			}
+
+			if (!wait_until_running(&h, prev)) {
+				struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+				pr_err("%s(%s): Failed to start request %llx, at %x\n",
+				       __func__, engine->name,
+				       prev->fence.seqno, hws_seqno(&h, prev));
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
+
+				i915_request_put(rq);
+				i915_request_put(prev);
+
+				i915_gem_set_wedged(i915);
+
+				err = -EIO;
+				goto fini;
+			}
+
+			reset_count = fake_hangcheck(i915, BIT(id));
+
+			/* The request that was running must have been marked with -EIO... */
+			if (prev->fence.error != -EIO) {
+				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
+				       prev->fence.error);
+				i915_request_put(rq);
+				i915_request_put(prev);
+				err = -EINVAL;
+				goto fini;
+			}
+
+			/* ...while the one queued behind it must be untouched. */
+			if (rq->fence.error) {
+				pr_err("Fence error status not zero [%d] after unrelated reset\n",
+				       rq->fence.error);
+				i915_request_put(rq);
+				i915_request_put(prev);
+				err = -EINVAL;
+				goto fini;
+			}
+
+			if (i915_reset_count(&i915->gpu_error) == reset_count) {
+				pr_err("No GPU reset recorded!\n");
+				i915_request_put(rq);
+				i915_request_put(prev);
+				err = -EINVAL;
+				goto fini;
+			}
+
+			/* rq is next in line to run (and hang); it becomes the new prev. */
+			i915_request_put(prev);
+			prev = rq;
+			count++;
+		} while (time_before(jiffies, end_time));
+		/* count is unsigned, hence %u. */
+		pr_info("%s: Completed %u resets\n", engine->name, count);
+
+		/* Overwrite the start of the hanging batch with a batch end so the last request can finish. */
+		*h.batch = MI_BATCH_BUFFER_END;
+		i915_gem_chipset_flush(i915);
+
+		i915_request_put(prev);
+
+		err = igt_flush_test(i915, I915_WAIT_LOCKED);
+		if (err)
+			break;
+	}
+
+fini:
+	hang_fini(&h);
+unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	igt_global_reset_unlock(i915);
+
+	if (i915_reset_failed(i915))
+		return -EIO;
+
+	return err;
+}
+
+/*
+ * Subtest: i915_handle_error() must identify the hanging request.
+ *
+ * A hanging request is started on RCS0 and i915_handle_error() is invoked for
+ * that engine with error capture temporarily disabled; afterwards the
+ * request's fence must carry -EIO. Returns 0 without testing when engine
+ * resets are unsupported, RCS0 is absent, or RCS0 cannot store a dword.
+ */
+static int igt_handle_error(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine = i915->engine[RCS0];
+	struct i915_gpu_state *saved_error;
+	struct i915_request *rq;
+	struct hang h;
+	int err;
+
+	/* Check that we can issue a global GPU and engine reset */
+
+	if (!intel_has_reset_engine(i915) ||
+	    !engine ||
+	    !intel_engine_can_store_dword(engine))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	err = hang_init(&h, i915);
+	if (err)
+		goto out_unlock;
+
+	rq = hang_create_request(&h, engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_hang;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	if (!wait_until_running(&h, rq)) {
+		struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+		pr_err("%s: Failed to start request %llx, at %x\n",
+		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
+		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
+
+		i915_gem_set_wedged(i915);
+
+		err = -EIO;
+		goto out_put;
+	}
+
+	/* The error handler is called with struct_mutex released. */
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	/* Temporarily disable error capture */
+	saved_error = xchg(&i915->gpu_error.first_error, (void *)-1);
+
+	i915_handle_error(i915, engine->mask, 0, NULL);
+
+	xchg(&i915->gpu_error.first_error, saved_error);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	/* err is still 0 from hang_init() unless this final check trips. */
+	if (rq->fence.error != -EIO) {
+		pr_err("Guilty request not identified!\n");
+		err = -EINVAL;
+	}
+
+out_put:
+	i915_request_put(rq);
+out_hang:
+	hang_fini(&h);
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static void __preempt_begin(void)
+{
+ preempt_disable(); /* "preempt" atomic section: entered with preemption disabled */
+}
+
+static void __preempt_end(void)
+{
+ preempt_enable(); /* leaves the section opened by __preempt_begin() */
+}
+
+static void __softirq_begin(void)
+{
+ local_bh_disable(); /* "softirq" atomic section: entered with bottom halves disabled */
+}
+
+static void __softirq_end(void)
+{
+ local_bh_enable(); /* leaves the section opened by __softirq_begin() */
+}
+
+static void __hardirq_begin(void)
+{
+ local_irq_disable(); /* "hardirq" atomic section: entered with local interrupts disabled */
+}
+
+static void __hardirq_end(void)
+{
+ local_irq_enable(); /* leaves the section opened by __hardirq_begin() */
+}
+
+struct atomic_section { /* one kind of atomic context to issue resets from */
+ const char *name; /* label used in trace and error messages; NULL ends a table */
+ void (*critical_section_begin)(void); /* called immediately before the reset */
+ void (*critical_section_end)(void); /* called immediately after the reset */
+};
+
+/*
+ * Reset @engine from inside the atomic section described by @p.
+ * @mode: label ("idle"/"active") used only in trace and error messages
+ *
+ * The engine's execlists tasklet is disabled around the critical section and
+ * re-enabled afterwards. Returns the result of i915_reset_engine().
+ */
+static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
+				     const struct atomic_section *p,
+				     const char *mode)
+{
+	struct tasklet_struct * const tasklet = &engine->execlists.tasklet;
+	int ret;
+
+	GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
+		  engine->name, mode, p->name);
+
+	tasklet_disable_nosync(tasklet);
+	p->critical_section_begin();
+
+	ret = i915_reset_engine(engine, NULL);
+
+	p->critical_section_end();
+	tasklet_enable(tasklet);
+
+	if (ret != 0)
+		pr_err("i915_reset_engine(%s:%s) failed under %s\n",
+		       engine->name, mode, p->name);
+
+	return ret;
+}
+
+/*
+ * Reset @engine from atomic section @p, first while idle and then while it
+ * is running a hanging request.
+ *
+ * After a successful active reset the hanging request is waited upon under a
+ * 50ms wedge timer. Returns 0 on success, -EIO if the request never started
+ * or the device ends up with a failed reset, or the error from the
+ * reset/setup step that failed.
+ */
+static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
+				   const struct atomic_section *p)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_request *rq;
+	struct hang h;
+	int err;
+
+	err = __igt_atomic_reset_engine(engine, p, "idle");
+	if (err)
+		return err;
+
+	err = hang_init(&h, i915);
+	if (err)
+		return err;
+
+	rq = hang_create_request(&h, engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_hang;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	if (!wait_until_running(&h, rq)) {
+		pr_err("%s(%s): Failed to start request %llx, at %x\n",
+		       __func__, engine->name,
+		       rq->fence.seqno, hws_seqno(&h, rq));
+		i915_gem_set_wedged(i915);
+		err = -EIO;
+	} else {
+		err = __igt_atomic_reset_engine(engine, p, "active");
+	}
+
+	if (!err) {
+		struct igt_wedge_me w;
+
+		/* The wedge timer bounds the otherwise unlimited wait below. */
+		igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/)
+			i915_request_wait(rq,
+					  I915_WAIT_LOCKED,
+					  MAX_SCHEDULE_TIMEOUT);
+		if (i915_reset_failed(i915))
+			err = -EIO;
+	}
+
+	i915_request_put(rq);
+out_hang:
+	hang_fini(&h);
+	return err;
+}
+
+/* Declare the device wedged, then call i915_reset() with an empty engine mask. */
+static void force_reset(struct drm_i915_private *i915)
+{
+	i915_gem_set_wedged(i915);
+	i915_reset(i915, 0, NULL);
+}
+
+/*
+ * Subtest: resets must be usable from atomic context.
+ *
+ * After an initial forced reset, intel_gpu_reset() is issued for all engines
+ * inside each atomic section in turn (preemption, softirqs and hardirqs
+ * disabled). Unless GuC submission is in use, each engine is then reset
+ * individually from each section via igt_atomic_reset_engine(). Returns the
+ * first error, or 0; note that 0 is also returned if the initial forced
+ * reset leaves the device in a failed state.
+ */
+static int igt_atomic_reset(void *arg)
+{
+	static const struct atomic_section phases[] = {
+		{ "preempt", __preempt_begin, __preempt_end },
+		{ "softirq", __softirq_begin, __softirq_end },
+		{ "hardirq", __hardirq_begin, __hardirq_end },
+		{ }
+	};
+	struct drm_i915_private *i915 = arg;
+	const struct atomic_section *p;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	int err = 0;
+
+	/* Check that the resets are usable from atomic context */
+
+	igt_global_reset_lock(i915);
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	/* Flush any requests before we get started and check basics */
+	force_reset(i915);
+	if (i915_reset_failed(i915))
+		goto out_release;
+
+	if (intel_has_gpu_reset(i915)) {
+		for (p = phases; p->name; p++) {
+			GEM_TRACE("intel_gpu_reset under %s\n", p->name);
+
+			p->critical_section_begin();
+			err = intel_gpu_reset(i915, ALL_ENGINES);
+			p->critical_section_end();
+
+			if (err) {
+				pr_err("intel_gpu_reset failed under %s\n",
+				       p->name);
+				goto out_reset;
+			}
+		}
+
+		force_reset(i915);
+	}
+
+	/* With GuC submission the per-engine phase (and the final reset) is skipped. */
+	if (USES_GUC_SUBMISSION(i915))
+		goto out_release;
+
+	if (!intel_has_reset_engine(i915))
+		goto out_reset;
+
+	for_each_engine(engine, i915, id) {
+		for (p = phases; p->name; p++) {
+			err = igt_atomic_reset_engine(engine, p);
+			if (err)
+				goto out_reset;
+		}
+	}
+
+out_reset:
+	/* As we poke around the guts, do a full reset before continuing. */
+	force_reset(i915);
+
+out_release:
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	igt_global_reset_unlock(i915);
+
+	return err;
+}
+
+/*
+ * Entry point for the live hangcheck/reset selftests.
+ *
+ * Returns 0 without running anything when the device has no GPU reset, and
+ * -EIO when it is already terminally wedged. Otherwise hangcheck is switched
+ * off for the duration of the subtests and restored afterwards, and the
+ * result of i915_subtests() is returned.
+ */
+int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
+		SUBTEST(igt_wedged_reset),
+		SUBTEST(igt_hang_sanitycheck),
+		SUBTEST(igt_reset_nop),
+		SUBTEST(igt_reset_nop_engine),
+		SUBTEST(igt_reset_idle_engine),
+		SUBTEST(igt_reset_active_engine),
+		SUBTEST(igt_reset_engines),
+		SUBTEST(igt_reset_queue),
+		SUBTEST(igt_reset_wait),
+		SUBTEST(igt_reset_evict_ggtt),
+		SUBTEST(igt_reset_evict_ppgtt),
+		SUBTEST(igt_reset_evict_fence),
+		SUBTEST(igt_handle_error),
+		SUBTEST(igt_atomic_reset),
+	};
+	bool hangcheck_was_enabled;
+	intel_wakeref_t wakeref;
+	int ret;
+
+	if (!intel_has_gpu_reset(i915))
+		return 0;
+
+	if (i915_terminally_wedged(i915))
+		return -EIO; /* we're long past hope of a successful reset */
+
+	wakeref = intel_runtime_pm_get(i915);
+
+	/* Read the current setting and clear the module parameter in one step. */
+	hangcheck_was_enabled =
+		fetch_and_zero(&i915_modparams.enable_hangcheck);
+	drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */
+
+	ret = i915_subtests(tests, i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	i915_modparams.enable_hangcheck = hangcheck_was_enabled;
+	intel_runtime_pm_put(i915, wakeref);
+
+	return ret;
+}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "gt/intel_reset.h"
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_live_test.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/mock_context.h"
+
+/*
+ * Subtest: on every engine, start a spinner, check that it runs, end it and
+ * flush.
+ *
+ * Returns 0 without testing when the device lacks logical ring contexts.
+ * Otherwise returns 0 on success, -ENOMEM if the spinner or kernel context
+ * cannot be set up, -EIO if a spinner fails to start or the flush fails, or
+ * the error from creating a spinner request.
+ */
+static int live_sanitycheck(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	struct igt_spinner spinner;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	int err = -ENOMEM; /* covers the setup failures below */
+
+	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	if (igt_spinner_init(&spinner, i915))
+		goto out_unlock;
+
+	ctx = kernel_context(i915);
+	if (ctx == NULL)
+		goto out_spinner;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		rq = igt_spinner_create_request(&spinner, ctx, engine,
+						MI_NOOP);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out_ctx;
+		}
+
+		i915_request_add(rq);
+		if (!igt_wait_for_spinner(&spinner, rq)) {
+			GEM_TRACE("spinner failed to start\n");
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto out_ctx;
+		}
+
+		igt_spinner_end(&spinner);
+		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			err = -EIO;
+			goto out_ctx;
+		}
+	}
+
+	/* Every engine passed. */
+	err = 0;
+out_ctx:
+	kernel_context_close(ctx);
+out_spinner:
+	igt_spinner_fini(&spinner);
+out_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int live_busywait_preempt(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_gem_context *ctx_hi, *ctx_lo;
+ struct intel_engine_cs *engine;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ int err = -ENOMEM;
+ u32 *map;
+
+ /*
+ * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
+ * preempt the busywaits used to synchronise between rings.
+ *
+ * Runs on every engine that can store a dword, using one page-sized
+ * object pinned in the global GTT and mapped for the CPU as the
+ * shared semaphore location. Returns 0 on success and a negative
+ * errno otherwise; err starts as -ENOMEM to cover a failed
+ * kernel_context() allocation.
+ */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ ctx_hi = kernel_context(i915);
+ if (!ctx_hi)
+ goto err_unlock;
+ ctx_hi->sched.priority = INT_MAX;
+
+ ctx_lo = kernel_context(i915);
+ if (!ctx_lo)
+ goto err_ctx_hi;
+ ctx_lo->sched.priority = INT_MIN;
+
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_ctx_lo;
+ }
+
+ map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto err_obj;
+ }
+
+ vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_map;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ goto err_map;
+
+ for_each_engine(engine, i915, id) {
+ struct i915_request *lo, *hi;
+ struct igt_live_test t;
+ u32 *cs;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ err = -EIO;
+ goto err_vma;
+ }
+
+ /*
+ * We create two requests. The low priority request
+ * busywaits on a semaphore (inside the ringbuffer where
+ * it should be preemptible) and the high priority request
+ * uses a MI_STORE_DWORD_IMM to update the semaphore value
+ * allowing the first request to complete. If preemption
+ * fails, we hang instead.
+ */
+
+ lo = i915_request_alloc(engine, ctx_lo);
+ if (IS_ERR(lo)) {
+ err = PTR_ERR(lo);
+ goto err_vma;
+ }
+
+ cs = intel_ring_begin(lo, 8);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(lo);
+ goto err_vma;
+ }
+ /* First store 1 at the vma's GGTT address; the CPU polls for it through map below. */
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(vma);
+ *cs++ = 0;
+ *cs++ = 1;
+
+ /* XXX Do we need a flush + invalidate here? */
+ /* Then busywait on the same address (presumably until it reads 0 - TODO confirm the SAD_EQ_SDD semantics). */
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_ggtt_offset(vma);
+ *cs++ = 0;
+
+ intel_ring_advance(lo, cs);
+ i915_request_add(lo);
+ /* Wait for lo's store to become visible through the CPU mapping. */
+ if (wait_for(READ_ONCE(*map), 10)) {
+ err = -ETIMEDOUT;
+ goto err_vma;
+ }
+
+ /* Low priority request should be busywaiting now */
+ if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) {
+ pr_err("%s: Busywaiting request did not!\n",
+ engine->name);
+ err = -EIO;
+ goto err_vma;
+ }
+
+ hi = i915_request_alloc(engine, ctx_hi);
+ if (IS_ERR(hi)) {
+ err = PTR_ERR(hi);
+ goto err_vma;
+ }
+
+ cs = intel_ring_begin(hi, 4);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(hi);
+ goto err_vma;
+ }
+ /* The high priority request writes 0 back to the same address. */
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(vma);
+ *cs++ = 0;
+ *cs++ = 0;
+
+ intel_ring_advance(hi, cs);
+ i915_request_add(hi);
+ /* lo is expected to complete only once hi's store has landed. */
+ if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+ pr_err("%s: Failed to preempt semaphore busywait!\n",
+ engine->name);
+
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+ GEM_TRACE_DUMP();
+
+ i915_gem_set_wedged(i915);
+ err = -EIO;
+ goto err_vma;
+ }
+ GEM_BUG_ON(READ_ONCE(*map));
+
+ if (igt_live_test_end(&t)) {
+ err = -EIO;
+ goto err_vma;
+ }
+ }
+
+ err = 0;
+err_vma:
+ i915_vma_unpin(vma);
+err_map:
+ i915_gem_object_unpin_map(obj);
+err_obj:
+ i915_gem_object_put(obj);
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
+err_unlock:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+static int live_preempt(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_gem_context *ctx_hi, *ctx_lo;
+ struct igt_spinner spin_hi, spin_lo;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ int err = -ENOMEM;
+ /*
+ * Check that a spinner submitted at the maximum user priority starts
+ * running while a spinner at the minimum user priority is still
+ * spinning on the same engine. Engines without preemption are
+ * skipped; devices without logical ring preemption return 0 untested.
+ * err starts as -ENOMEM to cover the spinner/context setup failures.
+ */
+ if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ return 0;
+ /* A missing scheduler capability is only logged; the test still runs. */
+ if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+ pr_err("Logical preemption supported, but not exposed\n");
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ if (igt_spinner_init(&spin_hi, i915))
+ goto err_unlock;
+
+ if (igt_spinner_init(&spin_lo, i915))
+ goto err_spin_hi;
+
+ ctx_hi = kernel_context(i915);
+ if (!ctx_hi)
+ goto err_spin_lo;
+ ctx_hi->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+
+ ctx_lo = kernel_context(i915);
+ if (!ctx_lo)
+ goto err_ctx_hi;
+ ctx_lo->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+
+ for_each_engine(engine, i915, id) {
+ struct igt_live_test t;
+ struct i915_request *rq;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+
+ rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&spin_lo, rq)) {
+ GEM_TRACE("lo spinner failed to start\n");
+ GEM_TRACE_DUMP();
+ i915_gem_set_wedged(i915);
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+ /* spin_lo is left spinning; the high priority spinner must now start on the same engine. */
+ rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ igt_spinner_end(&spin_lo);
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&spin_hi, rq)) {
+ GEM_TRACE("hi spinner failed to start\n");
+ GEM_TRACE_DUMP();
+ i915_gem_set_wedged(i915);
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+ /* Both spinners are ended before the live test is closed. */
+ igt_spinner_end(&spin_hi);
+ igt_spinner_end(&spin_lo);
+
+ if (igt_live_test_end(&t)) {
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+ }
+
+ err = 0;
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
+err_spin_lo:
+ igt_spinner_fini(&spin_lo);
+err_spin_hi:
+ igt_spinner_fini(&spin_hi);
+err_unlock:
+ igt_flush_test(i915, I915_WAIT_LOCKED);
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+/*
+ * live_late_preempt - raise a request's priority after submission.
+ *
+ * Both contexts are created without any priority adjustment. On each
+ * preemption-capable engine the first spinner is started, a second one is
+ * queued behind it and must NOT start. The second request is then
+ * rescheduled to I915_PRIORITY_MAX through engine->schedule() and must now
+ * be seen running while the first spinner is still active.
+ *
+ * Returns 0 on success, or when logical ring preemption is not available;
+ * a negative error code otherwise. Failures that go through err_wedged end
+ * both spinners, wedge the device and report -EIO.
+ */
+static int live_late_preempt(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_gem_context *ctx_hi, *ctx_lo;
+ struct igt_spinner spin_hi, spin_lo;
+ struct intel_engine_cs *engine;
+ struct i915_sched_attr attr = {};
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ int err = -ENOMEM;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ return 0;
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ /* err is still -ENOMEM for each of the setup failures below. */
+ if (igt_spinner_init(&spin_hi, i915))
+ goto err_unlock;
+
+ if (igt_spinner_init(&spin_lo, i915))
+ goto err_spin_hi;
+
+ ctx_hi = kernel_context(i915);
+ if (!ctx_hi)
+ goto err_spin_lo;
+
+ ctx_lo = kernel_context(i915);
+ if (!ctx_lo)
+ goto err_ctx_hi;
+
+ for_each_engine(engine, i915, id) {
+ struct igt_live_test t;
+ struct i915_request *rq;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+
+ rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&spin_lo, rq)) {
+ pr_err("First context failed to start\n");
+ goto err_wedged;
+ }
+
+ rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
+ MI_NOOP);
+ if (IS_ERR(rq)) {
+ igt_spinner_end(&spin_lo);
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ /* Note the inverted test: the second spinner must not run yet. */
+ i915_request_add(rq);
+ if (igt_wait_for_spinner(&spin_hi, rq)) {
+ pr_err("Second context overtook first?\n");
+ goto err_wedged;
+ }
+
+ /* Bump the already queued request and expect it to take over. */
+ attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
+ engine->schedule(rq, &attr);
+
+ if (!igt_wait_for_spinner(&spin_hi, rq)) {
+ pr_err("High priority context failed to preempt the low priority context\n");
+ GEM_TRACE_DUMP();
+ goto err_wedged;
+ }
+
+ igt_spinner_end(&spin_hi);
+ igt_spinner_end(&spin_lo);
+
+ if (igt_live_test_end(&t)) {
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+ }
+
+ err = 0;
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
+err_spin_lo:
+ igt_spinner_fini(&spin_lo);
+err_spin_hi:
+ igt_spinner_fini(&spin_hi);
+err_unlock:
+ igt_flush_test(i915, I915_WAIT_LOCKED);
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+
+ /* Out-of-line failure path: stop both spinners, then rejoin the unwind. */
+err_wedged:
+ igt_spinner_end(&spin_hi);
+ igt_spinner_end(&spin_lo);
+ i915_gem_set_wedged(i915);
+ err = -EIO;
+ goto err_ctx_lo;
+}
+
+/*
+ * A context paired with its own spinner. Set up by preempt_client_init()
+ * and torn down by preempt_client_fini().
+ */
+struct preempt_client {
+ struct igt_spinner spin;
+ struct i915_gem_context *ctx;
+};
+
+/*
+ * Populate @c with a fresh kernel context and an initialised spinner.
+ *
+ * Returns 0 on success or -ENOMEM if either step fails; on failure nothing
+ * is left for the caller to release.
+ */
+static int preempt_client_init(struct drm_i915_private *i915,
+			       struct preempt_client *c)
+{
+	c->ctx = kernel_context(i915);
+	if (!c->ctx)
+		return -ENOMEM;
+
+	if (igt_spinner_init(&c->spin, i915)) {
+		/* Undo the context creation before reporting failure. */
+		kernel_context_close(c->ctx);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Release what preempt_client_init() set up: the spinner is finalised
+ * first, then the context is closed.
+ */
+static void preempt_client_fini(struct preempt_client *c)
+{
+ igt_spinner_fini(&c->spin);
+ kernel_context_close(c->ctx);
+}
+
+/*
+ * live_suppress_self_preempt - bumping the running request must not preempt.
+ *
+ * On each preemption-capable engine, client A's spinner is started. Then,
+ * eight times over: a spinner for client B is queued, the priority of the
+ * currently running request is raised to I915_PRIORITY_MAX, A's spinner is
+ * ended and B's is awaited, after which the two clients swap roles. At the
+ * end engine->execlists.preempt_hang.count must still be zero.
+ *
+ * Returns 0 on success, or when the test does not apply (no logical ring
+ * preemption, or GuC submission in use); a negative error code otherwise.
+ */
+static int live_suppress_self_preempt(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
+ };
+ struct preempt_client a, b;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ int err = -ENOMEM;
+
+ /*
+ * Verify that if a preemption request does not cause a change in
+ * the current execution order, the preempt-to-idle injection is
+ * skipped and that we do not accidentally apply it after the CS
+ * completion event.
+ */
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ return 0;
+
+ if (USES_GUC_SUBMISSION(i915))
+ return 0; /* presume black blox */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ if (preempt_client_init(i915, &a))
+ goto err_unlock;
+ if (preempt_client_init(i915, &b))
+ goto err_client_a;
+
+ for_each_engine(engine, i915, id) {
+ struct i915_request *rq_a, *rq_b;
+ int depth;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ /* Start counting from a clean slate for this engine. */
+ engine->execlists.preempt_hang.count = 0;
+
+ rq_a = igt_spinner_create_request(&a.spin,
+ a.ctx, engine,
+ MI_NOOP);
+ if (IS_ERR(rq_a)) {
+ err = PTR_ERR(rq_a);
+ goto err_client_b;
+ }
+
+ i915_request_add(rq_a);
+ if (!igt_wait_for_spinner(&a.spin, rq_a)) {
+ pr_err("First client failed to start\n");
+ goto err_wedged;
+ }
+
+ for (depth = 0; depth < 8; depth++) {
+ rq_b = igt_spinner_create_request(&b.spin,
+ b.ctx, engine,
+ MI_NOOP);
+ if (IS_ERR(rq_b)) {
+ err = PTR_ERR(rq_b);
+ goto err_client_b;
+ }
+ i915_request_add(rq_b);
+
+ /* Raise the priority of the request that is already running. */
+ GEM_BUG_ON(i915_request_completed(rq_a));
+ engine->schedule(rq_a, &attr);
+ igt_spinner_end(&a.spin);
+
+ if (!igt_wait_for_spinner(&b.spin, rq_b)) {
+ pr_err("Second client failed to start\n");
+ goto err_wedged;
+ }
+
+ /* B is now the running client; swap roles for the next round. */
+ swap(a, b);
+ rq_a = rq_b;
+ }
+ igt_spinner_end(&a.spin);
+
+ if (engine->execlists.preempt_hang.count) {
+ pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n",
+ engine->execlists.preempt_hang.count,
+ depth);
+ err = -EINVAL;
+ goto err_client_b;
+ }
+
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ goto err_wedged;
+ }
+
+ err = 0;
+err_client_b:
+ preempt_client_fini(&b);
+err_client_a:
+ preempt_client_fini(&a);
+err_unlock:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+
+ /* Out-of-line failure path: stop both spinners, then rejoin the unwind. */
+err_wedged:
+ igt_spinner_end(&b.spin);
+ igt_spinner_end(&a.spin);
+ i915_gem_set_wedged(i915);
+ err = -EIO;
+ goto err_client_b;
+}
+
+/*
+ * Fence notification callback that ignores its arguments and always
+ * returns NOTIFY_DONE. dummy_request() installs it on the submit fence.
+ */
+static int __i915_sw_fence_call
+dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+ return NOTIFY_DONE;
+}
+
+/*
+ * Build a stand-alone request object bound to @engine that reports itself
+ * as not completed until dummy_request_free() is called on it.
+ *
+ * The request is zero-allocated with kzalloc() and its submit fence is
+ * initialised and committed here.
+ *
+ * Returns the new request, or NULL if the allocation fails.
+ */
+static struct i915_request *dummy_request(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = kzalloc(sizeof(*rq), GFP_KERNEL);
+ if (!rq)
+ return NULL;
+
+ INIT_LIST_HEAD(&rq->active_list);
+ rq->engine = engine;
+
+ i915_sched_node_init(&rq->sched);
+
+ /* mark this request as permanently incomplete */
+ rq->fence.seqno = 1;
+ BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
+ /*
+ * Point the seqno readback at the second u32 of the 64-bit seqno
+ * written above. NOTE(review): that is the zero upper half only on a
+ * little-endian layout - confirm.
+ */
+ rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
+ GEM_BUG_ON(i915_request_completed(rq));
+
+ i915_sw_fence_init(&rq->submit, dummy_notify);
+ i915_sw_fence_commit(&rq->submit);
+
+ return rq;
+}
+
+/*
+ * Dispose of a request created by dummy_request(): mark it complete,
+ * finalise its scheduling node and submit fence, then release it through
+ * dma_fence_free().
+ */
+static void dummy_request_free(struct i915_request *dummy)
+{
+ i915_request_mark_complete(dummy);
+ i915_sched_node_fini(&dummy->sched);
+ i915_sw_fence_fini(&dummy->submit);
+
+ dma_fence_free(&dummy->fence);
+}
+
+/*
+ * live_suppress_wait_preempt - waiting on a request must not preempt.
+ *
+ * Four clients each submit a spinner to the same engine. For every depth
+ * in turn, the request at that depth is waited upon with
+ * I915_WAIT_PRIORITY for one jiffy; the wait must time out (-ETIME) and
+ * engine->execlists.preempt_hang.count must still be zero afterwards.
+ *
+ * Engines without preemption or without emit_init_breadcrumb are skipped.
+ *
+ * Returns 0 on success, or when logical ring preemption is not available;
+ * a negative error code otherwise.
+ */
+static int live_suppress_wait_preempt(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct preempt_client client[4];
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ int err = -ENOMEM;
+ int i;
+
+ /*
+ * Waiters are given a little priority nudge, but not enough
+ * to actually cause any preemption. Double check that we do
+ * not needlessly generate preempt-to-idle cycles.
+ */
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ return 0;
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
+ goto err_unlock;
+ if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
+ goto err_client_0;
+ if (preempt_client_init(i915, &client[2])) /* head of queue */
+ goto err_client_1;
+ if (preempt_client_init(i915, &client[3])) /* bystander */
+ goto err_client_2;
+
+ for_each_engine(engine, i915, id) {
+ int depth;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (!engine->emit_init_breadcrumb)
+ continue;
+
+ for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
+ struct i915_request *rq[ARRAY_SIZE(client)];
+ struct i915_request *dummy;
+
+ engine->execlists.preempt_hang.count = 0;
+
+ /* err is still -ENOMEM if this allocation fails. */
+ dummy = dummy_request(engine);
+ if (!dummy)
+ goto err_client_3;
+
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ rq[i] = igt_spinner_create_request(&client[i].spin,
+ client[i].ctx, engine,
+ MI_NOOP);
+ /*
+ * NOTE(review): the error stored here is replaced by
+ * -EIO at err_wedged, and dummy is not freed on this
+ * path.
+ */
+ if (IS_ERR(rq[i])) {
+ err = PTR_ERR(rq[i]);
+ goto err_wedged;
+ }
+
+ /* Disable NEWCLIENT promotion */
+ __i915_active_request_set(&rq[i]->timeline->last_request,
+ dummy);
+ i915_request_add(rq[i]);
+ }
+
+ dummy_request_free(dummy);
+
+ GEM_BUG_ON(i915_request_completed(rq[0]));
+ if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
+ pr_err("%s: First client failed to start\n",
+ engine->name);
+ goto err_wedged;
+ }
+ GEM_BUG_ON(!i915_request_started(rq[0]));
+
+ /* Every spinner is still active, so this wait has to time out. */
+ if (i915_request_wait(rq[depth],
+ I915_WAIT_LOCKED |
+ I915_WAIT_PRIORITY,
+ 1) != -ETIME) {
+ pr_err("%s: Waiter depth:%d completed!\n",
+ engine->name, depth);
+ goto err_wedged;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(client); i++)
+ igt_spinner_end(&client[i].spin);
+
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ goto err_wedged;
+
+ if (engine->execlists.preempt_hang.count) {
+ pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
+ engine->name,
+ engine->execlists.preempt_hang.count,
+ depth);
+ err = -EINVAL;
+ goto err_client_3;
+ }
+ }
+ }
+
+ err = 0;
+err_client_3:
+ preempt_client_fini(&client[3]);
+err_client_2:
+ preempt_client_fini(&client[2]);
+err_client_1:
+ preempt_client_fini(&client[1]);
+err_client_0:
+ preempt_client_fini(&client[0]);
+err_unlock:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+
+ /* Out-of-line failure path: stop every spinner, then rejoin the unwind. */
+err_wedged:
+ for (i = 0; i < ARRAY_SIZE(client); i++)
+ igt_spinner_end(&client[i].spin);
+ i915_gem_set_wedged(i915);
+ err = -EIO;
+ goto err_client_3;
+}
+
+/*
+ * live_chain_preempt - preempt over an increasingly long request chain.
+ *
+ * For each preemption-capable engine the ring footprint of one spinner
+ * request is measured to bound how many requests are queued. Then, for
+ * each prime count up to that bound: a "hi" spinner is started, a "lo"
+ * spinner and count further "lo" requests are queued behind it, and a
+ * final "hi" request is queued and rescheduled to I915_PRIORITY_MAX.
+ * After the first "hi" spinner is ended, that final request must complete
+ * within HZ / 5 while the "lo" spinner is still spinning. The "lo" chain
+ * is then released and flushed.
+ *
+ * Returns 0 on success, or when logical ring preemption is not available;
+ * a negative error code otherwise. Every failure inside the engine loop
+ * goes through err_wedged and is reported as -EIO.
+ */
+static int live_chain_preempt(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ struct preempt_client hi, lo;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ int err = -ENOMEM;
+
+ /*
+ * Build a chain AB...BA between two contexts (A, B) and request
+ * preemption of the last request. It should then complete before
+ * the previously submitted spinner in B.
+ */
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ return 0;
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ if (preempt_client_init(i915, &hi))
+ goto err_unlock;
+
+ if (preempt_client_init(i915, &lo))
+ goto err_client_hi;
+
+ for_each_engine(engine, i915, id) {
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+ };
+ struct igt_live_test t;
+ struct i915_request *rq;
+ int ring_size, count, i;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ rq = igt_spinner_create_request(&lo.spin,
+ lo.ctx, engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq))
+ goto err_wedged;
+ i915_request_add(rq);
+
+ /*
+ * Bytes of ring used by this one request (corrected for
+ * wrap-around), turned into how many such requests fit in
+ * the ring.
+ */
+ ring_size = rq->wa_tail - rq->head;
+ if (ring_size < 0)
+ ring_size += rq->ring->size;
+ ring_size = rq->ring->size / ring_size;
+ pr_debug("%s(%s): Using maximum of %d requests\n",
+ __func__, engine->name, ring_size);
+
+ igt_spinner_end(&lo.spin);
+ if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 2) < 0) {
+ pr_err("Timed out waiting to flush %s\n", engine->name);
+ goto err_wedged;
+ }
+
+ if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ err = -EIO;
+ goto err_wedged;
+ }
+
+ for_each_prime_number_from(count, 1, ring_size) {
+ /* Head of the chain: a running spinner in "hi". */
+ rq = igt_spinner_create_request(&hi.spin,
+ hi.ctx, engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq))
+ goto err_wedged;
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&hi.spin, rq))
+ goto err_wedged;
+
+ /* Body of the chain: a "lo" spinner plus count requests. */
+ rq = igt_spinner_create_request(&lo.spin,
+ lo.ctx, engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq))
+ goto err_wedged;
+ i915_request_add(rq);
+
+ for (i = 0; i < count; i++) {
+ rq = i915_request_alloc(engine, lo.ctx);
+ if (IS_ERR(rq))
+ goto err_wedged;
+ i915_request_add(rq);
+ }
+
+ /* Tail of the chain: the "hi" request that must jump the queue. */
+ rq = i915_request_alloc(engine, hi.ctx);
+ if (IS_ERR(rq))
+ goto err_wedged;
+ i915_request_add(rq);
+ engine->schedule(rq, &attr);
+
+ igt_spinner_end(&hi.spin);
+ if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+ struct drm_printer p =
+ drm_info_printer(i915->drm.dev);
+
+ pr_err("Failed to preempt over chain of %d\n",
+ count);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+ goto err_wedged;
+ }
+ igt_spinner_end(&lo.spin);
+
+ /* Flush the "lo" chain before the next, longer, round. */
+ rq = i915_request_alloc(engine, lo.ctx);
+ if (IS_ERR(rq))
+ goto err_wedged;
+ i915_request_add(rq);
+ if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+ struct drm_printer p =
+ drm_info_printer(i915->drm.dev);
+
+ pr_err("Failed to flush low priority chain of %d requests\n",
+ count);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+ goto err_wedged;
+ }
+ }
+
+ if (igt_live_test_end(&t)) {
+ err = -EIO;
+ goto err_wedged;
+ }
+ }
+
+ err = 0;
+err_client_lo:
+ preempt_client_fini(&lo);
+err_client_hi:
+ preempt_client_fini(&hi);
+err_unlock:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+
+ /* Out-of-line failure path: stop both spinners, then rejoin the unwind. */
+err_wedged:
+ igt_spinner_end(&hi.spin);
+ igt_spinner_end(&lo.spin);
+ i915_gem_set_wedged(i915);
+ err = -EIO;
+ goto err_client_lo;
+}
+
+/*
+ * live_preempt_hang - recover from a hang injected during preemption.
+ *
+ * On each preemption-capable engine a low priority spinner is started and
+ * a high priority spinner is submitted with
+ * engine->execlists.preempt_hang.inject_hang set. Once the preempt_hang
+ * completion fires the engine is reset, after which the high priority
+ * spinner must be seen running.
+ *
+ * Returns 0 on success, or when the test does not apply (no logical ring
+ * preemption or no per-engine reset); a negative error code otherwise.
+ */
+static int live_preempt_hang(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx_hi, *ctx_lo;
+	struct igt_spinner spin_hi, spin_lo;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	int err = -ENOMEM;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	/* err is still -ENOMEM for each of the setup failures below. */
+	if (igt_spinner_init(&spin_hi, i915))
+		goto err_unlock;
+
+	if (igt_spinner_init(&spin_lo, i915))
+		goto err_spin_hi;
+
+	/*
+	 * NOTE(review): live_preempt() wraps the same constants in
+	 * I915_USER_PRIORITY(); here they are assigned raw. Left unchanged.
+	 */
+	ctx_hi = kernel_context(i915);
+	if (!ctx_hi)
+		goto err_spin_lo;
+	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+
+	ctx_lo = kernel_context(i915);
+	if (!ctx_lo)
+		goto err_ctx_hi;
+	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
+						MI_ARB_CHECK);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (!igt_wait_for_spinner(&spin_lo, rq)) {
+			GEM_TRACE("lo spinner failed to start\n");
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+
+		rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
+						MI_ARB_CHECK);
+		if (IS_ERR(rq)) {
+			igt_spinner_end(&spin_lo);
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		/* Arm the hang injection before the request is queued. */
+		init_completion(&engine->execlists.preempt_hang.completion);
+		engine->execlists.preempt_hang.inject_hang = true;
+
+		i915_request_add(rq);
+
+		if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
+						 HZ / 10)) {
+			/* Terminate the message so it is emitted as its own line. */
+			pr_err("Preemption did not occur within timeout!\n");
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+
+		/* Reset this engine with its reset bit held in gpu_error.flags. */
+		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		i915_reset_engine(engine, NULL);
+		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+
+		engine->execlists.preempt_hang.inject_hang = false;
+
+		if (!igt_wait_for_spinner(&spin_hi, rq)) {
+			GEM_TRACE("hi spinner failed to start\n");
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+
+		igt_spinner_end(&spin_hi);
+		igt_spinner_end(&spin_lo);
+		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+	}
+
+	err = 0;
+err_ctx_lo:
+	kernel_context_close(ctx_lo);
+err_ctx_hi:
+	kernel_context_close(ctx_hi);
+err_spin_lo:
+	igt_spinner_fini(&spin_lo);
+err_spin_hi:
+	igt_spinner_fini(&spin_hi);
+err_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+/*
+ * Draw a value from @rnd: @min plus whatever
+ * i915_prandom_u32_max_state() yields for a bound of (@max - @min).
+ */
+static int random_range(struct rnd_state *rnd, int min, int max)
+{
+	const int span = max - min;
+
+	return min + i915_prandom_u32_max_state(span, rnd);
+}
+
+/*
+ * Draw a scheduling priority from @rnd via random_range(), bounded by
+ * I915_PRIORITY_MIN and I915_PRIORITY_MAX.
+ */
+static int random_priority(struct rnd_state *rnd)
+{
+	const int prio = random_range(rnd,
+				      I915_PRIORITY_MIN, I915_PRIORITY_MAX);
+
+	return prio;
+}
+
+/* Shared state for the preemption smoke tests. */
+struct preempt_smoke {
+ struct drm_i915_private *i915;
+ /* Array of ncontext contexts that smoke_context() picks from. */
+ struct i915_gem_context **contexts;
+ /* Engine that smoke_submit() allocates its request on. */
+ struct intel_engine_cs *engine;
+ /* Batch object to execute; callers pass NULL for the no-batch phase. */
+ struct drm_i915_gem_object *batch;
+ unsigned int ncontext;
+ /* Random state used for context and priority selection. */
+ struct rnd_state prng;
+ /* Number of requests submitted, written by smoke_crescendo_thread(). */
+ unsigned long count;
+};
+
+/* Pick one of the smoke test's contexts using its random state. */
+static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
+{
+	const u32 which = i915_prandom_u32_max_state(smoke->ncontext,
+						     &smoke->prng);
+
+	return smoke->contexts[which];
+}
+
+/*
+ * smoke_submit - queue one request on smoke->engine for @ctx.
+ * @smoke: smoke test state; only smoke->engine is used here
+ * @ctx: context to submit from; its sched.priority is overwritten
+ * @prio: priority stored into @ctx before the request is allocated
+ * @batch: optional batch object; when non-NULL it is pinned into @ctx's
+ * ppgtt and a batch-buffer start for it is emitted in the request
+ *
+ * Returns 0 on success or a negative error code. Once a request has been
+ * allocated it is always added, even if emitting the batch start fails.
+ */
+static int smoke_submit(struct preempt_smoke *smoke,
+ struct i915_gem_context *ctx, int prio,
+ struct drm_i915_gem_object *batch)
+{
+ struct i915_request *rq;
+ struct i915_vma *vma = NULL;
+ int err = 0;
+
+ if (batch) {
+ vma = i915_vma_instance(batch, &ctx->ppgtt->vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ return err;
+ }
+
+ ctx->sched.priority = prio;
+
+ rq = i915_request_alloc(smoke->engine, ctx);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto unpin;
+ }
+
+ if (vma) {
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ }
+
+ i915_request_add(rq);
+
+unpin:
+ /* The pin was only needed while the request was being built. */
+ if (vma)
+ i915_vma_unpin(vma);
+
+ return err;
+}
+
+/*
+ * Thread body for smoke_crescendo(). @arg is a struct preempt_smoke.
+ *
+ * Until the IGT timeout expires, a random context is picked and a request
+ * is submitted under struct_mutex with priority count % I915_PRIORITY_MAX,
+ * so the priority climbs with every submission. The number of submissions
+ * is stored in smoke->count on normal exit.
+ *
+ * Returns 0, or the first error from smoke_submit() (in which case
+ * smoke->count is not updated).
+ */
+static int smoke_crescendo_thread(void *arg)
+{
+ struct preempt_smoke *smoke = arg;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+
+ count = 0;
+ do {
+ struct i915_gem_context *ctx = smoke_context(smoke);
+ int err;
+
+ mutex_lock(&smoke->i915->drm.struct_mutex);
+ err = smoke_submit(smoke,
+ ctx, count % I915_PRIORITY_MAX,
+ smoke->batch);
+ mutex_unlock(&smoke->i915->drm.struct_mutex);
+ if (err)
+ return err;
+
+ count++;
+ } while (!__igt_timeout(end_time, NULL));
+
+ smoke->count = count;
+ return 0;
+}
+
+/*
+ * smoke_crescendo - run one smoke_crescendo_thread() per engine.
+ * @smoke: template state; copied into a private slot for every engine
+ * @flags: BATCH to have the threads execute smoke->batch, 0 for none
+ *
+ * Called with struct_mutex held; the mutex is dropped while the threads
+ * run (each thread takes it per submission) and re-taken before return.
+ *
+ * Always returns 0. NOTE(review): thread and spawn errors are collected in
+ * err but, as in the original code, not returned to the caller.
+ */
+static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
+#define BATCH BIT(0)
+{
+	struct task_struct *tsk[I915_NUM_ENGINES] = {};
+	struct preempt_smoke arg[I915_NUM_ENGINES];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned long count;
+	int err = 0;
+
+	mutex_unlock(&smoke->i915->drm.struct_mutex);
+
+	for_each_engine(engine, smoke->i915, id) {
+		arg[id] = *smoke;
+		arg[id].engine = engine;
+		if (!(flags & BATCH))
+			arg[id].batch = NULL;
+		arg[id].count = 0;
+
+		/*
+		 * Hand each thread its own slot. Passing the array itself
+		 * would alias slot 0 for every thread, so all of them would
+		 * hammer the first engine and share one counter.
+		 */
+		tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
+				      "igt/smoke:%d", id);
+		if (IS_ERR(tsk[id])) {
+			err = PTR_ERR(tsk[id]);
+			break;
+		}
+		/* Keep the task alive until kthread_stop() below. */
+		get_task_struct(tsk[id]);
+	}
+
+	count = 0;
+	for_each_engine(engine, smoke->i915, id) {
+		int status;
+
+		/* Skip engines whose thread was never started. */
+		if (IS_ERR_OR_NULL(tsk[id]))
+			continue;
+
+		status = kthread_stop(tsk[id]);
+		if (status && !err)
+			err = status;
+
+		count += arg[id].count;
+
+		put_task_struct(tsk[id]);
+	}
+
+	mutex_lock(&smoke->i915->drm.struct_mutex);
+
+	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
+		count, flags,
+		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+	return 0;
+}
+
+/*
+ * smoke_random - submit requests with random priorities to every engine.
+ * @smoke: smoke test state; smoke->engine is used as the loop cursor and
+ * is therefore overwritten
+ * @flags: BATCH to execute smoke->batch with each request, 0 for none
+ *
+ * Until the IGT timeout expires, each pass over the engines submits one
+ * request per engine from a random context at a random priority.
+ *
+ * Returns 0, or the first error from smoke_submit().
+ */
+static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
+{
+ enum intel_engine_id id;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+
+ count = 0;
+ do {
+ for_each_engine(smoke->engine, smoke->i915, id) {
+ struct i915_gem_context *ctx = smoke_context(smoke);
+ int err;
+
+ err = smoke_submit(smoke,
+ ctx, random_priority(&smoke->prng),
+ flags & BATCH ? smoke->batch : NULL);
+ if (err)
+ return err;
+
+ count++;
+ }
+ } while (!__igt_timeout(end_time, NULL));
+
+ pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
+ count, flags,
+ RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+ return 0;
+}
+
+/*
+ * live_preempt_smoke - stress preemption with many contexts and priorities.
+ *
+ * Creates 1024 kernel contexts and a one-page batch filled with
+ * MI_ARB_CHECK and terminated by MI_BATCH_BUFFER_END, then runs
+ * smoke_crescendo() and smoke_random() twice: once without the batch and
+ * once with it.
+ *
+ * Returns 0 on success, or when logical ring preemption is not available;
+ * a negative error code otherwise.
+ */
+static int live_preempt_smoke(void *arg)
+{
+ struct preempt_smoke smoke = {
+ .i915 = arg,
+ .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
+ .ncontext = 1024,
+ };
+ const unsigned int phase[] = { 0, BATCH };
+ intel_wakeref_t wakeref;
+ struct igt_live_test t;
+ int err = -ENOMEM;
+ u32 *cs;
+ int n;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
+ return 0;
+
+ smoke.contexts = kmalloc_array(smoke.ncontext,
+ sizeof(*smoke.contexts),
+ GFP_KERNEL);
+ if (!smoke.contexts)
+ return -ENOMEM;
+
+ mutex_lock(&smoke.i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(smoke.i915);
+
+ smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
+ if (IS_ERR(smoke.batch)) {
+ err = PTR_ERR(smoke.batch);
+ goto err_unlock;
+ }
+
+ cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_batch;
+ }
+ /* Fill all but the last dword; the last one terminates the batch. */
+ for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
+ cs[n] = MI_ARB_CHECK;
+ cs[n] = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(smoke.batch);
+ i915_gem_object_unpin_map(smoke.batch);
+
+ if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) {
+ err = -EIO;
+ goto err_batch;
+ }
+
+ /* err is still -ENOMEM if a context cannot be created. */
+ for (n = 0; n < smoke.ncontext; n++) {
+ smoke.contexts[n] = kernel_context(smoke.i915);
+ if (!smoke.contexts[n])
+ goto err_ctx;
+ }
+
+ for (n = 0; n < ARRAY_SIZE(phase); n++) {
+ err = smoke_crescendo(&smoke, phase[n]);
+ if (err)
+ goto err_ctx;
+
+ err = smoke_random(&smoke, phase[n]);
+ if (err)
+ goto err_ctx;
+ }
+
+err_ctx:
+ if (igt_live_test_end(&t))
+ err = -EIO;
+
+ /*
+ * The array came from kmalloc_array() and is not zeroed, so the NULL
+ * stored by a failed kernel_context() marks where to stop.
+ */
+ for (n = 0; n < smoke.ncontext; n++) {
+ if (!smoke.contexts[n])
+ break;
+ kernel_context_close(smoke.contexts[n]);
+ }
+
+err_batch:
+ i915_gem_object_put(smoke.batch);
+err_unlock:
+ intel_runtime_pm_put(smoke.i915, wakeref);
+ mutex_unlock(&smoke.i915->drm.struct_mutex);
+ kfree(smoke.contexts);
+
+ return err;
+}
+
+/*
+ * Entry point for the execlists live selftests.
+ *
+ * Runs every subtest listed below through i915_subtests(). Returns 0
+ * without running anything when the device has no execlists or is already
+ * terminally wedged.
+ */
+int intel_execlists_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_sanitycheck),
+		SUBTEST(live_busywait_preempt),
+		SUBTEST(live_preempt),
+		SUBTEST(live_late_preempt),
+		SUBTEST(live_suppress_self_preempt),
+		SUBTEST(live_suppress_wait_preempt),
+		SUBTEST(live_chain_preempt),
+		SUBTEST(live_preempt_hang),
+		SUBTEST(live_preempt_smoke),
+	};
+
+	/* Nothing to exercise in either of these cases. */
+	if (!HAS_EXECLISTS(i915) || i915_terminally_wedged(i915))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+#include "intel_reset.h"
+
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/igt_wedge_me.h"
+#include "selftests/mock_context.h"
+#include "selftests/mock_drm.h"
+
+/*
+ * Per-platform table of register offsets that wo_register() matches
+ * against; check_dirty_whitelist() skips any register found here.
+ * NOTE(review): "wo" presumably stands for write-only - confirm.
+ */
+static const struct wo_register {
+ enum intel_platform platform;
+ u32 reg;
+} wo_registers[] = {
+ { INTEL_GEMINILAKE, 0x731c }
+};
+
+/* Room for an engine name plus the "_REF" suffix added by reference_lists_init(). */
+#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 4)
+/*
+ * Reference workaround lists: one for the GT and one named list per
+ * engine. Filled by reference_lists_init(), released by
+ * reference_lists_fini().
+ */
+struct wa_lists {
+ struct i915_wa_list gt_wa_list;
+ struct {
+ char name[REF_NAME_MAX];
+ struct i915_wa_list wa_list;
+ } engine[I915_NUM_ENGINES];
+};
+
+/*
+ * Build a fresh set of reference workaround lists in @lists.
+ *
+ * @lists is zeroed first. The GT list is named "GT_REF" and filled by
+ * gt_init_workarounds(); each engine gets a list named "<engine>_REF"
+ * filled by engine_init_workarounds().
+ */
+static void
+reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ memset(lists, 0, sizeof(*lists));
+
+ wa_init_start(&lists->gt_wa_list, "GT_REF");
+ gt_init_workarounds(i915, &lists->gt_wa_list);
+ wa_init_finish(&lists->gt_wa_list);
+
+ for_each_engine(engine, i915, id) {
+ struct i915_wa_list *wal = &lists->engine[id].wa_list;
+ char *name = lists->engine[id].name;
+
+ /* The name buffer lives inside @lists, alongside the list it names. */
+ snprintf(name, REF_NAME_MAX, "%s_REF", engine->name);
+
+ wa_init_start(wal, name);
+ engine_init_workarounds(engine, wal);
+ wa_init_finish(wal);
+ }
+}
+
+/*
+ * Free the lists built by reference_lists_init(): each engine's list
+ * first, then the GT list.
+ */
+static void
+reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_wa_list *wal = &lists->engine[id].wa_list;
+
+		intel_wa_list_free(wal);
+	}
+
+	intel_wa_list_free(&lists->gt_wa_list);
+}
+
+/*
+ * read_nonprivs - ask the GPU to dump the RING_FORCE_TO_NONPRIV slots.
+ * @ctx: context the request is submitted from
+ * @engine: engine whose slots are read
+ *
+ * Allocates a one-page object pre-filled with 0xc5, pins it into the GGTT
+ * and submits a request that stores each of the RING_MAX_NONPRIV_SLOTS
+ * registers into consecutive u32 slots of that object.
+ *
+ * Returns the result object, or an ERR_PTR() on failure. The request is
+ * only queued here; the caller is responsible for waiting for the writes
+ * before reading the object.
+ */
+static struct drm_i915_gem_object *
+read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+ const u32 base = engine->mmio_base;
+ struct drm_i915_gem_object *result;
+ intel_wakeref_t wakeref;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 srm, *cs;
+ int err;
+ int i;
+
+ result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+ if (IS_ERR(result))
+ return result;
+
+ i915_gem_object_set_cache_coherency(result, I915_CACHE_LLC);
+
+ cs = i915_gem_object_pin_map(result, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_obj;
+ }
+ /* Poison the page so that slots the GPU never wrote stand out. */
+ memset(cs, 0xc5, PAGE_SIZE);
+ i915_gem_object_flush_map(result);
+ i915_gem_object_unpin_map(result);
+
+ vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ goto err_obj;
+
+ /* Pre-load an error in case the allocation below does not run. */
+ rq = ERR_PTR(-ENODEV);
+ with_intel_runtime_pm(engine->i915, wakeref)
+ rq = i915_request_alloc(engine, ctx);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_pin;
+ }
+
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ goto err_req;
+
+ /*
+ * NOTE(review): the increment on gen8+ presumably adjusts the command
+ * length field for the extra address dword - confirm.
+ */
+ srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+ if (INTEL_GEN(ctx->i915) >= 8)
+ srm++;
+
+ cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_req;
+ }
+
+ /* One four-dword store per slot: command, register, address, zero. */
+ for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
+ *cs++ = srm;
+ *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i));
+ *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
+ *cs++ = 0;
+ }
+ intel_ring_advance(rq, cs);
+
+ /* Extra reference taken here is handed over as the active reference. */
+ i915_gem_object_get(result);
+ i915_gem_object_set_active_reference(result);
+
+ i915_request_add(rq);
+ i915_vma_unpin(vma);
+
+ return result;
+
+ /* An allocated request is still added on failure rather than dropped. */
+err_req:
+ i915_request_add(rq);
+err_pin:
+ i915_vma_unpin(vma);
+err_obj:
+ i915_gem_object_put(result);
+ return ERR_PTR(err);
+}
+
+/*
+ * Expected register offset for RING_NONPRIV slot @i: the engine's i-th
+ * whitelist entry if there is one, otherwise RING_NOPID for the engine.
+ */
+static u32
+get_whitelist_reg(const struct intel_engine_cs *engine, unsigned int i)
+{
+	if (i < engine->whitelist.count)
+		return i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+
+	/* Slots beyond the whitelist fall back to RING_NOPID. */
+	return i915_mmio_reg_offset(RING_NOPID(engine->mmio_base));
+}
+
+/*
+ * Log, for every RING_NONPRIV slot, the expected register offset next to
+ * the value found in @results.
+ */
+static void
+print_results(const struct intel_engine_cs *engine, const u32 *results)
+{
+	unsigned int slot = 0;
+
+	while (slot < RING_MAX_NONPRIV_SLOTS) {
+		const u32 want = get_whitelist_reg(engine, slot);
+		const u32 got = results[slot];
+
+		pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n",
+			slot, want, got);
+		slot++;
+	}
+}
+
+/*
+ * check_whitelist - compare the RING_NONPRIV slots with the whitelist.
+ * @ctx: context used to read the registers
+ * @engine: engine to check
+ *
+ * Reads the slots back through read_nonprivs() and compares every one
+ * with get_whitelist_reg(). On the first mismatch the whole table is
+ * logged and -EINVAL is returned.
+ *
+ * Returns 0 if all slots match, -EIO if the device ended up terminally
+ * wedged, or another negative error code.
+ */
+static int check_whitelist(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_object *results;
+ struct igt_wedge_me wedge;
+ u32 *vaddr;
+ int err;
+ int i;
+
+ results = read_nonprivs(ctx, engine);
+ if (IS_ERR(results))
+ return PTR_ERR(results);
+
+ /*
+ * NOTE(review): moving the object to the CPU domain presumably waits
+ * for the GPU writes; the wedge timeout guards that wait - confirm.
+ */
+ err = 0;
+ igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
+ err = i915_gem_object_set_to_cpu_domain(results, false);
+ if (i915_terminally_wedged(ctx->i915))
+ err = -EIO;
+ if (err)
+ goto out_put;
+
+ vaddr = i915_gem_object_pin_map(results, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out_put;
+ }
+
+ for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
+ u32 expected = get_whitelist_reg(engine, i);
+ u32 actual = vaddr[i];
+
+ if (expected != actual) {
+ print_results(engine, vaddr);
+ pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n",
+ i, expected, actual);
+
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(results);
+out_put:
+ i915_gem_object_put(results);
+ return err;
+}
+
+/*
+ * Reset callback for check_whitelist_across_reset(): calls i915_reset()
+ * with @engine's mask. Always returns 0.
+ */
+static int do_device_reset(struct intel_engine_cs *engine)
+{
+ i915_reset(engine->i915, engine->mask, "live_workarounds");
+ return 0;
+}
+
+/*
+ * Reset callback for check_whitelist_across_reset(): resets only @engine
+ * and returns the result of i915_reset_engine().
+ */
+static int do_engine_reset(struct intel_engine_cs *engine)
+{
+ return i915_reset_engine(engine, "live_workarounds");
+}
+
+/*
+ * switch_to_scratch_context - keep @engine busy in a throwaway context.
+ * @engine: engine to occupy
+ * @spin: spinner used to build the request
+ *
+ * A temporary kernel context is created, a spinner request is built on it
+ * and the context is closed again straight away. The request is then
+ * queued and the spinner is awaited.
+ *
+ * Returns 0 once the spinner is running, -ETIMEDOUT (after ending the
+ * spinner) if it fails to start, or the error from context or request
+ * creation.
+ */
+static int
+switch_to_scratch_context(struct intel_engine_cs *engine,
+			  struct igt_spinner *spin)
+{
+	struct i915_gem_context *scratch;
+	struct i915_request *rq = ERR_PTR(-ENODEV);
+	intel_wakeref_t wakeref;
+
+	scratch = kernel_context(engine->i915);
+	if (IS_ERR(scratch))
+		return PTR_ERR(scratch);
+
+	GEM_BUG_ON(i915_gem_context_is_bannable(scratch));
+
+	with_intel_runtime_pm(engine->i915, wakeref)
+		rq = igt_spinner_create_request(spin, scratch, engine, MI_NOOP);
+
+	/* The context is closed whether or not a request was created. */
+	kernel_context_close(scratch);
+
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_add(rq);
+
+	if (!spin || igt_wait_for_spinner(spin, rq))
+		return 0;
+
+	pr_err("Spinner failed to start\n");
+	igt_spinner_end(spin);
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * check_whitelist_across_reset - verify RING_NONPRIV survives a reset.
+ * @engine: engine under test
+ * @reset: callback performing the reset (see do_device_reset() and
+ * do_engine_reset())
+ * @name: label for the reset type, used in log messages
+ *
+ * The whitelist is checked before the reset, again in the same context
+ * after the reset, and finally in a freshly created context. While the
+ * reset runs the engine is kept busy in a scratch context by a spinner.
+ *
+ * Returns 0 on success or a negative error code. Both the spinner and the
+ * context are released on every exit path.
+ */
+static int check_whitelist_across_reset(struct intel_engine_cs *engine,
+					int (*reset)(struct intel_engine_cs *),
+					const char *name)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_gem_context *ctx;
+	struct igt_spinner spin;
+	intel_wakeref_t wakeref;
+	int err;
+
+	pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n",
+		engine->whitelist.count, name);
+
+	err = igt_spinner_init(&spin, i915);
+	if (err)
+		return err;
+
+	ctx = kernel_context(i915);
+	if (IS_ERR(ctx)) {
+		/* The spinner is already set up and must not be leaked. */
+		err = PTR_ERR(ctx);
+		goto out_spin;
+	}
+
+	err = check_whitelist(ctx, engine);
+	if (err) {
+		pr_err("Invalid whitelist *before* %s reset!\n", name);
+		goto out_ctx;
+	}
+
+	err = switch_to_scratch_context(engine, &spin);
+	if (err)
+		goto out_ctx;
+
+	with_intel_runtime_pm(i915, wakeref)
+		err = reset(engine);
+
+	/* Stop spinning now; the spinner itself is released at out_spin. */
+	igt_spinner_end(&spin);
+
+	if (err) {
+		pr_err("%s reset failed\n", name);
+		goto out_ctx;
+	}
+
+	err = check_whitelist(ctx, engine);
+	if (err) {
+		pr_err("Whitelist not preserved in context across %s reset!\n",
+		       name);
+		goto out_ctx;
+	}
+
+	/* Swap the old context for a brand new one and check once more. */
+	kernel_context_close(ctx);
+
+	ctx = kernel_context(i915);
+	if (IS_ERR(ctx)) {
+		/* No context left to close, but the spinner still is. */
+		err = PTR_ERR(ctx);
+		goto out_spin;
+	}
+
+	err = check_whitelist(ctx, engine);
+	if (err) {
+		pr_err("Invalid whitelist *after* %s reset in fresh context!\n",
+		       name);
+		goto out_ctx;
+	}
+
+out_ctx:
+	kernel_context_close(ctx);
+out_spin:
+	igt_spinner_fini(&spin);
+	return err;
+}
+
+/*
+ * Create a 16-page internal object, pin it into @ctx's ppgtt and move it
+ * to the WC domain for writing.
+ *
+ * Returns the pinned vma, or an ERR_PTR() on failure (in which case the
+ * object reference has been dropped).
+ */
+static struct i915_vma *create_batch(struct i915_gem_context *ctx)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int err;
+
+	obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	/* Pin first; only change domain if the pin went through. */
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (!err)
+		err = i915_gem_object_set_to_wc_domain(obj, true);
+	if (!err)
+		return vma;
+
+err_obj:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+/*
+ * Predict the value a register holds after @new is written over @old.
+ *
+ * @rsvd selects which bits take the new value. The special value 0x0000ffff
+ * is handled differently: the upper 16 bits of @new are then used as the
+ * per-write mask for the lower bits instead of @rsvd itself.
+ */
+static u32 reg_write(u32 old, u32 new, u32 rsvd)
+{
+	u32 mask;
+
+	if (rsvd == 0x0000ffff)
+		mask = new >> 16;
+	else
+		mask = rsvd;
+
+	return (old & ~mask) | (new & mask);
+}
+
+/*
+ * Report whether @reg appears in the wo_registers[] table for the platform
+ * that @engine belongs to.
+ */
+static bool wo_register(struct intel_engine_cs *engine, u32 reg)
+{
+	const enum intel_platform platform =
+		INTEL_INFO(engine->i915)->platform;
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(wo_registers); n++) {
+		if (wo_registers[n].platform != platform)
+			continue;
+		if (wo_registers[n].reg != reg)
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Write a series of test patterns to every whitelisted register of @engine
+ * from a batch run in @ctx, and check that each readback matches what
+ * reg_write() predicts.
+ *
+ * Registers for which wo_register() returns true are skipped. For each
+ * remaining register one batch is built and submitted; it saves the original
+ * value, writes each pattern (then each inverted pattern) storing the
+ * readback after every write, and finally reloads the original value.
+ *
+ * Returns 0 on success or a negative error code. A request that does not
+ * complete within HZ / 5 wedges the device and yields -EIO.
+ */
+static int check_dirty_whitelist(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ /* Test patterns; the last entry must stay 0xffffffff (asserted below). */
+ const u32 values[] = {
+ 0x00000000,
+ 0x01010101,
+ 0x10100101,
+ 0x03030303,
+ 0x30300303,
+ 0x05050505,
+ 0x50500505,
+ 0x0f0f0f0f,
+ 0xf00ff00f,
+ 0x10101010,
+ 0xf0f01010,
+ 0x30303030,
+ 0xa0a03030,
+ 0x50505050,
+ 0xc0c05050,
+ 0xf0f0f0f0,
+ 0x11111111,
+ 0x33333333,
+ 0x55555555,
+ 0x0000ffff,
+ 0x00ff00ff,
+ 0xff0000ff,
+ 0xffff00ff,
+ 0xffffffff,
+ };
+ struct i915_vma *scratch;
+ struct i915_vma *batch;
+ int err = 0, i, v;
+ u32 *cs, *results;
+
+ /*
+ * One slot for the original value plus one per pattern and one per
+ * inverted pattern (presumably the argument counts u32 slots -- confirm
+ * against create_scratch()).
+ */
+ scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1);
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
+
+ batch = create_batch(ctx);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_scratch;
+ }
+
+ for (i = 0; i < engine->whitelist.count; i++) {
+ u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+ u64 addr = scratch->node.start;
+ struct i915_request *rq;
+ u32 srm, lrm, rsvd;
+ u32 expect;
+ int idx;
+
+ if (wo_register(engine, reg))
+ continue;
+
+ /* On gen8+ both opcodes are incremented by one before use. */
+ srm = MI_STORE_REGISTER_MEM;
+ lrm = MI_LOAD_REGISTER_MEM;
+ if (INTEL_GEN(ctx->i915) >= 8)
+ lrm++, srm++;
+
+ pr_debug("%s: Writing garbage to %x\n",
+ engine->name, reg);
+
+ /* The same batch object is rewritten for every register. */
+ cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto out_batch;
+ }
+
+ /* SRM original */
+ *cs++ = srm;
+ *cs++ = reg;
+ *cs++ = lower_32_bits(addr);
+ *cs++ = upper_32_bits(addr);
+
+ /* Slot 0 holds the original value; readbacks start at slot 1. */
+ idx = 1;
+ for (v = 0; v < ARRAY_SIZE(values); v++) {
+ /* LRI garbage */
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = reg;
+ *cs++ = values[v];
+
+ /* SRM result */
+ *cs++ = srm;
+ *cs++ = reg;
+ *cs++ = lower_32_bits(addr + sizeof(u32) * idx);
+ *cs++ = upper_32_bits(addr + sizeof(u32) * idx);
+ idx++;
+ }
+ /* Second pass: the same patterns, bitwise inverted. */
+ for (v = 0; v < ARRAY_SIZE(values); v++) {
+ /* LRI garbage */
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = reg;
+ *cs++ = ~values[v];
+
+ /* SRM result */
+ *cs++ = srm;
+ *cs++ = reg;
+ *cs++ = lower_32_bits(addr + sizeof(u32) * idx);
+ *cs++ = upper_32_bits(addr + sizeof(u32) * idx);
+ idx++;
+ }
+ GEM_BUG_ON(idx * sizeof(u32) > scratch->size);
+
+ /* LRM original -- don't leave garbage in the context! */
+ *cs++ = lrm;
+ *cs++ = reg;
+ *cs++ = lower_32_bits(addr);
+ *cs++ = upper_32_bits(addr);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(batch->obj);
+ i915_gem_object_unpin_map(batch->obj);
+ i915_gem_chipset_flush(ctx->i915);
+
+ rq = i915_request_alloc(engine, ctx);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_batch;
+ }
+
+ if (engine->emit_init_breadcrumb) { /* Be nice if we hang */
+ err = engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto err_request;
+ }
+
+ err = engine->emit_bb_start(rq,
+ batch->node.start, PAGE_SIZE,
+ 0);
+ if (err)
+ goto err_request;
+
+ /* The request is added even when emission failed; err is checked after. */
+err_request:
+ i915_request_add(rq);
+ if (err)
+ goto out_batch;
+
+ if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+ pr_err("%s: Futzing %x timedout; cancelling test\n",
+ engine->name, reg);
+ i915_gem_set_wedged(ctx->i915);
+ err = -EIO;
+ goto out_batch;
+ }
+
+ results = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(results)) {
+ err = PTR_ERR(results);
+ goto out_batch;
+ }
+
+ /*
+ * results[ARRAY_SIZE(values)] is the readback after the last
+ * first-pass write, i.e. after writing 0xffffffff; it is used as
+ * the mask of bits that accepted the write.
+ */
+ GEM_BUG_ON(values[ARRAY_SIZE(values) - 1] != 0xffffffff);
+ rsvd = results[ARRAY_SIZE(values)]; /* detect write masking */
+ if (!rsvd) {
+ pr_err("%s: Unable to write to whitelisted register %x\n",
+ engine->name, reg);
+ err = -EINVAL;
+ goto out_unpin;
+ }
+
+ /* Replay the writes through reg_write(); err counts mismatches here. */
+ expect = results[0];
+ idx = 1;
+ for (v = 0; v < ARRAY_SIZE(values); v++) {
+ expect = reg_write(expect, values[v], rsvd);
+ if (results[idx] != expect)
+ err++;
+ idx++;
+ }
+ for (v = 0; v < ARRAY_SIZE(values); v++) {
+ expect = reg_write(expect, ~values[v], rsvd);
+ if (results[idx] != expect)
+ err++;
+ idx++;
+ }
+ if (err) {
+ pr_err("%s: %d mismatch between values written to whitelisted register [%x], and values read back!\n",
+ engine->name, err, reg);
+
+ pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n",
+ engine->name, reg, results[0], rsvd);
+
+ /* Replay once more, this time logging every write/readback pair. */
+ expect = results[0];
+ idx = 1;
+ for (v = 0; v < ARRAY_SIZE(values); v++) {
+ u32 w = values[v];
+
+ expect = reg_write(expect, w, rsvd);
+ pr_info("Wrote %08x, read %08x, expect %08x\n",
+ w, results[idx], expect);
+ idx++;
+ }
+ for (v = 0; v < ARRAY_SIZE(values); v++) {
+ u32 w = ~values[v];
+
+ expect = reg_write(expect, w, rsvd);
+ pr_info("Wrote %08x, read %08x, expect %08x\n",
+ w, results[idx], expect);
+ idx++;
+ }
+
+ /* Turn the mismatch count into an error code. */
+ err = -EINVAL;
+ }
+out_unpin:
+ i915_gem_object_unpin_map(scratch->obj);
+ if (err)
+ break;
+ }
+
+ /* Only reached via the loop; the out_batch jumps above skip this flush. */
+ if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
+ err = -EIO;
+out_batch:
+ i915_vma_unpin_and_release(&batch, 0);
+out_scratch:
+ i915_vma_unpin_and_release(&scratch, 0);
+ return err;
+}
+
+/*
+ * Selftest: run check_dirty_whitelist() on every engine that has a
+ * whitelist, using a context created against a mock file.
+ *
+ * Skipped (returns 0) before gen7. struct_mutex is dropped around
+ * mock_file() and mock_file_free() and retaken afterwards.
+ */
+static int live_dirty_whitelist(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	struct drm_file *file;
+	int err;
+
+	/* Can the user write to the whitelisted registers? */
+
+	/* minimum requirement for LRI, SRM, LRM */
+	if (INTEL_GEN(i915) < 7)
+		return 0;
+
+	wakeref = intel_runtime_pm_get(i915);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+	file = mock_file(i915);
+	mutex_lock(&i915->drm.struct_mutex);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto out_rpm;
+	}
+
+	ctx = live_context(i915, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_file;
+	}
+
+	err = 0;
+	for_each_engine(engine, i915, id) {
+		if (!engine->whitelist.count)
+			continue;
+
+		err = check_dirty_whitelist(ctx, engine);
+		if (err)
+			break;
+	}
+
+out_file:
+	mutex_unlock(&i915->drm.struct_mutex);
+	mock_file_free(i915, file);
+	mutex_lock(&i915->drm.struct_mutex);
+out_rpm:
+	intel_runtime_pm_put(i915, wakeref);
+	return err;
+}
+
+/*
+ * Selftest: the RCS0 whitelist must survive an engine reset and a device
+ * reset, each exercised only when the device reports support for it.
+ *
+ * Returns 0 when there is nothing to test or all checks pass, otherwise the
+ * first error from check_whitelist_across_reset().
+ */
+static int live_reset_whitelist(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine = i915->engine[RCS0];
+	int err = 0;
+
+	/* If we reset the gpu, we should not lose the RING_NONPRIV */
+
+	if (!engine || engine->whitelist.count == 0)
+		return 0;
+
+	igt_global_reset_lock(i915);
+
+	if (intel_has_reset_engine(i915))
+		err = check_whitelist_across_reset(engine,
+						   do_engine_reset,
+						   "engine");
+
+	/* A failed engine-reset check skips the device-reset check. */
+	if (!err && intel_has_gpu_reset(i915))
+		err = check_whitelist_across_reset(engine,
+						   do_device_reset,
+						   "device");
+
+	igt_global_reset_unlock(i915);
+	return err;
+}
+
+/*
+ * Submit a request on @engine in @ctx that stores every whitelisted register
+ * into consecutive u32 slots starting at @results->node.start, then wait up
+ * to HZ / 5 for it to complete.
+ *
+ * Returns 0 on success, the request allocation or ring error on failure, or
+ * -EIO if the wait fails.
+ */
+static int read_whitelisted_registers(struct i915_gem_context *ctx,
+				      struct intel_engine_cs *engine,
+				      struct i915_vma *results)
+{
+	struct i915_request *rq = ERR_PTR(-ENODEV);
+	intel_wakeref_t wakeref;
+	int err = 0;
+	u32 *cs;
+	u32 srm;
+	int n;
+
+	with_intel_runtime_pm(engine->i915, wakeref)
+		rq = i915_request_alloc(engine, ctx);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	/* On gen8+ the opcode is incremented by one before use. */
+	srm = MI_STORE_REGISTER_MEM;
+	if (INTEL_GEN(ctx->i915) >= 8)
+		srm++;
+
+	/* Four dwords are emitted per register. */
+	cs = intel_ring_begin(rq, 4 * engine->whitelist.count);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+	} else {
+		for (n = 0; n < engine->whitelist.count; n++) {
+			u64 offset = results->node.start + sizeof(u32) * n;
+
+			cs[0] = srm;
+			cs[1] = i915_mmio_reg_offset(engine->whitelist.list[n].reg);
+			cs[2] = lower_32_bits(offset);
+			cs[3] = upper_32_bits(offset);
+			cs += 4;
+		}
+		intel_ring_advance(rq, cs);
+	}
+
+	/* The request is always added and waited upon, even after an error. */
+	i915_request_add(rq);
+
+	if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0)
+		err = -EIO;
+
+	return err;
+}
+
+/*
+ * Write 0xffffffff to every whitelisted register of @engine from a batch
+ * buffer executed in @ctx, then wait up to HZ / 5 for the request.
+ *
+ * Returns 0 on success or a negative error code; -EIO if the wait fails.
+ */
+static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ intel_wakeref_t wakeref;
+ struct i915_request *rq;
+ struct i915_vma *batch;
+ int i, err = 0;
+ u32 *cs;
+
+ batch = create_batch(ctx);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_batch;
+ }
+
+ /* A single LRI carrying one (offset, value) pair per register. */
+ *cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count);
+ for (i = 0; i < engine->whitelist.count; i++) {
+ *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+ *cs++ = 0xffffffff;
+ }
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(batch->obj);
+ i915_gem_chipset_flush(ctx->i915);
+
+ /* rq keeps this error value if the with_intel_runtime_pm() body never runs. */
+ rq = ERR_PTR(-ENODEV);
+ with_intel_runtime_pm(engine->i915, wakeref)
+ rq = i915_request_alloc(engine, ctx);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ if (engine->emit_init_breadcrumb) { /* Be nice if we hang */
+ err = engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto err_request;
+ }
+
+ /* Perform the writes from an unprivileged "user" batch */
+ err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
+
+ /* The request is added and waited upon even if emission failed. */
+err_request:
+ i915_request_add(rq);
+ if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0)
+ err = -EIO;
+
+ /* The mapping taken above is only dropped here, after submission. */
+err_unpin:
+ i915_gem_object_unpin_map(batch->obj);
+err_batch:
+ i915_vma_unpin_and_release(&batch, 0);
+ return err;
+}
+
+/* Table entry pairing a register with a mask of hardware generations. */
+struct regmask {
+ /* register this entry refers to; find_reg() compares it by mmio offset */
+ i915_reg_t reg;
+ /* find_reg() ANDs this with INTEL_INFO(i915)->gen_mask */
+ unsigned long gen_mask;
+};
+
+/*
+ * Search the first @count entries of @tbl for one whose gen_mask overlaps
+ * the device's gen_mask and whose register has the same mmio offset as @reg.
+ *
+ * Returns true if such an entry exists.
+ */
+static bool find_reg(struct drm_i915_private *i915,
+		     i915_reg_t reg,
+		     const struct regmask *tbl,
+		     unsigned long count)
+{
+	const u32 offset = i915_mmio_reg_offset(reg);
+	const struct regmask *end = tbl + count;
+	const struct regmask *it;
+
+	for (it = tbl; it != end; it++) {
+		if (!(INTEL_INFO(i915)->gen_mask & it->gen_mask))
+			continue;
+
+		if (i915_mmio_reg_offset(it->reg) == offset)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Report whether @reg is on the list of registers excused from the
+ * result_eq() comparison on this device.
+ */
+static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg)
+{
+	/* Alas, we must pardon some whitelists. Mistakes already made */
+	static const struct regmask forgiven[] = {
+		{ .reg = GEN9_CTX_PREEMPT_REG, .gen_mask = INTEL_GEN_MASK(9, 9) },
+		{ .reg = GEN8_L3SQCREG4, .gen_mask = INTEL_GEN_MASK(9, 9) },
+	};
+
+	return find_reg(i915, reg, forgiven, ARRAY_SIZE(forgiven));
+}
+
+/*
+ * Comparison callback: succeed when @a and @b are equal, or when @reg is
+ * pardoned by pardon_reg(). Otherwise log the mismatch and fail.
+ */
+static bool result_eq(struct intel_engine_cs *engine,
+		      u32 a, u32 b, i915_reg_t reg)
+{
+	if (a == b)
+		return true;
+
+	if (pardon_reg(engine->i915, reg))
+		return true;
+
+	pr_err("Whitelisted register 0x%4x not context saved: A=%08x, B=%08x\n",
+	       i915_mmio_reg_offset(reg), a, b);
+	return false;
+}
+
+/*
+ * Report whether @reg is on the list of registers excused from the
+ * result_neq() comparison on this device.
+ */
+static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg)
+{
+	/* Some registers do not seem to behave: our writes cannot be read back */
+	static const struct regmask unreadable[] = {
+		{ .reg = GEN9_SLICE_COMMON_ECO_CHICKEN1, .gen_mask = INTEL_GEN_MASK(9, 9) },
+	};
+
+	return find_reg(i915, reg, unreadable, ARRAY_SIZE(unreadable));
+}
+
+/*
+ * Comparison callback: succeed when @a and @b differ, or when @reg is
+ * listed by writeonly_reg(). Otherwise log the register and fail.
+ */
+static bool result_neq(struct intel_engine_cs *engine,
+		       u32 a, u32 b, i915_reg_t reg)
+{
+	if (a != b)
+		return true;
+
+	if (writeonly_reg(engine->i915, reg))
+		return true;
+
+	pr_err("Whitelist register 0x%4x:%08x was unwritable\n",
+	       i915_mmio_reg_offset(reg), a);
+	return false;
+}
+
+/*
+ * Map the objects behind @A and @B and apply @fn to each pair of values,
+ * one pair per whitelisted register of @engine.
+ *
+ * Every register is visited even after a failure. Returns 0 if @fn accepted
+ * all pairs, -EINVAL if it rejected any, or the mapping error.
+ */
+static int
+check_whitelisted_registers(struct intel_engine_cs *engine,
+			    struct i915_vma *A,
+			    struct i915_vma *B,
+			    bool (*fn)(struct intel_engine_cs *engine,
+				       u32 a, u32 b,
+				       i915_reg_t reg))
+{
+	u32 *va, *vb;
+	int err = 0;
+	int n;
+
+	va = i915_gem_object_pin_map(A->obj, I915_MAP_WB);
+	if (IS_ERR(va))
+		return PTR_ERR(va);
+
+	vb = i915_gem_object_pin_map(B->obj, I915_MAP_WB);
+	if (IS_ERR(vb)) {
+		err = PTR_ERR(vb);
+		goto unpin_a;
+	}
+
+	for (n = 0; n < engine->whitelist.count; n++) {
+		if (fn(engine, va[n], vb[n], engine->whitelist.list[n].reg))
+			continue;
+
+		err = -EINVAL;
+	}
+
+	i915_gem_object_unpin_map(B->obj);
+unpin_a:
+	i915_gem_object_unpin_map(A->obj);
+	return err;
+}
+
+/*
+ * Selftest: writes to whitelisted registers made in one context must be
+ * readable in that context but must not show up in a second context.
+ *
+ * Two clients are set up, each with a kernel context and two scratch vmas.
+ * Skipped (returns 0) when the engines lack context isolation or the kernel
+ * context has no ppgtt. Returns 0 on success or a negative error code.
+ */
+static int live_isolated_whitelist(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct {
+ struct i915_gem_context *ctx;
+ struct i915_vma *scratch[2];
+ } client[2] = {};
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int i, err = 0;
+
+ /*
+ * Check that a write into a whitelist register works, but
+ * invisible to a second context.
+ */
+
+ if (!intel_engines_has_context_isolation(i915))
+ return 0;
+
+ if (!i915->kernel_context->ppgtt)
+ return 0;
+
+ /*
+ * client[i].ctx is assigned only once both scratch vmas exist; the
+ * cleanup loop below relies on that to stop at the first client that
+ * was not fully set up.
+ */
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ struct i915_gem_context *c;
+
+ c = kernel_context(i915);
+ if (IS_ERR(c)) {
+ err = PTR_ERR(c);
+ goto err;
+ }
+
+ client[i].scratch[0] = create_scratch(&c->ppgtt->vm, 1024);
+ if (IS_ERR(client[i].scratch[0])) {
+ err = PTR_ERR(client[i].scratch[0]);
+ kernel_context_close(c);
+ goto err;
+ }
+
+ client[i].scratch[1] = create_scratch(&c->ppgtt->vm, 1024);
+ if (IS_ERR(client[i].scratch[1])) {
+ err = PTR_ERR(client[i].scratch[1]);
+ i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+ kernel_context_close(c);
+ goto err;
+ }
+
+ client[i].ctx = c;
+ }
+
+ for_each_engine(engine, i915, id) {
+ if (!engine->whitelist.count)
+ continue;
+
+ /* Read default values */
+ err = read_whitelisted_registers(client[0].ctx, engine,
+ client[0].scratch[0]);
+ if (err)
+ goto err;
+
+ /* Try to overwrite registers (should only affect ctx0) */
+ err = scrub_whitelisted_registers(client[0].ctx, engine);
+ if (err)
+ goto err;
+
+ /* Read values from ctx1, we expect these to be defaults */
+ err = read_whitelisted_registers(client[1].ctx, engine,
+ client[1].scratch[0]);
+ if (err)
+ goto err;
+
+ /* Verify that both reads return the same default values */
+ err = check_whitelisted_registers(engine,
+ client[0].scratch[0],
+ client[1].scratch[0],
+ result_eq);
+ if (err)
+ goto err;
+
+ /* Read back the updated values in ctx0 */
+ err = read_whitelisted_registers(client[0].ctx, engine,
+ client[0].scratch[1]);
+ if (err)
+ goto err;
+
+ /* User should be granted privilege to overwrite regs */
+ err = check_whitelisted_registers(engine,
+ client[0].scratch[0],
+ client[0].scratch[1],
+ result_neq);
+ if (err)
+ goto err;
+ }
+
+ /* Reached on success as well as on failure. */
+err:
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ if (!client[i].ctx)
+ break;
+
+ i915_vma_unpin_and_release(&client[i].scratch[1], 0);
+ i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+ kernel_context_close(client[i].ctx);
+ }
+
+ /* A failed flush overrides any earlier result with -EIO. */
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ return err;
+}
+
+/*
+ * Verify the GT workaround list and every engine's workaround list held in
+ * @lists, tagging messages with @str.
+ *
+ * All lists are verified even after one fails. Returns true only if every
+ * verification passed.
+ */
+static bool verify_gt_engine_wa(struct drm_i915_private *i915,
+				struct wa_lists *lists, const char *str)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	bool ok = true;
+
+	ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str);
+
+	for_each_engine(engine, i915, id) {
+		int ret;
+
+		ret = engine_wa_list_verify(engine,
+					    &lists->engine[id].wa_list,
+					    str);
+		if (ret != 0)
+			ok = false;
+	}
+
+	return ok;
+}
+
+/*
+ * Selftest: GT and engine workarounds must verify both before and after a
+ * reset of all engines.
+ *
+ * Skipped (returns 0) when the device cannot be reset. Returns 0 on success
+ * and -ESRCH if either verification fails; the reset is not attempted when
+ * the "before" verification already failed.
+ */
+static int
+live_gpu_reset_gt_engine_workarounds(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	intel_wakeref_t wakeref;
+	struct wa_lists lists;
+	bool ok;
+
+	if (!intel_has_gpu_reset(i915))
+		return 0;
+
+	pr_info("Verifying after GPU reset...\n");
+
+	igt_global_reset_lock(i915);
+	wakeref = intel_runtime_pm_get(i915);
+
+	reference_lists_init(i915, &lists);
+
+	ok = verify_gt_engine_wa(i915, &lists, "before reset");
+	if (ok) {
+		i915_reset(i915, ALL_ENGINES, "live_workarounds");
+
+		ok = verify_gt_engine_wa(i915, &lists, "after reset");
+	}
+
+	reference_lists_fini(i915, &lists);
+	intel_runtime_pm_put(i915, wakeref);
+	igt_global_reset_unlock(i915);
+
+	if (!ok)
+		return -ESRCH;
+
+	return 0;
+}
+
+/*
+ * Selftest: GT and engine workarounds must still verify after resetting each
+ * engine, once while the engine is idle and once while a spinner request is
+ * running on it.
+ *
+ * Skipped (returns 0) when engine reset is unavailable. Returns 0 on success,
+ * -ESRCH on a verification failure, -ETIMEDOUT if the spinner fails to
+ * start, or the error from setting up the spinner.
+ */
+static int
+live_engine_reset_gt_engine_workarounds(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ struct i915_gem_context *ctx;
+ struct igt_spinner spin;
+ enum intel_engine_id id;
+ struct i915_request *rq;
+ intel_wakeref_t wakeref;
+ struct wa_lists lists;
+ int ret = 0;
+
+ if (!intel_has_reset_engine(i915))
+ return 0;
+
+ ctx = kernel_context(i915);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ igt_global_reset_lock(i915);
+ wakeref = intel_runtime_pm_get(i915);
+
+ reference_lists_init(i915, &lists);
+
+ for_each_engine(engine, i915, id) {
+ bool ok;
+
+ pr_info("Verifying after %s reset...\n", engine->name);
+
+ ok = verify_gt_engine_wa(i915, &lists, "before reset");
+ if (!ok) {
+ ret = -ESRCH;
+ goto err;
+ }
+
+ /* First reset: no spinner has been submitted yet. */
+ i915_reset_engine(engine, "live_workarounds");
+
+ ok = verify_gt_engine_wa(i915, &lists, "after idle reset");
+ if (!ok) {
+ ret = -ESRCH;
+ goto err;
+ }
+
+ /* A spinner is created and torn down per engine iteration. */
+ ret = igt_spinner_init(&spin, i915);
+ if (ret)
+ goto err;
+
+ rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ igt_spinner_fini(&spin);
+ goto err;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ pr_err("Spinner failed to start\n");
+ igt_spinner_fini(&spin);
+ ret = -ETIMEDOUT;
+ goto err;
+ }
+
+ /* Second reset: the spinner request is running on the engine. */
+ i915_reset_engine(engine, "live_workarounds");
+
+ igt_spinner_end(&spin);
+ igt_spinner_fini(&spin);
+
+ ok = verify_gt_engine_wa(i915, &lists, "after busy reset");
+ if (!ok) {
+ ret = -ESRCH;
+ goto err;
+ }
+ }
+
+ /* Reached on success as well as on failure. */
+err:
+ reference_lists_fini(i915, &lists);
+ intel_runtime_pm_put(i915, wakeref);
+ igt_global_reset_unlock(i915);
+ kernel_context_close(ctx);
+
+ /* The flush result is deliberately not folded into ret. */
+ igt_flush_test(i915, I915_WAIT_LOCKED);
+
+ return ret;
+}
+
+/*
+ * Entry point for the workarounds live selftests.
+ *
+ * Runs every subtest listed below with struct_mutex held. Nothing is run
+ * (and 0 is returned) when the device is already terminally wedged.
+ */
+int intel_workarounds_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_dirty_whitelist),
+		SUBTEST(live_reset_whitelist),
+		SUBTEST(live_isolated_whitelist),
+		SUBTEST(live_gpu_reset_gt_engine_workarounds),
+		SUBTEST(live_engine_reset_gt_engine_workarounds),
+	};
+	int ret;
+
+	if (i915_terminally_wedged(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ret = i915_subtests(tests, i915);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return ret;
+}
*
*/
+#include "gt/intel_engine.h"
+
#include "i915_drv.h"
-#include "intel_ringbuffer.h"
/**
* DOC: batch buffer command parser
#include <drm/drm_debugfs.h>
#include <drm/drm_fourcc.h>
-#include "i915_reset.h"
+#include "gt/intel_reset.h"
+
#include "intel_dp.h"
#include "intel_drv.h"
#include "intel_fbc.h"
#include <drm/drm_probe_helper.h>
#include <drm/i915_drm.h>
+#include "gt/intel_workarounds.h"
+#include "gt/intel_reset.h"
+
#include "i915_drv.h"
#include "i915_pmu.h"
#include "i915_query.h"
-#include "i915_reset.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_audio.h"
#include "intel_pm.h"
#include "intel_sprite.h"
#include "intel_uc.h"
-#include "intel_workarounds.h"
static struct drm_driver driver;
#include "i915_reg.h"
#include "i915_utils.h"
+#include "gt/intel_lrc.h"
+#include "gt/intel_engine.h"
+#include "gt/intel_workarounds.h"
+
#include "intel_bios.h"
#include "intel_device_info.h"
#include "intel_display.h"
#include "intel_dpll_mgr.h"
#include "intel_frontbuffer.h"
-#include "intel_lrc.h"
#include "intel_opregion.h"
-#include "intel_ringbuffer.h"
#include "intel_uc.h"
#include "intel_uncore.h"
#include "intel_wopcm.h"
-#include "intel_workarounds.h"
#include "i915_gem.h"
#include "i915_gem_context.h"
#include <linux/dma-buf.h>
#include <linux/mman.h>
+#include "gt/intel_mocs.h"
+#include "gt/intel_reset.h"
+#include "gt/intel_workarounds.h"
+
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gemfs.h"
#include "i915_globals.h"
-#include "i915_reset.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
-#include "intel_mocs.h"
#include "intel_pm.h"
-#include "intel_workarounds.h"
static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
*/
#include <linux/log2.h>
+
#include <drm/i915_drm.h>
+
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_workarounds.h"
+
#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
-#include "intel_lrc_reg.h"
-#include "intel_workarounds.h"
#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
#define I915_CONTEXT_PARAM_VM 0x9
#include "i915_gem_context_types.h"
+#include "gt/intel_context.h"
+
#include "i915_gem.h"
#include "i915_scheduler.h"
-#include "intel_context.h"
#include "intel_device_info.h"
struct drm_device;
#include <linux/rcupdate.h>
#include <linux/types.h>
+#include "gt/intel_context_types.h"
+
#include "i915_scheduler.h"
-#include "intel_context_types.h"
struct pid;
#include "i915_drv.h"
#include "i915_vgpu.h"
-#include "i915_reset.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include <linux/mm.h>
#include <linux/pagevec.h>
+#include "gt/intel_reset.h"
#include "i915_request.h"
-#include "i915_reset.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include <drm/drm_mm.h>
+#include "gt/intel_engine.h"
+
#include "intel_device_info.h"
-#include "intel_ringbuffer.h"
#include "intel_uc_fw.h"
#include "i915_gem.h"
#include <linux/sizes.h>
#include <linux/uuid.h>
+#include "gt/intel_lrc_reg.h"
+
#include "i915_drv.h"
#include "i915_oa_hsw.h"
#include "i915_oa_bdw.h"
#include "i915_oa_cflgt3.h"
#include "i915_oa_cnl.h"
#include "i915_oa_icl.h"
-#include "intel_lrc_reg.h"
/* HW requires this to be a power of two, between 128k and 16M, though driver
* is currently generally designed assuming the largest 16M size is used such
#include <linux/irq.h>
#include <linux/pm_runtime.h>
+
+#include "gt/intel_engine.h"
+
#include "i915_pmu.h"
-#include "intel_ringbuffer.h"
#include "i915_drv.h"
/* Frequency for the sampling timer for events which need it. */
#include "i915_active.h"
#include "i915_drv.h"
#include "i915_globals.h"
-#include "i915_reset.h"
#include "intel_pm.h"
struct execute_cb {
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2008-2018 Intel Corporation
- */
-
-#include <linux/sched/mm.h>
-#include <linux/stop_machine.h>
-
-#include "i915_drv.h"
-#include "i915_gpu_error.h"
-#include "i915_reset.h"
-
-#include "intel_guc.h"
-
-#define RESET_MAX_RETRIES 3
-
-/* XXX How to handle concurrent GGTT updates using tiling registers? */
-#define RESET_UNDER_STOP_MACHINE 0
-
-static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
-{
- intel_uncore_rmw(uncore, reg, 0, set);
-}
-
-static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
-{
- intel_uncore_rmw(uncore, reg, clr, 0);
-}
-
-static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
-{
- intel_uncore_rmw_fw(uncore, reg, 0, set);
-}
-
-static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
-{
- intel_uncore_rmw_fw(uncore, reg, clr, 0);
-}
-
-static void engine_skip_context(struct i915_request *rq)
-{
- struct intel_engine_cs *engine = rq->engine;
- struct i915_gem_context *hung_ctx = rq->gem_context;
-
- lockdep_assert_held(&engine->timeline.lock);
-
- if (!i915_request_is_active(rq))
- return;
-
- list_for_each_entry_continue(rq, &engine->timeline.requests, link)
- if (rq->gem_context == hung_ctx)
- i915_request_skip(rq, -EIO);
-}
-
-static void client_mark_guilty(struct drm_i915_file_private *file_priv,
- const struct i915_gem_context *ctx)
-{
- unsigned int score;
- unsigned long prev_hang;
-
- if (i915_gem_context_is_banned(ctx))
- score = I915_CLIENT_SCORE_CONTEXT_BAN;
- else
- score = 0;
-
- prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
- if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
- score += I915_CLIENT_SCORE_HANG_FAST;
-
- if (score) {
- atomic_add(score, &file_priv->ban_score);
-
- DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
- ctx->name, score,
- atomic_read(&file_priv->ban_score));
- }
-}
-
-static bool context_mark_guilty(struct i915_gem_context *ctx)
-{
- unsigned long prev_hang;
- bool banned;
- int i;
-
- atomic_inc(&ctx->guilty_count);
-
- /* Cool contexts are too cool to be banned! (Used for reset testing.) */
- if (!i915_gem_context_is_bannable(ctx))
- return false;
-
- /* Record the timestamp for the last N hangs */
- prev_hang = ctx->hang_timestamp[0];
- for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
- ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
- ctx->hang_timestamp[i] = jiffies;
-
- /* If we have hung N+1 times in rapid succession, we ban the context! */
- banned = !i915_gem_context_is_recoverable(ctx);
- if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
- banned = true;
- if (banned) {
- DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
- ctx->name, atomic_read(&ctx->guilty_count));
- i915_gem_context_set_banned(ctx);
- }
-
- if (!IS_ERR_OR_NULL(ctx->file_priv))
- client_mark_guilty(ctx->file_priv, ctx);
-
- return banned;
-}
-
-static void context_mark_innocent(struct i915_gem_context *ctx)
-{
- atomic_inc(&ctx->active_count);
-}
-
-void i915_reset_request(struct i915_request *rq, bool guilty)
-{
- GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
- rq->engine->name,
- rq->fence.context,
- rq->fence.seqno,
- yesno(guilty));
-
- lockdep_assert_held(&rq->engine->timeline.lock);
- GEM_BUG_ON(i915_request_completed(rq));
-
- if (guilty) {
- i915_request_skip(rq, -EIO);
- if (context_mark_guilty(rq->gem_context))
- engine_skip_context(rq);
- } else {
- dma_fence_set_error(&rq->fence, -EAGAIN);
- context_mark_innocent(rq->gem_context);
- }
-}
-
-static void gen3_stop_engine(struct intel_engine_cs *engine)
-{
- struct intel_uncore *uncore = engine->uncore;
- const u32 base = engine->mmio_base;
-
- GEM_TRACE("%s\n", engine->name);
-
- if (intel_engine_stop_cs(engine))
- GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
-
- intel_uncore_write_fw(uncore,
- RING_HEAD(base),
- intel_uncore_read_fw(uncore, RING_TAIL(base)));
- intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
-
- intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
- intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
- intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
-
- /* The ring must be empty before it is disabled */
- intel_uncore_write_fw(uncore, RING_CTL(base), 0);
-
- /* Check acts as a post */
- if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
- GEM_TRACE("%s: ring head [%x] not parked\n",
- engine->name,
- intel_uncore_read_fw(uncore, RING_HEAD(base)));
-}
-
-static void i915_stop_engines(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask)
-{
- struct intel_engine_cs *engine;
- intel_engine_mask_t tmp;
-
- if (INTEL_GEN(i915) < 3)
- return;
-
- for_each_engine_masked(engine, i915, engine_mask, tmp)
- gen3_stop_engine(engine);
-}
-
-static bool i915_in_reset(struct pci_dev *pdev)
-{
- u8 gdrst;
-
- pci_read_config_byte(pdev, I915_GDRST, &gdrst);
- return gdrst & GRDOM_RESET_STATUS;
-}
-
-static int i915_do_reset(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
-{
- struct pci_dev *pdev = i915->drm.pdev;
- int err;
-
- /* Assert reset for at least 20 usec, and wait for acknowledgement. */
- pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
- udelay(50);
- err = wait_for_atomic(i915_in_reset(pdev), 50);
-
- /* Clear the reset request. */
- pci_write_config_byte(pdev, I915_GDRST, 0);
- udelay(50);
- if (!err)
- err = wait_for_atomic(!i915_in_reset(pdev), 50);
-
- return err;
-}
-
-static bool g4x_reset_complete(struct pci_dev *pdev)
-{
- u8 gdrst;
-
- pci_read_config_byte(pdev, I915_GDRST, &gdrst);
- return (gdrst & GRDOM_RESET_ENABLE) == 0;
-}
-
-static int g33_do_reset(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
-{
- struct pci_dev *pdev = i915->drm.pdev;
-
- pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
- return wait_for_atomic(g4x_reset_complete(pdev), 50);
-}
-
-static int g4x_do_reset(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
-{
- struct pci_dev *pdev = i915->drm.pdev;
- struct intel_uncore *uncore = &i915->uncore;
- int ret;
-
- /* WaVcpClkGateDisableForMediaReset:ctg,elk */
- rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
- intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
-
- pci_write_config_byte(pdev, I915_GDRST,
- GRDOM_MEDIA | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
- if (ret) {
- DRM_DEBUG_DRIVER("Wait for media reset failed\n");
- goto out;
- }
-
- pci_write_config_byte(pdev, I915_GDRST,
- GRDOM_RENDER | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
- if (ret) {
- DRM_DEBUG_DRIVER("Wait for render reset failed\n");
- goto out;
- }
-
-out:
- pci_write_config_byte(pdev, I915_GDRST, 0);
-
- rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
- intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
-
- return ret;
-}
-
-static int ironlake_do_reset(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
-{
- struct intel_uncore *uncore = &i915->uncore;
- int ret;
-
- intel_uncore_write_fw(uncore, ILK_GDSR,
- ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
- ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
- ILK_GRDOM_RESET_ENABLE, 0,
- 5000, 0,
- NULL);
- if (ret) {
- DRM_DEBUG_DRIVER("Wait for render reset failed\n");
- goto out;
- }
-
- intel_uncore_write_fw(uncore, ILK_GDSR,
- ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
- ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
- ILK_GRDOM_RESET_ENABLE, 0,
- 5000, 0,
- NULL);
- if (ret) {
- DRM_DEBUG_DRIVER("Wait for media reset failed\n");
- goto out;
- }
-
-out:
- intel_uncore_write_fw(uncore, ILK_GDSR, 0);
- intel_uncore_posting_read_fw(uncore, ILK_GDSR);
- return ret;
-}
-
-/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
-static int gen6_hw_domain_reset(struct drm_i915_private *i915,
- u32 hw_domain_mask)
-{
- struct intel_uncore *uncore = &i915->uncore;
- int err;
-
- /*
- * GEN6_GDRST is not in the gt power well, no need to check
- * for fifo space for the write or forcewake the chip for
- * the read
- */
- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
-
- /* Wait for the device to ack the reset requests */
- err = __intel_wait_for_register_fw(uncore,
- GEN6_GDRST, hw_domain_mask, 0,
- 500, 0,
- NULL);
- if (err)
- DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
- hw_domain_mask);
-
- return err;
-}
-
-static int gen6_reset_engines(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
-{
- struct intel_engine_cs *engine;
- const u32 hw_engine_mask[] = {
- [RCS0] = GEN6_GRDOM_RENDER,
- [BCS0] = GEN6_GRDOM_BLT,
- [VCS0] = GEN6_GRDOM_MEDIA,
- [VCS1] = GEN8_GRDOM_MEDIA2,
- [VECS0] = GEN6_GRDOM_VECS,
- };
- u32 hw_mask;
-
- if (engine_mask == ALL_ENGINES) {
- hw_mask = GEN6_GRDOM_FULL;
- } else {
- intel_engine_mask_t tmp;
-
- hw_mask = 0;
- for_each_engine_masked(engine, i915, engine_mask, tmp) {
- GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
- hw_mask |= hw_engine_mask[engine->id];
- }
- }
-
- return gen6_hw_domain_reset(i915, hw_mask);
-}
-
-static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
-{
- struct intel_uncore *uncore = engine->uncore;
- u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
- i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
- u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
- i915_reg_t sfc_usage;
- u32 sfc_usage_bit;
- u32 sfc_reset_bit;
-
- switch (engine->class) {
- case VIDEO_DECODE_CLASS:
- if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
- return 0;
-
- sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
- sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
-
- sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
- sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT;
-
- sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
- sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
- sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
- break;
-
- case VIDEO_ENHANCEMENT_CLASS:
- sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
- sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
-
- sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
- sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT;
-
- sfc_usage = GEN11_VECS_SFC_USAGE(engine);
- sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
- sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
- break;
-
- default:
- return 0;
- }
-
- /*
- * Tell the engine that a software reset is going to happen. The engine
- * will then try to force lock the SFC (if currently locked, it will
- * remain so until we tell the engine it is safe to unlock; if currently
- * unlocked, it will ignore this and all new lock requests). If SFC
- * ends up being locked to the engine we want to reset, we have to reset
- * it as well (we will unlock it once the reset sequence is completed).
- */
- rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
-
- if (__intel_wait_for_register_fw(uncore,
- sfc_forced_lock_ack,
- sfc_forced_lock_ack_bit,
- sfc_forced_lock_ack_bit,
- 1000, 0, NULL)) {
- DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
- return 0;
- }
-
- if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)
- return sfc_reset_bit;
-
- return 0;
-}
-
-static void gen11_unlock_sfc(struct intel_engine_cs *engine)
-{
- struct intel_uncore *uncore = engine->uncore;
- u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
- i915_reg_t sfc_forced_lock;
- u32 sfc_forced_lock_bit;
-
- switch (engine->class) {
- case VIDEO_DECODE_CLASS:
- if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
- return;
-
- sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
- sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
- break;
-
- case VIDEO_ENHANCEMENT_CLASS:
- sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
- sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
- break;
-
- default:
- return;
- }
-
- rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
-}
-
-static int gen11_reset_engines(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
-{
- const u32 hw_engine_mask[] = {
- [RCS0] = GEN11_GRDOM_RENDER,
- [BCS0] = GEN11_GRDOM_BLT,
- [VCS0] = GEN11_GRDOM_MEDIA,
- [VCS1] = GEN11_GRDOM_MEDIA2,
- [VCS2] = GEN11_GRDOM_MEDIA3,
- [VCS3] = GEN11_GRDOM_MEDIA4,
- [VECS0] = GEN11_GRDOM_VECS,
- [VECS1] = GEN11_GRDOM_VECS2,
- };
- struct intel_engine_cs *engine;
- intel_engine_mask_t tmp;
- u32 hw_mask;
- int ret;
-
- if (engine_mask == ALL_ENGINES) {
- hw_mask = GEN11_GRDOM_FULL;
- } else {
- hw_mask = 0;
- for_each_engine_masked(engine, i915, engine_mask, tmp) {
- GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
- hw_mask |= hw_engine_mask[engine->id];
- hw_mask |= gen11_lock_sfc(engine);
- }
- }
-
- ret = gen6_hw_domain_reset(i915, hw_mask);
-
- if (engine_mask != ALL_ENGINES)
- for_each_engine_masked(engine, i915, engine_mask, tmp)
- gen11_unlock_sfc(engine);
-
- return ret;
-}
-
-static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
-{
- struct intel_uncore *uncore = engine->uncore;
- const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base);
- u32 request, mask, ack;
- int ret;
-
- ack = intel_uncore_read_fw(uncore, reg);
- if (ack & RESET_CTL_CAT_ERROR) {
- /*
- * For catastrophic errors, ready-for-reset sequence
- * needs to be bypassed: HAS#396813
- */
- request = RESET_CTL_CAT_ERROR;
- mask = RESET_CTL_CAT_ERROR;
-
- /* Catastrophic errors need to be cleared by HW */
- ack = 0;
- } else if (!(ack & RESET_CTL_READY_TO_RESET)) {
- request = RESET_CTL_REQUEST_RESET;
- mask = RESET_CTL_READY_TO_RESET;
- ack = RESET_CTL_READY_TO_RESET;
- } else {
- return 0;
- }
-
- intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request));
- ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
- 700, 0, NULL);
- if (ret)
- DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
- engine->name, request,
- intel_uncore_read_fw(uncore, reg));
-
- return ret;
-}
-
-static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
-{
- intel_uncore_write_fw(engine->uncore,
- RING_RESET_CTL(engine->mmio_base),
- _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
-}
-
-static int gen8_reset_engines(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
-{
- struct intel_engine_cs *engine;
- const bool reset_non_ready = retry >= 1;
- intel_engine_mask_t tmp;
- int ret;
-
- for_each_engine_masked(engine, i915, engine_mask, tmp) {
- ret = gen8_engine_reset_prepare(engine);
- if (ret && !reset_non_ready)
- goto skip_reset;
-
- /*
- * If this is not the first failed attempt to prepare,
- * we decide to proceed anyway.
- *
- * By doing so we risk context corruption and with
- * some gens (kbl), possible system hang if reset
- * happens during active bb execution.
- *
- * We rather take context corruption instead of
- * failed reset with a wedged driver/gpu. And
- * active bb execution case should be covered by
- * i915_stop_engines we have before the reset.
- */
- }
-
- if (INTEL_GEN(i915) >= 11)
- ret = gen11_reset_engines(i915, engine_mask, retry);
- else
- ret = gen6_reset_engines(i915, engine_mask, retry);
-
-skip_reset:
- for_each_engine_masked(engine, i915, engine_mask, tmp)
- gen8_engine_reset_cancel(engine);
-
- return ret;
-}
-
-typedef int (*reset_func)(struct drm_i915_private *,
- intel_engine_mask_t engine_mask,
- unsigned int retry);
-
-static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
-{
- if (INTEL_GEN(i915) >= 8)
- return gen8_reset_engines;
- else if (INTEL_GEN(i915) >= 6)
- return gen6_reset_engines;
- else if (INTEL_GEN(i915) >= 5)
- return ironlake_do_reset;
- else if (IS_G4X(i915))
- return g4x_do_reset;
- else if (IS_G33(i915) || IS_PINEVIEW(i915))
- return g33_do_reset;
- else if (INTEL_GEN(i915) >= 3)
- return i915_do_reset;
- else
- return NULL;
-}
-
-int intel_gpu_reset(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask)
-{
- const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
- reset_func reset;
- int ret = -ETIMEDOUT;
- int retry;
-
- reset = intel_get_gpu_reset(i915);
- if (!reset)
- return -ENODEV;
-
- /*
- * If the power well sleeps during the reset, the reset
- * request may be dropped and never completes (causing -EIO).
- */
- intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
- for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
- /*
- * We stop engines, otherwise we might get failed reset and a
- * dead gpu (on elk). Also as modern gpu as kbl can suffer
- * from system hang if batchbuffer is progressing when
- * the reset is issued, regardless of READY_TO_RESET ack.
- * Thus assume it is best to stop engines on all gens
- * where we have a gpu reset.
- *
- * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
- *
- * WaMediaResetMainRingCleanup:ctg,elk (presumably)
- *
- * FIXME: Wa for more modern gens needs to be validated
- */
- if (retry)
- i915_stop_engines(i915, engine_mask);
-
- GEM_TRACE("engine_mask=%x\n", engine_mask);
- preempt_disable();
- ret = reset(i915, engine_mask, retry);
- preempt_enable();
- }
- intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-
- return ret;
-}
-
-bool intel_has_gpu_reset(struct drm_i915_private *i915)
-{
- if (!i915_modparams.reset)
- return NULL;
-
- return intel_get_gpu_reset(i915);
-}
-
-bool intel_has_reset_engine(struct drm_i915_private *i915)
-{
- return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
-}
-
-int intel_reset_guc(struct drm_i915_private *i915)
-{
- u32 guc_domain =
- INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
- int ret;
-
- GEM_BUG_ON(!HAS_GUC(i915));
-
- intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
- ret = gen6_hw_domain_reset(i915, guc_domain);
- intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-
- return ret;
-}
-
-/*
- * Ensure irq handler finishes, and not run again.
- * Also return the active request so that we only search for it once.
- */
-static void reset_prepare_engine(struct intel_engine_cs *engine)
-{
- /*
- * During the reset sequence, we must prevent the engine from
- * entering RC6. As the context state is undefined until we restart
- * the engine, if it does enter RC6 during the reset, the state
- * written to the powercontext is undefined and so we may lose
- * GPU state upon resume, i.e. fail to restart after a reset.
- */
- intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
- engine->reset.prepare(engine);
-}
-
-static void revoke_mmaps(struct drm_i915_private *i915)
-{
- int i;
-
- for (i = 0; i < i915->num_fence_regs; i++) {
- struct drm_vma_offset_node *node;
- struct i915_vma *vma;
- u64 vma_offset;
-
- vma = READ_ONCE(i915->fence_regs[i].vma);
- if (!vma)
- continue;
-
- if (!i915_vma_has_userfault(vma))
- continue;
-
- GEM_BUG_ON(vma->fence != &i915->fence_regs[i]);
- node = &vma->obj->base.vma_node;
- vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
- unmap_mapping_range(i915->drm.anon_inode->i_mapping,
- drm_vma_node_offset_addr(node) + vma_offset,
- vma->size,
- 1);
- }
-}
-
-static void reset_prepare(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id)
- reset_prepare_engine(engine);
-
- intel_uc_reset_prepare(i915);
-}
-
-static void gt_revoke(struct drm_i915_private *i915)
-{
- revoke_mmaps(i915);
-}
-
-static int gt_reset(struct drm_i915_private *i915,
- intel_engine_mask_t stalled_mask)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err;
-
- /*
- * Everything depends on having the GTT running, so we need to start
- * there.
- */
- err = i915_ggtt_enable_hw(i915);
- if (err)
- return err;
-
- for_each_engine(engine, i915, id)
- intel_engine_reset(engine, stalled_mask & engine->mask);
-
- i915_gem_restore_fences(i915);
-
- return err;
-}
-
-static void reset_finish_engine(struct intel_engine_cs *engine)
-{
- engine->reset.finish(engine);
- intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
-}
-
-struct i915_gpu_restart {
- struct work_struct work;
- struct drm_i915_private *i915;
-};
-
-static void restart_work(struct work_struct *work)
-{
- struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
- struct drm_i915_private *i915 = arg->i915;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
-
- wakeref = intel_runtime_pm_get(i915);
- mutex_lock(&i915->drm.struct_mutex);
- WRITE_ONCE(i915->gpu_error.restart, NULL);
-
- for_each_engine(engine, i915, id) {
- struct i915_request *rq;
-
- /*
- * Ostensibily, we always want a context loaded for powersaving,
- * so if the engine is idle after the reset, send a request
- * to load our scratch kernel_context.
- */
- if (!intel_engine_is_idle(engine))
- continue;
-
- rq = i915_request_alloc(engine, i915->kernel_context);
- if (!IS_ERR(rq))
- i915_request_add(rq);
- }
-
- mutex_unlock(&i915->drm.struct_mutex);
- intel_runtime_pm_put(i915, wakeref);
-
- kfree(arg);
-}
-
-static void reset_finish(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id) {
- reset_finish_engine(engine);
- intel_engine_signal_breadcrumbs(engine);
- }
-}
-
-static void reset_restart(struct drm_i915_private *i915)
-{
- struct i915_gpu_restart *arg;
-
- /*
- * Following the reset, ensure that we always reload context for
- * powersaving, and to correct engine->last_retired_context. Since
- * this requires us to submit a request, queue a worker to do that
- * task for us to evade any locking here.
- */
- if (READ_ONCE(i915->gpu_error.restart))
- return;
-
- arg = kmalloc(sizeof(*arg), GFP_KERNEL);
- if (arg) {
- arg->i915 = i915;
- INIT_WORK(&arg->work, restart_work);
-
- WRITE_ONCE(i915->gpu_error.restart, arg);
- queue_work(i915->wq, &arg->work);
- }
-}
-
-static void nop_submit_request(struct i915_request *request)
-{
- struct intel_engine_cs *engine = request->engine;
- unsigned long flags;
-
- GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
- engine->name, request->fence.context, request->fence.seqno);
- dma_fence_set_error(&request->fence, -EIO);
-
- spin_lock_irqsave(&engine->timeline.lock, flags);
- __i915_request_submit(request);
- i915_request_mark_complete(request);
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
- intel_engine_queue_breadcrumbs(engine);
-}
-
-static void __i915_gem_set_wedged(struct drm_i915_private *i915)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- if (test_bit(I915_WEDGED, &error->flags))
- return;
-
- if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
- struct drm_printer p = drm_debug_printer(__func__);
-
- for_each_engine(engine, i915, id)
- intel_engine_dump(engine, &p, "%s\n", engine->name);
- }
-
- GEM_TRACE("start\n");
-
- /*
- * First, stop submission to hw, but do not yet complete requests by
- * rolling the global seqno forward (since this would complete requests
- * for which we haven't set the fence error to EIO yet).
- */
- reset_prepare(i915);
-
- /* Even if the GPU reset fails, it should still stop the engines */
- if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
- intel_gpu_reset(i915, ALL_ENGINES);
-
- for_each_engine(engine, i915, id) {
- engine->submit_request = nop_submit_request;
- engine->schedule = NULL;
- }
- i915->caps.scheduler = 0;
-
- /*
- * Make sure no request can slip through without getting completed by
- * either this call here to intel_engine_write_global_seqno, or the one
- * in nop_submit_request.
- */
- synchronize_rcu_expedited();
-
- /* Mark all executing requests as skipped */
- for_each_engine(engine, i915, id)
- engine->cancel_requests(engine);
-
- reset_finish(i915);
-
- smp_mb__before_atomic();
- set_bit(I915_WEDGED, &error->flags);
-
- GEM_TRACE("end\n");
-}
-
-void i915_gem_set_wedged(struct drm_i915_private *i915)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
- intel_wakeref_t wakeref;
-
- mutex_lock(&error->wedge_mutex);
- with_intel_runtime_pm(i915, wakeref)
- __i915_gem_set_wedged(i915);
- mutex_unlock(&error->wedge_mutex);
-}
-
-static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
- struct i915_timeline *tl;
-
- if (!test_bit(I915_WEDGED, &error->flags))
- return true;
-
- if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
- return false;
-
- GEM_TRACE("start\n");
-
- /*
- * Before unwedging, make sure that all pending operations
- * are flushed and errored out - we may have requests waiting upon
- * third party fences. We marked all inflight requests as EIO, and
- * every execbuf since returned EIO, for consistency we want all
- * the currently pending requests to also be marked as EIO, which
- * is done inside our nop_submit_request - and so we must wait.
- *
- * No more can be submitted until we reset the wedged bit.
- */
- mutex_lock(&i915->gt.timelines.mutex);
- list_for_each_entry(tl, &i915->gt.timelines.active_list, link) {
- struct i915_request *rq;
-
- rq = i915_active_request_get_unlocked(&tl->last_request);
- if (!rq)
- continue;
-
- /*
- * All internal dependencies (i915_requests) will have
- * been flushed by the set-wedge, but we may be stuck waiting
- * for external fences. These should all be capped to 10s
- * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
- * in the worst case.
- */
- dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
- }
- mutex_unlock(&i915->gt.timelines.mutex);
-
- intel_engines_sanitize(i915, false);
-
- /*
- * Undo nop_submit_request. We prevent all new i915 requests from
- * being queued (by disallowing execbuf whilst wedged) so having
- * waited for all active requests above, we know the system is idle
- * and do not have to worry about a thread being inside
- * engine->submit_request() as we swap over. So unlike installing
- * the nop_submit_request on reset, we can do this from normal
- * context and do not require stop_machine().
- */
- intel_engines_reset_default_submission(i915);
-
- GEM_TRACE("end\n");
-
- smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
- clear_bit(I915_WEDGED, &i915->gpu_error.flags);
-
- return true;
-}
-
-bool i915_gem_unset_wedged(struct drm_i915_private *i915)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
- bool result;
-
- mutex_lock(&error->wedge_mutex);
- result = __i915_gem_unset_wedged(i915);
- mutex_unlock(&error->wedge_mutex);
-
- return result;
-}
-
-static int do_reset(struct drm_i915_private *i915,
- intel_engine_mask_t stalled_mask)
-{
- int err, i;
-
- gt_revoke(i915);
-
- err = intel_gpu_reset(i915, ALL_ENGINES);
- for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
- msleep(10 * (i + 1));
- err = intel_gpu_reset(i915, ALL_ENGINES);
- }
- if (err)
- return err;
-
- return gt_reset(i915, stalled_mask);
-}
-
-/**
- * i915_reset - reset chip after a hang
- * @i915: #drm_i915_private to reset
- * @stalled_mask: mask of the stalled engines with the guilty requests
- * @reason: user error message for why we are resetting
- *
- * Reset the chip. Useful if a hang is detected. Marks the device as wedged
- * on failure.
- *
- * Procedure is fairly simple:
- * - reset the chip using the reset reg
- * - re-init context state
- * - re-init hardware status page
- * - re-init ring buffer
- * - re-init interrupt state
- * - re-init display
- */
-void i915_reset(struct drm_i915_private *i915,
- intel_engine_mask_t stalled_mask,
- const char *reason)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
- int ret;
-
- GEM_TRACE("flags=%lx\n", error->flags);
-
- might_sleep();
- assert_rpm_wakelock_held(i915);
- GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
-
- /* Clear any previous failed attempts at recovery. Time to try again. */
- if (!__i915_gem_unset_wedged(i915))
- return;
-
- if (reason)
- dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
- error->reset_count++;
-
- reset_prepare(i915);
-
- if (!intel_has_gpu_reset(i915)) {
- if (i915_modparams.reset)
- dev_err(i915->drm.dev, "GPU reset not supported\n");
- else
- DRM_DEBUG_DRIVER("GPU reset disabled\n");
- goto error;
- }
-
- if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_disable_interrupts(i915);
-
- if (do_reset(i915, stalled_mask)) {
- dev_err(i915->drm.dev, "Failed to reset chip\n");
- goto taint;
- }
-
- if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_enable_interrupts(i915);
-
- intel_overlay_reset(i915);
-
- /*
- * Next we need to restore the context, but we don't use those
- * yet either...
- *
- * Ring buffer needs to be re-initialized in the KMS case, or if X
- * was running at the time of the reset (i.e. we weren't VT
- * switched away).
- */
- ret = i915_gem_init_hw(i915);
- if (ret) {
- DRM_ERROR("Failed to initialise HW following reset (%d)\n",
- ret);
- goto error;
- }
-
- i915_queue_hangcheck(i915);
-
-finish:
- reset_finish(i915);
- if (!__i915_wedged(error))
- reset_restart(i915);
- return;
-
-taint:
- /*
- * History tells us that if we cannot reset the GPU now, we
- * never will. This then impacts everything that is run
- * subsequently. On failing the reset, we mark the driver
- * as wedged, preventing further execution on the GPU.
- * We also want to go one step further and add a taint to the
- * kernel so that any subsequent faults can be traced back to
- * this failure. This is important for CI, where if the
- * GPU/driver fails we would like to reboot and restart testing
- * rather than continue on into oblivion. For everyone else,
- * the system should still plod along, but they have been warned!
- */
- add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
-error:
- __i915_gem_set_wedged(i915);
- goto finish;
-}
-
-static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
- struct intel_engine_cs *engine)
-{
- return intel_gpu_reset(i915, engine->mask);
-}
-
-/**
- * i915_reset_engine - reset GPU engine to recover from a hang
- * @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no dev_notice()
- *
- * Reset a specific GPU engine. Useful if a hang is detected.
- * Returns zero on successful reset or otherwise an error code.
- *
- * Procedure is:
- * - identifies the request that caused the hang and it is dropped
- * - reset engine (which will force the engine to idle)
- * - re-init/configure engine
- */
-int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
-{
- struct i915_gpu_error *error = &engine->i915->gpu_error;
- int ret;
-
- GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
- GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
-
- reset_prepare_engine(engine);
-
- if (msg)
- dev_notice(engine->i915->drm.dev,
- "Resetting %s for %s\n", engine->name, msg);
- error->reset_engine_count[engine->id]++;
-
- if (!engine->i915->guc.execbuf_client)
- ret = intel_gt_reset_engine(engine->i915, engine);
- else
- ret = intel_guc_reset_engine(&engine->i915->guc, engine);
- if (ret) {
- /* If we fail here, we expect to fallback to a global reset */
- DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
- engine->i915->guc.execbuf_client ? "GuC " : "",
- engine->name, ret);
- goto out;
- }
-
- /*
- * The request that caused the hang is stuck on elsp, we know the
- * active request and can drop it, adjust head to skip the offending
- * request to resume executing remaining requests in the queue.
- */
- intel_engine_reset(engine, true);
-
- /*
- * The engine and its registers (and workarounds in case of render)
- * have been reset to their default values. Follow the init_ring
- * process to program RING_MODE, HWSP and re-enable submission.
- */
- ret = engine->init_hw(engine);
- if (ret)
- goto out;
-
-out:
- intel_engine_cancel_stop_cs(engine);
- reset_finish_engine(engine);
- return ret;
-}
-
-static void i915_reset_device(struct drm_i915_private *i915,
- u32 engine_mask,
- const char *reason)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
- struct kobject *kobj = &i915->drm.primary->kdev->kobj;
- char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
- char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
- char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
- struct i915_wedge_me w;
-
- kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
-
- DRM_DEBUG_DRIVER("resetting chip\n");
- kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
- /* Use a watchdog to ensure that our reset completes */
- i915_wedge_on_timeout(&w, i915, 5 * HZ) {
- intel_prepare_reset(i915);
-
- /* Flush everyone using a resource about to be clobbered */
- synchronize_srcu_expedited(&error->reset_backoff_srcu);
-
- mutex_lock(&error->wedge_mutex);
- i915_reset(i915, engine_mask, reason);
- mutex_unlock(&error->wedge_mutex);
-
- intel_finish_reset(i915);
- }
-
- if (!test_bit(I915_WEDGED, &error->flags))
- kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
-}
-
-static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
-{
- intel_uncore_rmw(uncore, reg, 0, 0);
-}
-
-void i915_clear_error_registers(struct drm_i915_private *i915)
-{
- struct intel_uncore *uncore = &i915->uncore;
- u32 eir;
-
- if (!IS_GEN(i915, 2))
- clear_register(uncore, PGTBL_ER);
-
- if (INTEL_GEN(i915) < 4)
- clear_register(uncore, IPEIR(RENDER_RING_BASE));
- else
- clear_register(uncore, IPEIR_I965);
-
- clear_register(uncore, EIR);
- eir = intel_uncore_read(uncore, EIR);
- if (eir) {
- /*
- * some errors might have become stuck,
- * mask them.
- */
- DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
- rmw_set(uncore, EMR, eir);
- intel_uncore_write(uncore, GEN2_IIR,
- I915_MASTER_ERROR_INTERRUPT);
- }
-
- if (INTEL_GEN(i915) >= 8) {
- rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
- intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
- } else if (INTEL_GEN(i915) >= 6) {
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id) {
- rmw_clear(uncore,
- RING_FAULT_REG(engine), RING_FAULT_VALID);
- intel_uncore_posting_read(uncore,
- RING_FAULT_REG(engine));
- }
- }
-}
-
-/**
- * i915_handle_error - handle a gpu error
- * @i915: i915 device private
- * @engine_mask: mask representing engines that are hung
- * @flags: control flags
- * @fmt: Error message format string
- *
- * Do some basic checking of register state at error time and
- * dump it to the syslog. Also call i915_capture_error_state() to make
- * sure we get a record and make it available in debugfs. Fire a uevent
- * so userspace knows something bad happened (should trigger collection
- * of a ring dump etc.).
- */
-void i915_handle_error(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned long flags,
- const char *fmt, ...)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
- struct intel_engine_cs *engine;
- intel_wakeref_t wakeref;
- intel_engine_mask_t tmp;
- char error_msg[80];
- char *msg = NULL;
-
- if (fmt) {
- va_list args;
-
- va_start(args, fmt);
- vscnprintf(error_msg, sizeof(error_msg), fmt, args);
- va_end(args);
-
- msg = error_msg;
- }
-
- /*
- * In most cases it's guaranteed that we get here with an RPM
- * reference held, for example because there is a pending GPU
- * request that won't finish until the reset is done. This
- * isn't the case at least when we get here by doing a
- * simulated reset via debugfs, so get an RPM reference.
- */
- wakeref = intel_runtime_pm_get(i915);
-
- engine_mask &= INTEL_INFO(i915)->engine_mask;
-
- if (flags & I915_ERROR_CAPTURE) {
- i915_capture_error_state(i915, engine_mask, msg);
- i915_clear_error_registers(i915);
- }
-
- /*
- * Try engine reset when available. We fall back to full reset if
- * single reset fails.
- */
- if (intel_has_reset_engine(i915) && !__i915_wedged(error)) {
- for_each_engine_masked(engine, i915, engine_mask, tmp) {
- BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
- if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
- &error->flags))
- continue;
-
- if (i915_reset_engine(engine, msg) == 0)
- engine_mask &= ~engine->mask;
-
- clear_bit(I915_RESET_ENGINE + engine->id,
- &error->flags);
- wake_up_bit(&error->flags,
- I915_RESET_ENGINE + engine->id);
- }
- }
-
- if (!engine_mask)
- goto out;
-
- /* Full reset needs the mutex, stop any other user trying to do so. */
- if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) {
- wait_event(error->reset_queue,
- !test_bit(I915_RESET_BACKOFF, &error->flags));
- goto out; /* piggy-back on the other reset */
- }
-
- /* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
- synchronize_rcu_expedited();
-
- /* Prevent any other reset-engine attempt. */
- for_each_engine(engine, i915, tmp) {
- while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
- &error->flags))
- wait_on_bit(&error->flags,
- I915_RESET_ENGINE + engine->id,
- TASK_UNINTERRUPTIBLE);
- }
-
- i915_reset_device(i915, engine_mask, msg);
-
- for_each_engine(engine, i915, tmp) {
- clear_bit(I915_RESET_ENGINE + engine->id,
- &error->flags);
- }
-
- clear_bit(I915_RESET_BACKOFF, &error->flags);
- wake_up_all(&error->reset_queue);
-
-out:
- intel_runtime_pm_put(i915, wakeref);
-}
-
-int i915_reset_trylock(struct drm_i915_private *i915)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
- int srcu;
-
- might_lock(&error->reset_backoff_srcu);
- might_sleep();
-
- rcu_read_lock();
- while (test_bit(I915_RESET_BACKOFF, &error->flags)) {
- rcu_read_unlock();
-
- if (wait_event_interruptible(error->reset_queue,
- !test_bit(I915_RESET_BACKOFF,
- &error->flags)))
- return -EINTR;
-
- rcu_read_lock();
- }
- srcu = srcu_read_lock(&error->reset_backoff_srcu);
- rcu_read_unlock();
-
- return srcu;
-}
-
-void i915_reset_unlock(struct drm_i915_private *i915, int tag)
-__releases(&i915->gpu_error.reset_backoff_srcu)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
-
- srcu_read_unlock(&error->reset_backoff_srcu, tag);
-}
-
-int i915_terminally_wedged(struct drm_i915_private *i915)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
-
- might_sleep();
-
- if (!__i915_wedged(error))
- return 0;
-
- /* Reset still in progress? Maybe we will recover? */
- if (!test_bit(I915_RESET_BACKOFF, &error->flags))
- return -EIO;
-
- /* XXX intel_reset_finish() still takes struct_mutex!!! */
- if (mutex_is_locked(&i915->drm.struct_mutex))
- return -EAGAIN;
-
- if (wait_event_interruptible(error->reset_queue,
- !test_bit(I915_RESET_BACKOFF,
- &error->flags)))
- return -EINTR;
-
- return __i915_wedged(error) ? -EIO : 0;
-}
-
-bool i915_reset_flush(struct drm_i915_private *i915)
-{
- int err;
-
- cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-
- flush_workqueue(i915->wq);
- GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
-
- mutex_lock(&i915->drm.struct_mutex);
- err = i915_gem_wait_for_idle(i915,
- I915_WAIT_LOCKED |
- I915_WAIT_FOR_IDLE_BOOST,
- MAX_SCHEDULE_TIMEOUT);
- mutex_unlock(&i915->drm.struct_mutex);
-
- return !err;
-}
-
-static void i915_wedge_me(struct work_struct *work)
-{
- struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
-
- dev_err(w->i915->drm.dev,
- "%s timed out, cancelling all in-flight rendering.\n",
- w->name);
- i915_gem_set_wedged(w->i915);
-}
-
-void __i915_init_wedge(struct i915_wedge_me *w,
- struct drm_i915_private *i915,
- long timeout,
- const char *name)
-{
- w->i915 = i915;
- w->name = name;
-
- INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me);
- schedule_delayed_work(&w->work, timeout);
-}
-
-void __i915_fini_wedge(struct i915_wedge_me *w)
-{
- cancel_delayed_work_sync(&w->work);
- destroy_delayed_work_on_stack(&w->work);
- w->i915 = NULL;
-}
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2008-2018 Intel Corporation
- */
-
-#ifndef I915_RESET_H
-#define I915_RESET_H
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/srcu.h>
-
-#include "intel_engine_types.h"
-
-struct drm_i915_private;
-struct i915_request;
-struct intel_engine_cs;
-struct intel_guc;
-
-__printf(4, 5)
-void i915_handle_error(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned long flags,
- const char *fmt, ...);
-#define I915_ERROR_CAPTURE BIT(0)
-
-void i915_clear_error_registers(struct drm_i915_private *i915);
-
-void i915_reset(struct drm_i915_private *i915,
- intel_engine_mask_t stalled_mask,
- const char *reason);
-int i915_reset_engine(struct intel_engine_cs *engine,
- const char *reason);
-
-void i915_reset_request(struct i915_request *rq, bool guilty);
-bool i915_reset_flush(struct drm_i915_private *i915);
-
-int __must_check i915_reset_trylock(struct drm_i915_private *i915);
-void i915_reset_unlock(struct drm_i915_private *i915, int tag);
-
-int i915_terminally_wedged(struct drm_i915_private *i915);
-
-bool intel_has_gpu_reset(struct drm_i915_private *i915);
-bool intel_has_reset_engine(struct drm_i915_private *i915);
-
-int intel_gpu_reset(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask);
-
-int intel_reset_guc(struct drm_i915_private *i915);
-
-struct i915_wedge_me {
- struct delayed_work work;
- struct drm_i915_private *i915;
- const char *name;
-};
-
-void __i915_init_wedge(struct i915_wedge_me *w,
- struct drm_i915_private *i915,
- long timeout,
- const char *name);
-void __i915_fini_wedge(struct i915_wedge_me *w);
-
-#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \
- for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \
- (W)->i915; \
- __i915_fini_wedge((W)))
-
-#endif /* I915_RESET_H */
#include <linux/list.h>
+#include "gt/intel_engine_types.h"
#include "i915_priolist_types.h"
-#include "intel_engine_types.h"
struct drm_i915_private;
struct i915_request;
#include <drm/drm_drv.h>
+#include "gt/intel_engine.h"
+
#include "i915_drv.h"
#include "intel_drv.h"
-#include "intel_ringbuffer.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM i915
*
*/
+#include "gt/intel_engine.h"
+
#include "i915_vma.h"
#include "i915_drv.h"
#include "i915_globals.h"
-#include "intel_ringbuffer.h"
#include "intel_frontbuffer.h"
#include <drm/drm_gem.h>
+++ /dev/null
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include <linux/kthread.h>
-#include <uapi/linux/sched/types.h>
-
-#include "i915_drv.h"
-
-static void irq_enable(struct intel_engine_cs *engine)
-{
- if (!engine->irq_enable)
- return;
-
- /* Caller disables interrupts */
- spin_lock(&engine->i915->irq_lock);
- engine->irq_enable(engine);
- spin_unlock(&engine->i915->irq_lock);
-}
-
-static void irq_disable(struct intel_engine_cs *engine)
-{
- if (!engine->irq_disable)
- return;
-
- /* Caller disables interrupts */
- spin_lock(&engine->i915->irq_lock);
- engine->irq_disable(engine);
- spin_unlock(&engine->i915->irq_lock);
-}
-
-static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
-{
- lockdep_assert_held(&b->irq_lock);
-
- GEM_BUG_ON(!b->irq_enabled);
- if (!--b->irq_enabled)
- irq_disable(container_of(b,
- struct intel_engine_cs,
- breadcrumbs));
-
- b->irq_armed = false;
-}
-
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- if (!b->irq_armed)
- return;
-
- spin_lock_irq(&b->irq_lock);
- if (b->irq_armed)
- __intel_breadcrumbs_disarm_irq(b);
- spin_unlock_irq(&b->irq_lock);
-}
-
-static inline bool __request_completed(const struct i915_request *rq)
-{
- return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
-}
-
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct intel_context *ce, *cn;
- struct list_head *pos, *next;
- LIST_HEAD(signal);
-
- spin_lock(&b->irq_lock);
-
- if (b->irq_armed && list_empty(&b->signalers))
- __intel_breadcrumbs_disarm_irq(b);
-
- list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
- GEM_BUG_ON(list_empty(&ce->signals));
-
- list_for_each_safe(pos, next, &ce->signals) {
- struct i915_request *rq =
- list_entry(pos, typeof(*rq), signal_link);
-
- if (!__request_completed(rq))
- break;
-
- GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
- &rq->fence.flags));
-
- /*
- * Queue for execution after dropping the signaling
- * spinlock as the callback chain may end up adding
- * more signalers to the same context or engine.
- */
- i915_request_get(rq);
-
- /*
- * We may race with direct invocation of
- * dma_fence_signal(), e.g. i915_request_retire(),
- * so we need to acquire our reference to the request
- * before we cancel the breadcrumb.
- */
- clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
- list_add_tail(&rq->signal_link, &signal);
- }
-
- /*
- * We process the list deletion in bulk, only using a list_add
- * (not list_move) above but keeping the status of
- * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit.
- */
- if (!list_is_first(pos, &ce->signals)) {
- /* Advance the list to the first incomplete request */
- __list_del_many(&ce->signals, pos);
- if (&ce->signals == pos) /* now empty */
- list_del_init(&ce->signal_link);
- }
- }
-
- spin_unlock(&b->irq_lock);
-
- list_for_each_safe(pos, next, &signal) {
- struct i915_request *rq =
- list_entry(pos, typeof(*rq), signal_link);
-
- dma_fence_signal(&rq->fence);
- i915_request_put(rq);
- }
-}
-
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
-{
- local_irq_disable();
- intel_engine_breadcrumbs_irq(engine);
- local_irq_enable();
-}
-
-static void signal_irq_work(struct irq_work *work)
-{
- struct intel_engine_cs *engine =
- container_of(work, typeof(*engine), breadcrumbs.irq_work);
-
- intel_engine_breadcrumbs_irq(engine);
-}
-
-void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- spin_lock_irq(&b->irq_lock);
- if (!b->irq_enabled++)
- irq_enable(engine);
- GEM_BUG_ON(!b->irq_enabled); /* no overflow! */
- spin_unlock_irq(&b->irq_lock);
-}
-
-void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- spin_lock_irq(&b->irq_lock);
- GEM_BUG_ON(!b->irq_enabled); /* no underflow! */
- if (!--b->irq_enabled)
- irq_disable(engine);
- spin_unlock_irq(&b->irq_lock);
-}
-
-static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
-{
- struct intel_engine_cs *engine =
- container_of(b, struct intel_engine_cs, breadcrumbs);
-
- lockdep_assert_held(&b->irq_lock);
- if (b->irq_armed)
- return;
-
- /*
- * The breadcrumb irq will be disarmed on the interrupt after the
- * waiters are signaled. This gives us a single interrupt window in
- * which we can add a new waiter and avoid the cost of re-enabling
- * the irq.
- */
- b->irq_armed = true;
-
- /*
- * Since we are waiting on a request, the GPU should be busy
- * and should have its own rpm reference. This is tracked
- * by i915->gt.awake, we can forgo holding our own wakeref
- * for the interrupt as before i915->gt.awake is released (when
- * the driver is idle) we disarm the breadcrumbs.
- */
-
- if (!b->irq_enabled++)
- irq_enable(engine);
-}
-
-void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- spin_lock_init(&b->irq_lock);
- INIT_LIST_HEAD(&b->signalers);
-
- init_irq_work(&b->irq_work, signal_irq_work);
-}
-
-void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- unsigned long flags;
-
- spin_lock_irqsave(&b->irq_lock, flags);
-
- if (b->irq_enabled)
- irq_enable(engine);
- else
- irq_disable(engine);
-
- spin_unlock_irqrestore(&b->irq_lock, flags);
-}
-
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
-{
-}
-
-bool i915_request_enable_breadcrumb(struct i915_request *rq)
-{
- struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-
- GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
-
- if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
- return true;
-
- spin_lock(&b->irq_lock);
- if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
- !__request_completed(rq)) {
- struct intel_context *ce = rq->hw_context;
- struct list_head *pos;
-
- __intel_breadcrumbs_arm_irq(b);
-
- /*
- * We keep the seqno in retirement order, so we can break
- * inside intel_engine_breadcrumbs_irq as soon as we've passed
- * the last completed request (or seen a request that hasn't
- * even started). We could iterate the timeline->requests list,
- * but keeping a separate signalers_list has the advantage of
- * hopefully being much smaller than the full list and so
- * provides faster iteration and detection when there are no
- * more interrupts required for this context.
- *
- * We typically expect to add new signalers in order, so we
- * start looking for our insertion point from the tail of
- * the list.
- */
- list_for_each_prev(pos, &ce->signals) {
- struct i915_request *it =
- list_entry(pos, typeof(*it), signal_link);
-
- if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
- break;
- }
- list_add(&rq->signal_link, pos);
- if (pos == &ce->signals) /* catch transitions from empty list */
- list_move_tail(&ce->signal_link, &b->signalers);
-
- set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
- }
- spin_unlock(&b->irq_lock);
-
- return !__request_completed(rq);
-}
-
-void i915_request_cancel_breadcrumb(struct i915_request *rq)
-{
- struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-
- if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
- return;
-
- spin_lock(&b->irq_lock);
- if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
- struct intel_context *ce = rq->hw_context;
-
- list_del(&rq->signal_link);
- if (list_empty(&ce->signals))
- list_del_init(&ce->signal_link);
-
- clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
- }
- spin_unlock(&b->irq_lock);
-}
-
-void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
- struct drm_printer *p)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct intel_context *ce;
- struct i915_request *rq;
-
- if (list_empty(&b->signalers))
- return;
-
- drm_printf(p, "Signals:\n");
-
- spin_lock_irq(&b->irq_lock);
- list_for_each_entry(ce, &b->signalers, signal_link) {
- list_for_each_entry(rq, &ce->signals, signal_link) {
- drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
- rq->fence.context, rq->fence.seqno,
- i915_request_completed(rq) ? "!" :
- i915_request_started(rq) ? "*" :
- "",
- jiffies_to_msecs(jiffies - rq->emitted_jiffies));
- }
- }
- spin_unlock_irq(&b->irq_lock);
-}
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#include "i915_drv.h"
-#include "i915_gem_context.h"
-#include "i915_globals.h"
-#include "intel_context.h"
-#include "intel_ringbuffer.h"
-
-static struct i915_global_context {
- struct i915_global base;
- struct kmem_cache *slab_ce;
-} global;
-
-struct intel_context *intel_context_alloc(void)
-{
- return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
-}
-
-void intel_context_free(struct intel_context *ce)
-{
- kmem_cache_free(global.slab_ce, ce);
-}
-
-struct intel_context *
-intel_context_lookup(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
-{
- struct intel_context *ce = NULL;
- struct rb_node *p;
-
- spin_lock(&ctx->hw_contexts_lock);
- p = ctx->hw_contexts.rb_node;
- while (p) {
- struct intel_context *this =
- rb_entry(p, struct intel_context, node);
-
- if (this->engine == engine) {
- GEM_BUG_ON(this->gem_context != ctx);
- ce = this;
- break;
- }
-
- if (this->engine < engine)
- p = p->rb_right;
- else
- p = p->rb_left;
- }
- spin_unlock(&ctx->hw_contexts_lock);
-
- return ce;
-}
-
-struct intel_context *
-__intel_context_insert(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- struct intel_context *ce)
-{
- struct rb_node **p, *parent;
- int err = 0;
-
- spin_lock(&ctx->hw_contexts_lock);
-
- parent = NULL;
- p = &ctx->hw_contexts.rb_node;
- while (*p) {
- struct intel_context *this;
-
- parent = *p;
- this = rb_entry(parent, struct intel_context, node);
-
- if (this->engine == engine) {
- err = -EEXIST;
- ce = this;
- break;
- }
-
- if (this->engine < engine)
- p = &parent->rb_right;
- else
- p = &parent->rb_left;
- }
- if (!err) {
- rb_link_node(&ce->node, parent, p);
- rb_insert_color(&ce->node, &ctx->hw_contexts);
- }
-
- spin_unlock(&ctx->hw_contexts_lock);
-
- return ce;
-}
-
-void __intel_context_remove(struct intel_context *ce)
-{
- struct i915_gem_context *ctx = ce->gem_context;
-
- spin_lock(&ctx->hw_contexts_lock);
- rb_erase(&ce->node, &ctx->hw_contexts);
- spin_unlock(&ctx->hw_contexts_lock);
-}
-
-static struct intel_context *
-intel_context_instance(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
-{
- struct intel_context *ce, *pos;
-
- ce = intel_context_lookup(ctx, engine);
- if (likely(ce))
- return ce;
-
- ce = intel_context_alloc();
- if (!ce)
- return ERR_PTR(-ENOMEM);
-
- intel_context_init(ce, ctx, engine);
-
- pos = __intel_context_insert(ctx, engine, ce);
- if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */
- intel_context_free(ce);
-
- GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
- return pos;
-}
-
-struct intel_context *
-intel_context_pin_lock(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
- __acquires(ce->pin_mutex)
-{
- struct intel_context *ce;
-
- ce = intel_context_instance(ctx, engine);
- if (IS_ERR(ce))
- return ce;
-
- if (mutex_lock_interruptible(&ce->pin_mutex))
- return ERR_PTR(-EINTR);
-
- return ce;
-}
-
-struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
-{
- struct intel_context *ce;
- int err;
-
- ce = intel_context_instance(ctx, engine);
- if (IS_ERR(ce))
- return ce;
-
- if (likely(atomic_inc_not_zero(&ce->pin_count)))
- return ce;
-
- if (mutex_lock_interruptible(&ce->pin_mutex))
- return ERR_PTR(-EINTR);
-
- if (likely(!atomic_read(&ce->pin_count))) {
- err = ce->ops->pin(ce);
- if (err)
- goto err;
-
- i915_gem_context_get(ctx);
- GEM_BUG_ON(ce->gem_context != ctx);
-
- mutex_lock(&ctx->mutex);
- list_add(&ce->active_link, &ctx->active_engines);
- mutex_unlock(&ctx->mutex);
-
- intel_context_get(ce);
- smp_mb__before_atomic(); /* flush pin before it is visible */
- }
-
- atomic_inc(&ce->pin_count);
- GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
-
- mutex_unlock(&ce->pin_mutex);
- return ce;
-
-err:
- mutex_unlock(&ce->pin_mutex);
- return ERR_PTR(err);
-}
-
-void intel_context_unpin(struct intel_context *ce)
-{
- if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
- return;
-
- /* We may be called from inside intel_context_pin() to evict another */
- intel_context_get(ce);
- mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
-
- if (likely(atomic_dec_and_test(&ce->pin_count))) {
- ce->ops->unpin(ce);
-
- mutex_lock(&ce->gem_context->mutex);
- list_del(&ce->active_link);
- mutex_unlock(&ce->gem_context->mutex);
-
- i915_gem_context_put(ce->gem_context);
- intel_context_put(ce);
- }
-
- mutex_unlock(&ce->pin_mutex);
- intel_context_put(ce);
-}
-
-static void intel_context_retire(struct i915_active_request *active,
- struct i915_request *rq)
-{
- struct intel_context *ce =
- container_of(active, typeof(*ce), active_tracker);
-
- intel_context_unpin(ce);
-}
-
-void
-intel_context_init(struct intel_context *ce,
- struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
-{
- kref_init(&ce->ref);
-
- ce->gem_context = ctx;
- ce->engine = engine;
- ce->ops = engine->cops;
- ce->sseu = engine->sseu;
-
- INIT_LIST_HEAD(&ce->signal_link);
- INIT_LIST_HEAD(&ce->signals);
-
- mutex_init(&ce->pin_mutex);
-
- i915_active_request_init(&ce->active_tracker,
- NULL, intel_context_retire);
-}
-
-static void i915_global_context_shrink(void)
-{
- kmem_cache_shrink(global.slab_ce);
-}
-
-static void i915_global_context_exit(void)
-{
- kmem_cache_destroy(global.slab_ce);
-}
-
-static struct i915_global_context global = { {
- .shrink = i915_global_context_shrink,
- .exit = i915_global_context_exit,
-} };
-
-int __init i915_global_context_init(void)
-{
- global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
- if (!global.slab_ce)
- return -ENOMEM;
-
- i915_global_register(&global.base);
- return 0;
-}
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __INTEL_CONTEXT_H__
-#define __INTEL_CONTEXT_H__
-
-#include <linux/lockdep.h>
-
-#include "intel_context_types.h"
-#include "intel_engine_types.h"
-
-struct intel_context *intel_context_alloc(void);
-void intel_context_free(struct intel_context *ce);
-
-void intel_context_init(struct intel_context *ce,
- struct i915_gem_context *ctx,
- struct intel_engine_cs *engine);
-
-/**
- * intel_context_lookup - Find the matching HW context for this (ctx, engine)
- * @ctx: the parent GEM context
- * @engine: the target HW engine
- *
- * May return NULL if the HW context hasn't been instantiated (i.e. unused).
- */
-struct intel_context *
-intel_context_lookup(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine);
-
-/**
- * intel_context_pin_lock - Stabilises the 'pinned' status of the HW context
- * @ctx: the parent GEM context
- * @engine: the target HW engine
- *
- * Acquire a lock on the pinned status of the HW context, such that the context
- * can neither be bound to the GPU or unbound whilst the lock is held, i.e.
- * intel_context_is_pinned() remains stable.
- */
-struct intel_context *
-intel_context_pin_lock(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine);
-
-static inline bool
-intel_context_is_pinned(struct intel_context *ce)
-{
- return atomic_read(&ce->pin_count);
-}
-
-static inline void intel_context_pin_unlock(struct intel_context *ce)
-__releases(ce->pin_mutex)
-{
- mutex_unlock(&ce->pin_mutex);
-}
-
-struct intel_context *
-__intel_context_insert(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- struct intel_context *ce);
-void
-__intel_context_remove(struct intel_context *ce);
-
-struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
-
-static inline void __intel_context_pin(struct intel_context *ce)
-{
- GEM_BUG_ON(!intel_context_is_pinned(ce));
- atomic_inc(&ce->pin_count);
-}
-
-void intel_context_unpin(struct intel_context *ce);
-
-static inline struct intel_context *intel_context_get(struct intel_context *ce)
-{
- kref_get(&ce->ref);
- return ce;
-}
-
-static inline void intel_context_put(struct intel_context *ce)
-{
- kref_put(&ce->ref, ce->ops->destroy);
-}
-
-#endif /* __INTEL_CONTEXT_H__ */
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __INTEL_CONTEXT_TYPES__
-#define __INTEL_CONTEXT_TYPES__
-
-#include <linux/kref.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/rbtree.h>
-#include <linux/types.h>
-
-#include "i915_active_types.h"
-#include "intel_sseu.h"
-
-struct i915_gem_context;
-struct i915_vma;
-struct intel_context;
-struct intel_ring;
-
-struct intel_context_ops {
- int (*pin)(struct intel_context *ce);
- void (*unpin)(struct intel_context *ce);
-
- void (*reset)(struct intel_context *ce);
- void (*destroy)(struct kref *kref);
-};
-
-struct intel_context {
- struct kref ref;
-
- struct i915_gem_context *gem_context;
- struct intel_engine_cs *engine;
- struct intel_engine_cs *active;
-
- struct list_head active_link;
- struct list_head signal_link;
- struct list_head signals;
-
- struct i915_vma *state;
- struct intel_ring *ring;
-
- u32 *lrc_reg_state;
- u64 lrc_desc;
-
- atomic_t pin_count;
- struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
-
- /**
- * active_tracker: Active tracker for the external rq activity
- * on this intel_context object.
- */
- struct i915_active_request active_tracker;
-
- const struct intel_context_ops *ops;
- struct rb_node node;
-
- /** sseu: Control eu/slice partitioning */
- struct intel_sseu sseu;
-};
-
-#endif /* __INTEL_CONTEXT_TYPES__ */
#include <uapi/drm/i915_drm.h>
-#include "intel_engine_types.h"
+#include "gt/intel_engine_types.h"
+#include "gt/intel_context_types.h"
+#include "gt/intel_sseu.h"
+
#include "intel_display.h"
-#include "intel_sseu.h"
struct drm_printer;
struct drm_i915_private;
#include "i915_drv.h"
#include "i915_gem_clflush.h"
-#include "i915_reset.h"
#include "i915_trace.h"
#include "intel_atomic_plane.h"
#include "intel_color.h"
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include <drm/drm_print.h>
-
-#include "i915_drv.h"
-#include "i915_reset.h"
-#include "intel_ringbuffer.h"
-#include "intel_lrc.h"
-
-/* Haswell does have the CXT_SIZE register however it does not appear to be
- * valid. Now, docs explain in dwords what is in the context object. The full
- * size is 70720 bytes, however, the power context and execlist context will
- * never be saved (power context is stored elsewhere, and execlists don't work
- * on HSW) - so the final size, including the extra state required for the
- * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
- */
-#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
-
-#define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
-#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
-#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
-#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE)
-#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE)
-
-#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)
-
-struct engine_class_info {
- const char *name;
- int (*init_legacy)(struct intel_engine_cs *engine);
- int (*init_execlists)(struct intel_engine_cs *engine);
-
- u8 uabi_class;
-};
-
-static const struct engine_class_info intel_engine_classes[] = {
- [RENDER_CLASS] = {
- .name = "rcs",
- .init_execlists = logical_render_ring_init,
- .init_legacy = intel_init_render_ring_buffer,
- .uabi_class = I915_ENGINE_CLASS_RENDER,
- },
- [COPY_ENGINE_CLASS] = {
- .name = "bcs",
- .init_execlists = logical_xcs_ring_init,
- .init_legacy = intel_init_blt_ring_buffer,
- .uabi_class = I915_ENGINE_CLASS_COPY,
- },
- [VIDEO_DECODE_CLASS] = {
- .name = "vcs",
- .init_execlists = logical_xcs_ring_init,
- .init_legacy = intel_init_bsd_ring_buffer,
- .uabi_class = I915_ENGINE_CLASS_VIDEO,
- },
- [VIDEO_ENHANCEMENT_CLASS] = {
- .name = "vecs",
- .init_execlists = logical_xcs_ring_init,
- .init_legacy = intel_init_vebox_ring_buffer,
- .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
- },
-};
-
-#define MAX_MMIO_BASES 3
-struct engine_info {
- unsigned int hw_id;
- u8 class;
- u8 instance;
- /* mmio bases table *must* be sorted in reverse gen order */
- struct engine_mmio_base {
- u32 gen : 8;
- u32 base : 24;
- } mmio_bases[MAX_MMIO_BASES];
-};
-
-static const struct engine_info intel_engines[] = {
- [RCS0] = {
- .hw_id = RCS0_HW,
- .class = RENDER_CLASS,
- .instance = 0,
- .mmio_bases = {
- { .gen = 1, .base = RENDER_RING_BASE }
- },
- },
- [BCS0] = {
- .hw_id = BCS0_HW,
- .class = COPY_ENGINE_CLASS,
- .instance = 0,
- .mmio_bases = {
- { .gen = 6, .base = BLT_RING_BASE }
- },
- },
- [VCS0] = {
- .hw_id = VCS0_HW,
- .class = VIDEO_DECODE_CLASS,
- .instance = 0,
- .mmio_bases = {
- { .gen = 11, .base = GEN11_BSD_RING_BASE },
- { .gen = 6, .base = GEN6_BSD_RING_BASE },
- { .gen = 4, .base = BSD_RING_BASE }
- },
- },
- [VCS1] = {
- .hw_id = VCS1_HW,
- .class = VIDEO_DECODE_CLASS,
- .instance = 1,
- .mmio_bases = {
- { .gen = 11, .base = GEN11_BSD2_RING_BASE },
- { .gen = 8, .base = GEN8_BSD2_RING_BASE }
- },
- },
- [VCS2] = {
- .hw_id = VCS2_HW,
- .class = VIDEO_DECODE_CLASS,
- .instance = 2,
- .mmio_bases = {
- { .gen = 11, .base = GEN11_BSD3_RING_BASE }
- },
- },
- [VCS3] = {
- .hw_id = VCS3_HW,
- .class = VIDEO_DECODE_CLASS,
- .instance = 3,
- .mmio_bases = {
- { .gen = 11, .base = GEN11_BSD4_RING_BASE }
- },
- },
- [VECS0] = {
- .hw_id = VECS0_HW,
- .class = VIDEO_ENHANCEMENT_CLASS,
- .instance = 0,
- .mmio_bases = {
- { .gen = 11, .base = GEN11_VEBOX_RING_BASE },
- { .gen = 7, .base = VEBOX_RING_BASE }
- },
- },
- [VECS1] = {
- .hw_id = VECS1_HW,
- .class = VIDEO_ENHANCEMENT_CLASS,
- .instance = 1,
- .mmio_bases = {
- { .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
- },
- },
-};
-
-/**
- * __intel_engine_context_size() - return the size of the context for an engine
- * @dev_priv: i915 device private
- * @class: engine class
- *
- * Each engine class may require a different amount of space for a context
- * image.
- *
- * Return: size (in bytes) of an engine class specific context image
- *
- * Note: this size includes the HWSP, which is part of the context image
- * in LRC mode, but does not include the "shared data page" used with
- * GuC submission. The caller should account for this if using the GuC.
- */
-static u32
-__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
-{
- u32 cxt_size;
-
- BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
-
- switch (class) {
- case RENDER_CLASS:
- switch (INTEL_GEN(dev_priv)) {
- default:
- MISSING_CASE(INTEL_GEN(dev_priv));
- return DEFAULT_LR_CONTEXT_RENDER_SIZE;
- case 11:
- return GEN11_LR_CONTEXT_RENDER_SIZE;
- case 10:
- return GEN10_LR_CONTEXT_RENDER_SIZE;
- case 9:
- return GEN9_LR_CONTEXT_RENDER_SIZE;
- case 8:
- return GEN8_LR_CONTEXT_RENDER_SIZE;
- case 7:
- if (IS_HASWELL(dev_priv))
- return HSW_CXT_TOTAL_SIZE;
-
- cxt_size = I915_READ(GEN7_CXT_SIZE);
- return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
- PAGE_SIZE);
- case 6:
- cxt_size = I915_READ(CXT_SIZE);
- return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
- PAGE_SIZE);
- case 5:
- case 4:
- case 3:
- case 2:
- /* For the special day when i810 gets merged. */
- case 1:
- return 0;
- }
- break;
- default:
- MISSING_CASE(class);
- /* fall through */
- case VIDEO_DECODE_CLASS:
- case VIDEO_ENHANCEMENT_CLASS:
- case COPY_ENGINE_CLASS:
- if (INTEL_GEN(dev_priv) < 8)
- return 0;
- return GEN8_LR_CONTEXT_OTHER_SIZE;
- }
-}
-
-static u32 __engine_mmio_base(struct drm_i915_private *i915,
- const struct engine_mmio_base *bases)
-{
- int i;
-
- for (i = 0; i < MAX_MMIO_BASES; i++)
- if (INTEL_GEN(i915) >= bases[i].gen)
- break;
-
- GEM_BUG_ON(i == MAX_MMIO_BASES);
- GEM_BUG_ON(!bases[i].base);
-
- return bases[i].base;
-}
-
-static void __sprint_engine_name(char *name, const struct engine_info *info)
-{
- WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
- intel_engine_classes[info->class].name,
- info->instance) >= INTEL_ENGINE_CS_MAX_NAME);
-}
-
-void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
-{
- /*
- * Though they added more rings on g4x/ilk, they did not add
- * per-engine HWSTAM until gen6.
- */
- if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
- return;
-
- if (INTEL_GEN(engine->i915) >= 3)
- ENGINE_WRITE(engine, RING_HWSTAM, mask);
- else
- ENGINE_WRITE16(engine, RING_HWSTAM, mask);
-}
-
-static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
-{
- /* Mask off all writes into the unknown HWSP */
- intel_engine_set_hwsp_writemask(engine, ~0u);
-}
-
-static int
-intel_engine_setup(struct drm_i915_private *dev_priv,
- enum intel_engine_id id)
-{
- const struct engine_info *info = &intel_engines[id];
- struct intel_engine_cs *engine;
-
- GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
-
- BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
- BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
-
- if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
- return -EINVAL;
-
- if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
- return -EINVAL;
-
- if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
- return -EINVAL;
-
- GEM_BUG_ON(dev_priv->engine[id]);
- engine = kzalloc(sizeof(*engine), GFP_KERNEL);
- if (!engine)
- return -ENOMEM;
-
- BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
-
- engine->id = id;
- engine->mask = BIT(id);
- engine->i915 = dev_priv;
- engine->uncore = &dev_priv->uncore;
- __sprint_engine_name(engine->name, info);
- engine->hw_id = engine->guc_id = info->hw_id;
- engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
- engine->class = info->class;
- engine->instance = info->instance;
-
- engine->uabi_class = intel_engine_classes[info->class].uabi_class;
-
- engine->context_size = __intel_engine_context_size(dev_priv,
- engine->class);
- if (WARN_ON(engine->context_size > BIT(20)))
- engine->context_size = 0;
- if (engine->context_size)
- DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
-
- /* Nothing to do here, execute in order of dependencies */
- engine->schedule = NULL;
-
- seqlock_init(&engine->stats.lock);
-
- ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
-
- /* Scrub mmio state on takeover */
- intel_engine_sanitize_mmio(engine);
-
- dev_priv->engine_class[info->class][info->instance] = engine;
- dev_priv->engine[id] = engine;
- return 0;
-}
-
-/**
- * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
- * @dev_priv: i915 device private
- *
- * Return: non-zero if the initialization failed.
- */
-int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
-{
- struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
- const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- unsigned int mask = 0;
- unsigned int i;
- int err;
-
- WARN_ON(engine_mask == 0);
- WARN_ON(engine_mask &
- GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
-
- if (i915_inject_load_failure())
- return -ENODEV;
-
- for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
- if (!HAS_ENGINE(dev_priv, i))
- continue;
-
- err = intel_engine_setup(dev_priv, i);
- if (err)
- goto cleanup;
-
- mask |= BIT(i);
- }
-
- /*
- * Catch failures to update intel_engines table when the new engines
- * are added to the driver by a warning and disabling the forgotten
- * engines.
- */
- if (WARN_ON(mask != engine_mask))
- device_info->engine_mask = mask;
-
- /* We always presume we have at least RCS available for later probing */
- if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) {
- err = -ENODEV;
- goto cleanup;
- }
-
- RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask);
-
- i915_check_and_clear_faults(dev_priv);
-
- return 0;
-
-cleanup:
- for_each_engine(engine, dev_priv, id)
- kfree(engine);
- return err;
-}
-
-/**
- * intel_engines_init() - init the Engine Command Streamers
- * @dev_priv: i915 device private
- *
- * Return: non-zero if the initialization failed.
- */
-int intel_engines_init(struct drm_i915_private *dev_priv)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id, err_id;
- int err;
-
- for_each_engine(engine, dev_priv, id) {
- const struct engine_class_info *class_info =
- &intel_engine_classes[engine->class];
- int (*init)(struct intel_engine_cs *engine);
-
- if (HAS_EXECLISTS(dev_priv))
- init = class_info->init_execlists;
- else
- init = class_info->init_legacy;
-
- err = -EINVAL;
- err_id = id;
-
- if (GEM_DEBUG_WARN_ON(!init))
- goto cleanup;
-
- err = init(engine);
- if (err)
- goto cleanup;
-
- GEM_BUG_ON(!engine->submit_request);
- }
-
- return 0;
-
-cleanup:
- for_each_engine(engine, dev_priv, id) {
- if (id >= err_id) {
- kfree(engine);
- dev_priv->engine[id] = NULL;
- } else {
- dev_priv->gt.cleanup_engine(engine);
- }
- }
- return err;
-}
-
-static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
-{
- i915_gem_batch_pool_init(&engine->batch_pool, engine);
-}
-
-static void intel_engine_init_execlist(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
-
- execlists->port_mask = 1;
- GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
- GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
-
- execlists->queue_priority_hint = INT_MIN;
- execlists->queue = RB_ROOT_CACHED;
-}
-
-static void cleanup_status_page(struct intel_engine_cs *engine)
-{
- struct i915_vma *vma;
-
- /* Prevent writes into HWSP after returning the page to the system */
- intel_engine_set_hwsp_writemask(engine, ~0u);
-
- vma = fetch_and_zero(&engine->status_page.vma);
- if (!vma)
- return;
-
- if (!HWS_NEEDS_PHYSICAL(engine->i915))
- i915_vma_unpin(vma);
-
- i915_gem_object_unpin_map(vma->obj);
- __i915_gem_object_release_unless_active(vma->obj);
-}
-
-static int pin_ggtt_status_page(struct intel_engine_cs *engine,
- struct i915_vma *vma)
-{
- unsigned int flags;
-
- flags = PIN_GLOBAL;
- if (!HAS_LLC(engine->i915))
- /*
- * On g33, we cannot place HWS above 256MiB, so
- * restrict its pinning to the low mappable arena.
- * Though this restriction is not documented for
- * gen4, gen5, or byt, they also behave similarly
- * and hang if the HWS is placed at the top of the
- * GTT. To generalise, it appears that all !llc
- * platforms have issues with us placing the HWS
- * above the mappable region (even though we never
- * actually map it).
- */
- flags |= PIN_MAPPABLE;
- else
- flags |= PIN_HIGH;
-
- return i915_vma_pin(vma, 0, 0, flags);
-}
-
-static int init_status_page(struct intel_engine_cs *engine)
-{
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- void *vaddr;
- int ret;
-
- /*
- * Though the HWS register does support 36bit addresses, historically
- * we have had hangs and corruption reported due to wild writes if
- * the HWS is placed above 4G. We only allow objects to be allocated
- * in GFP_DMA32 for i965, and no earlier physical address users had
- * access to more than 4G.
- */
- obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
- if (IS_ERR(obj)) {
- DRM_ERROR("Failed to allocate status page\n");
- return PTR_ERR(obj);
- }
-
- i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
-
- vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto err;
- }
-
- vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- goto err;
- }
-
- engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
- engine->status_page.vma = vma;
-
- if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
- ret = pin_ggtt_status_page(engine, vma);
- if (ret)
- goto err_unpin;
- }
-
- return 0;
-
-err_unpin:
- i915_gem_object_unpin_map(obj);
-err:
- i915_gem_object_put(obj);
- return ret;
-}
-
-/**
- * intel_engines_setup_common - setup engine state not requiring hw access
- * @engine: Engine to setup.
- *
- * Initializes @engine@ structure members shared between legacy and execlists
- * submission modes which do not require hardware access.
- *
- * Typically done early in the submission mode specific engine setup stage.
- */
-int intel_engine_setup_common(struct intel_engine_cs *engine)
-{
- int err;
-
- err = init_status_page(engine);
- if (err)
- return err;
-
- err = i915_timeline_init(engine->i915,
- &engine->timeline,
- engine->status_page.vma);
- if (err)
- goto err_hwsp;
-
- i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
- intel_engine_init_breadcrumbs(engine);
- intel_engine_init_execlist(engine);
- intel_engine_init_hangcheck(engine);
- intel_engine_init_batch_pool(engine);
- intel_engine_init_cmd_parser(engine);
-
- /* Use the whole device by default */
- engine->sseu =
- intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
-
- return 0;
-
-err_hwsp:
- cleanup_status_page(engine);
- return err;
-}
-
-void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
-{
- static const struct {
- u8 engine;
- u8 sched;
- } map[] = {
-#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) }
- MAP(PREEMPTION, PREEMPTION),
- MAP(SEMAPHORES, SEMAPHORES),
-#undef MAP
- };
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- u32 enabled, disabled;
-
- enabled = 0;
- disabled = 0;
- for_each_engine(engine, i915, id) { /* all engines must agree! */
- int i;
-
- if (engine->schedule)
- enabled |= (I915_SCHEDULER_CAP_ENABLED |
- I915_SCHEDULER_CAP_PRIORITY);
- else
- disabled |= (I915_SCHEDULER_CAP_ENABLED |
- I915_SCHEDULER_CAP_PRIORITY);
-
- for (i = 0; i < ARRAY_SIZE(map); i++) {
- if (engine->flags & BIT(map[i].engine))
- enabled |= BIT(map[i].sched);
- else
- disabled |= BIT(map[i].sched);
- }
- }
-
- i915->caps.scheduler = enabled & ~disabled;
- if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED))
- i915->caps.scheduler = 0;
-}
-
-struct measure_breadcrumb {
- struct i915_request rq;
- struct i915_timeline timeline;
- struct intel_ring ring;
- u32 cs[1024];
-};
-
-static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
-{
- struct measure_breadcrumb *frame;
- int dw = -ENOMEM;
-
- GEM_BUG_ON(!engine->i915->gt.scratch);
-
- frame = kzalloc(sizeof(*frame), GFP_KERNEL);
- if (!frame)
- return -ENOMEM;
-
- if (i915_timeline_init(engine->i915,
- &frame->timeline,
- engine->status_page.vma))
- goto out_frame;
-
- INIT_LIST_HEAD(&frame->ring.request_list);
- frame->ring.timeline = &frame->timeline;
- frame->ring.vaddr = frame->cs;
- frame->ring.size = sizeof(frame->cs);
- frame->ring.effective_size = frame->ring.size;
- intel_ring_update_space(&frame->ring);
-
- frame->rq.i915 = engine->i915;
- frame->rq.engine = engine;
- frame->rq.ring = &frame->ring;
- frame->rq.timeline = &frame->timeline;
-
- dw = i915_timeline_pin(&frame->timeline);
- if (dw < 0)
- goto out_timeline;
-
- dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
-
- i915_timeline_unpin(&frame->timeline);
-
-out_timeline:
- i915_timeline_fini(&frame->timeline);
-out_frame:
- kfree(frame);
- return dw;
-}
-
-static int pin_context(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- struct intel_context **out)
-{
- struct intel_context *ce;
-
- ce = intel_context_pin(ctx, engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
-
- *out = ce;
- return 0;
-}
-
-/**
- * intel_engines_init_common - initialize cengine state which might require hw access
- * @engine: Engine to initialize.
- *
- * Initializes @engine@ structure members shared between legacy and execlists
- * submission modes which do require hardware access.
- *
- * Typcally done at later stages of submission mode specific engine setup.
- *
- * Returns zero on success or an error code on failure.
- */
-int intel_engine_init_common(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- int ret;
-
- /* We may need to do things with the shrinker which
- * require us to immediately switch back to the default
- * context. This can cause a problem as pinning the
- * default context also requires GTT space which may not
- * be available. To avoid this we always pin the default
- * context.
- */
- ret = pin_context(i915->kernel_context, engine,
- &engine->kernel_context);
- if (ret)
- return ret;
-
- /*
- * Similarly the preempt context must always be available so that
- * we can interrupt the engine at any time. However, as preemption
- * is optional, we allow it to fail.
- */
- if (i915->preempt_context)
- pin_context(i915->preempt_context, engine,
- &engine->preempt_context);
-
- ret = measure_breadcrumb_dw(engine);
- if (ret < 0)
- goto err_unpin;
-
- engine->emit_fini_breadcrumb_dw = ret;
-
- engine->set_default_submission(engine);
-
- return 0;
-
-err_unpin:
- if (engine->preempt_context)
- intel_context_unpin(engine->preempt_context);
- intel_context_unpin(engine->kernel_context);
- return ret;
-}
-
-void intel_gt_resume(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- /*
- * After resume, we may need to poke into the pinned kernel
- * contexts to paper over any damage caused by the sudden suspend.
- * Only the kernel contexts should remain pinned over suspend,
- * allowing us to fixup the user contexts on their first pin.
- */
- for_each_engine(engine, i915, id) {
- struct intel_context *ce;
-
- ce = engine->kernel_context;
- if (ce)
- ce->ops->reset(ce);
-
- ce = engine->preempt_context;
- if (ce)
- ce->ops->reset(ce);
- }
-}
-
-/**
- * intel_engines_cleanup_common - cleans up the engine state created by
- * the common initiailizers.
- * @engine: Engine to cleanup.
- *
- * This cleans up everything created by the common helpers.
- */
-void intel_engine_cleanup_common(struct intel_engine_cs *engine)
-{
- cleanup_status_page(engine);
-
- intel_engine_fini_breadcrumbs(engine);
- intel_engine_cleanup_cmd_parser(engine);
- i915_gem_batch_pool_fini(&engine->batch_pool);
-
- if (engine->default_state)
- i915_gem_object_put(engine->default_state);
-
- if (engine->preempt_context)
- intel_context_unpin(engine->preempt_context);
- intel_context_unpin(engine->kernel_context);
-
- i915_timeline_fini(&engine->timeline);
-
- intel_wa_list_free(&engine->ctx_wa_list);
- intel_wa_list_free(&engine->wa_list);
- intel_wa_list_free(&engine->whitelist);
-}
-
-u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
-
- u64 acthd;
-
- if (INTEL_GEN(i915) >= 8)
- acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
- else if (INTEL_GEN(i915) >= 4)
- acthd = ENGINE_READ(engine, RING_ACTHD);
- else
- acthd = ENGINE_READ(engine, ACTHD);
-
- return acthd;
-}
-
-u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
-{
- u64 bbaddr;
-
- if (INTEL_GEN(engine->i915) >= 8)
- bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
- else
- bbaddr = ENGINE_READ(engine, RING_BBADDR);
-
- return bbaddr;
-}
-
-int intel_engine_stop_cs(struct intel_engine_cs *engine)
-{
- struct intel_uncore *uncore = engine->uncore;
- const u32 base = engine->mmio_base;
- const i915_reg_t mode = RING_MI_MODE(base);
- int err;
-
- if (INTEL_GEN(engine->i915) < 3)
- return -ENODEV;
-
- GEM_TRACE("%s\n", engine->name);
-
- intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
-
- err = 0;
- if (__intel_wait_for_register_fw(uncore,
- mode, MODE_IDLE, MODE_IDLE,
- 1000, 0,
- NULL)) {
- GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
- err = -ETIMEDOUT;
- }
-
- /* A final mmio read to let GPU writes be hopefully flushed to memory */
- intel_uncore_posting_read_fw(uncore, mode);
-
- return err;
-}
-
-void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
-{
- GEM_TRACE("%s\n", engine->name);
-
- ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
-}
-
-const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
-{
- switch (type) {
- case I915_CACHE_NONE: return " uncached";
- case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
- case I915_CACHE_L3_LLC: return " L3+LLC";
- case I915_CACHE_WT: return " WT";
- default: return "";
- }
-}
-
-u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
-{
- const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
- u32 mcr_s_ss_select;
- u32 slice = fls(sseu->slice_mask);
- u32 subslice = fls(sseu->subslice_mask[slice]);
-
- if (IS_GEN(dev_priv, 10))
- mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
- GEN8_MCR_SUBSLICE(subslice);
- else if (INTEL_GEN(dev_priv) >= 11)
- mcr_s_ss_select = GEN11_MCR_SLICE(slice) |
- GEN11_MCR_SUBSLICE(subslice);
- else
- mcr_s_ss_select = 0;
-
- return mcr_s_ss_select;
-}
-
-static inline u32
-read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
- int subslice, i915_reg_t reg)
-{
- struct intel_uncore *uncore = &dev_priv->uncore;
- u32 mcr_slice_subslice_mask;
- u32 mcr_slice_subslice_select;
- u32 default_mcr_s_ss_select;
- u32 mcr;
- u32 ret;
- enum forcewake_domains fw_domains;
-
- if (INTEL_GEN(dev_priv) >= 11) {
- mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
- GEN11_MCR_SUBSLICE_MASK;
- mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
- GEN11_MCR_SUBSLICE(subslice);
- } else {
- mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
- GEN8_MCR_SUBSLICE_MASK;
- mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) |
- GEN8_MCR_SUBSLICE(subslice);
- }
-
- default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);
-
- fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
- FW_REG_READ);
- fw_domains |= intel_uncore_forcewake_for_reg(uncore,
- GEN8_MCR_SELECTOR,
- FW_REG_READ | FW_REG_WRITE);
-
- spin_lock_irq(&uncore->lock);
- intel_uncore_forcewake_get__locked(uncore, fw_domains);
-
- mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
-
- WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) !=
- default_mcr_s_ss_select);
-
- mcr &= ~mcr_slice_subslice_mask;
- mcr |= mcr_slice_subslice_select;
- intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
-
- ret = intel_uncore_read_fw(uncore, reg);
-
- mcr &= ~mcr_slice_subslice_mask;
- mcr |= default_mcr_s_ss_select;
-
- intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
-
- intel_uncore_forcewake_put__locked(uncore, fw_domains);
- spin_unlock_irq(&uncore->lock);
-
- return ret;
-}
-
-/* NB: please notice the memset */
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
- struct intel_instdone *instdone)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct intel_uncore *uncore = engine->uncore;
- u32 mmio_base = engine->mmio_base;
- int slice;
- int subslice;
-
- memset(instdone, 0, sizeof(*instdone));
-
- switch (INTEL_GEN(dev_priv)) {
- default:
- instdone->instdone =
- intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
-
- if (engine->id != RCS0)
- break;
-
- instdone->slice_common =
- intel_uncore_read(uncore, GEN7_SC_INSTDONE);
- for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
- instdone->sampler[slice][subslice] =
- read_subslice_reg(dev_priv, slice, subslice,
- GEN7_SAMPLER_INSTDONE);
- instdone->row[slice][subslice] =
- read_subslice_reg(dev_priv, slice, subslice,
- GEN7_ROW_INSTDONE);
- }
- break;
- case 7:
- instdone->instdone =
- intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
-
- if (engine->id != RCS0)
- break;
-
- instdone->slice_common =
- intel_uncore_read(uncore, GEN7_SC_INSTDONE);
- instdone->sampler[0][0] =
- intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
- instdone->row[0][0] =
- intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
-
- break;
- case 6:
- case 5:
- case 4:
- instdone->instdone =
- intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
- if (engine->id == RCS0)
- /* HACK: Using the wrong struct member */
- instdone->slice_common =
- intel_uncore_read(uncore, GEN4_INSTDONE1);
- break;
- case 3:
- case 2:
- instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
- break;
- }
-}
-
-static bool ring_is_idle(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- intel_wakeref_t wakeref;
- bool idle = true;
-
- if (I915_SELFTEST_ONLY(!engine->mmio_base))
- return true;
-
- /* If the whole device is asleep, the engine must be idle */
- wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
- if (!wakeref)
- return true;
-
- /* First check that no commands are left in the ring */
- if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
- (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
- idle = false;
-
- /* No bit for gen2, so assume the CS parser is idle */
- if (INTEL_GEN(dev_priv) > 2 &&
- !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
- idle = false;
-
- intel_runtime_pm_put(dev_priv, wakeref);
-
- return idle;
-}
-
-/**
- * intel_engine_is_idle() - Report if the engine has finished process all work
- * @engine: the intel_engine_cs
- *
- * Return true if there are no requests pending, nothing left to be submitted
- * to hardware, and that the engine is idle.
- */
-bool intel_engine_is_idle(struct intel_engine_cs *engine)
-{
- /* More white lies, if wedged, hw state is inconsistent */
- if (i915_reset_failed(engine->i915))
- return true;
-
- /* Waiting to drain ELSP? */
- if (READ_ONCE(engine->execlists.active)) {
- struct tasklet_struct *t = &engine->execlists.tasklet;
-
- local_bh_disable();
- if (tasklet_trylock(t)) {
- /* Must wait for any GPU reset in progress. */
- if (__tasklet_is_enabled(t))
- t->func(t->data);
- tasklet_unlock(t);
- }
- local_bh_enable();
-
- /* Otherwise flush the tasklet if it was on another cpu */
- tasklet_unlock_wait(t);
-
- if (READ_ONCE(engine->execlists.active))
- return false;
- }
-
- /* ELSP is empty, but there are ready requests? E.g. after reset */
- if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
- return false;
-
- /* Ring stopped? */
- return ring_is_idle(engine);
-}
-
-bool intel_engines_are_idle(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- /*
- * If the driver is wedged, HW state may be very inconsistent and
- * report that it is still busy, even though we have stopped using it.
- */
- if (i915_reset_failed(i915))
- return true;
-
- /* Already parked (and passed an idleness test); must still be idle */
- if (!READ_ONCE(i915->gt.awake))
- return true;
-
- for_each_engine(engine, i915, id) {
- if (!intel_engine_is_idle(engine))
- return false;
- }
-
- return true;
-}
-
-void intel_engines_reset_default_submission(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id)
- engine->set_default_submission(engine);
-}
-
-static bool reset_engines(struct drm_i915_private *i915)
-{
- if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
- return false;
-
- return intel_gpu_reset(i915, ALL_ENGINES) == 0;
-}
-
-/**
- * intel_engines_sanitize: called after the GPU has lost power
- * @i915: the i915 device
- * @force: ignore a failed reset and sanitize engine state anyway
- *
- * Anytime we reset the GPU, either with an explicit GPU reset or through a
- * PCI power cycle, the GPU loses state and we must reset our state tracking
- * to match. Note that calling intel_engines_sanitize() if the GPU has not
- * been reset results in much confusion!
- */
-void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- GEM_TRACE("\n");
-
- if (!reset_engines(i915) && !force)
- return;
-
- for_each_engine(engine, i915, id)
- intel_engine_reset(engine, false);
-}
-
-/**
- * intel_engines_park: called when the GT is transitioning from busy->idle
- * @i915: the i915 device
- *
- * The GT is now idle and about to go to sleep (maybe never to wake again?).
- * Time for us to tidy and put away our toys (release resources back to the
- * system).
- */
-void intel_engines_park(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id) {
- /* Flush the residual irq tasklets first. */
- intel_engine_disarm_breadcrumbs(engine);
- tasklet_kill(&engine->execlists.tasklet);
-
- /*
- * We are committed now to parking the engines, make sure there
- * will be no more interrupts arriving later and the engines
- * are truly idle.
- */
- if (wait_for(intel_engine_is_idle(engine), 10)) {
- struct drm_printer p = drm_debug_printer(__func__);
-
- dev_err(i915->drm.dev,
- "%s is not idle before parking\n",
- engine->name);
- intel_engine_dump(engine, &p, NULL);
- }
-
- /* Must be reset upon idling, or we may miss the busy wakeup. */
- GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
-
- if (engine->park)
- engine->park(engine);
-
- if (engine->pinned_default_state) {
- i915_gem_object_unpin_map(engine->default_state);
- engine->pinned_default_state = NULL;
- }
-
- i915_gem_batch_pool_fini(&engine->batch_pool);
- engine->execlists.no_priolist = false;
- }
-
- i915->gt.active_engines = 0;
-}
-
-/**
- * intel_engines_unpark: called when the GT is transitioning from idle->busy
- * @i915: the i915 device
- *
- * The GT was idle and now about to fire up with some new user requests.
- */
-void intel_engines_unpark(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id) {
- void *map;
-
- /* Pin the default state for fast resets from atomic context. */
- map = NULL;
- if (engine->default_state)
- map = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (!IS_ERR_OR_NULL(map))
- engine->pinned_default_state = map;
-
- if (engine->unpark)
- engine->unpark(engine);
-
- intel_engine_init_hangcheck(engine);
- }
-}
-
-/**
- * intel_engine_lost_context: called when the GPU is reset into unknown state
- * @engine: the engine
- *
- * We have either reset the GPU or otherwise about to lose state tracking of
- * the current GPU logical state (e.g. suspend). On next use, it is therefore
- * imperative that we make no presumptions about the current state and load
- * from scratch.
- */
-void intel_engine_lost_context(struct intel_engine_cs *engine)
-{
- struct intel_context *ce;
-
- lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
- ce = fetch_and_zero(&engine->last_retired_context);
- if (ce)
- intel_context_unpin(ce);
-}
-
-bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
-{
- switch (INTEL_GEN(engine->i915)) {
- case 2:
- return false; /* uses physical not virtual addresses */
- case 3:
- /* maybe only uses physical not virtual addresses */
- return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
- case 6:
- return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
- default:
- return true;
- }
-}
-
-unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- unsigned int which;
-
- which = 0;
- for_each_engine(engine, i915, id)
- if (engine->default_state)
- which |= BIT(engine->uabi_class);
-
- return which;
-}
-
-static int print_sched_attr(struct drm_i915_private *i915,
- const struct i915_sched_attr *attr,
- char *buf, int x, int len)
-{
- if (attr->priority == I915_PRIORITY_INVALID)
- return x;
-
- x += snprintf(buf + x, len - x,
- " prio=%d", attr->priority);
-
- return x;
-}
-
-static void print_request(struct drm_printer *m,
- struct i915_request *rq,
- const char *prefix)
-{
- const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
- char buf[80] = "";
- int x = 0;
-
- x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
-
- drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
- prefix,
- rq->fence.context, rq->fence.seqno,
- i915_request_completed(rq) ? "!" :
- i915_request_started(rq) ? "*" :
- "",
- test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
- &rq->fence.flags) ? "+" : "",
- buf,
- jiffies_to_msecs(jiffies - rq->emitted_jiffies),
- name);
-}
-
-static void hexdump(struct drm_printer *m, const void *buf, size_t len)
-{
- const size_t rowsize = 8 * sizeof(u32);
- const void *prev = NULL;
- bool skip = false;
- size_t pos;
-
- for (pos = 0; pos < len; pos += rowsize) {
- char line[128];
-
- if (prev && !memcmp(prev, buf + pos, rowsize)) {
- if (!skip) {
- drm_printf(m, "*\n");
- skip = true;
- }
- continue;
- }
-
- WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
- rowsize, sizeof(u32),
- line, sizeof(line),
- false) >= sizeof(line));
- drm_printf(m, "[%04zx] %s\n", pos, line);
-
- prev = buf + pos;
- skip = false;
- }
-}
-
-static void intel_engine_print_registers(const struct intel_engine_cs *engine,
- struct drm_printer *m)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- const struct intel_engine_execlists * const execlists =
- &engine->execlists;
- u64 addr;
-
- if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7))
- drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
- drm_printf(m, "\tRING_START: 0x%08x\n",
- ENGINE_READ(engine, RING_START));
- drm_printf(m, "\tRING_HEAD: 0x%08x\n",
- ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
- drm_printf(m, "\tRING_TAIL: 0x%08x\n",
- ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
- drm_printf(m, "\tRING_CTL: 0x%08x%s\n",
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
- if (INTEL_GEN(engine->i915) > 2) {
- drm_printf(m, "\tRING_MODE: 0x%08x%s\n",
- ENGINE_READ(engine, RING_MI_MODE),
- ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
- }
-
- if (INTEL_GEN(dev_priv) >= 6) {
- drm_printf(m, "\tRING_IMR: %08x\n",
- ENGINE_READ(engine, RING_IMR));
- }
-
- addr = intel_engine_get_active_head(engine);
- drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
- upper_32_bits(addr), lower_32_bits(addr));
- addr = intel_engine_get_last_batch_head(engine);
- drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
- upper_32_bits(addr), lower_32_bits(addr));
- if (INTEL_GEN(dev_priv) >= 8)
- addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
- else if (INTEL_GEN(dev_priv) >= 4)
- addr = ENGINE_READ(engine, RING_DMA_FADD);
- else
- addr = ENGINE_READ(engine, DMA_FADD_I8XX);
- drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
- upper_32_bits(addr), lower_32_bits(addr));
- if (INTEL_GEN(dev_priv) >= 4) {
- drm_printf(m, "\tIPEIR: 0x%08x\n",
- ENGINE_READ(engine, RING_IPEIR));
- drm_printf(m, "\tIPEHR: 0x%08x\n",
- ENGINE_READ(engine, RING_IPEHR));
- } else {
- drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
- drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
- }
-
- if (HAS_EXECLISTS(dev_priv)) {
- const u32 *hws =
- &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
- const u8 num_entries = execlists->csb_size;
- unsigned int idx;
- u8 read, write;
-
- drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
- ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
- ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
- num_entries);
-
- read = execlists->csb_head;
- write = READ_ONCE(*execlists->csb_write);
-
- drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
- read, write,
- yesno(test_bit(TASKLET_STATE_SCHED,
- &engine->execlists.tasklet.state)),
- enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
- if (read >= num_entries)
- read = 0;
- if (write >= num_entries)
- write = 0;
- if (read > write)
- write += num_entries;
- while (read < write) {
- idx = ++read % num_entries;
- drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
- idx, hws[idx * 2], hws[idx * 2 + 1]);
- }
-
- rcu_read_lock();
- for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
- struct i915_request *rq;
- unsigned int count;
-
- rq = port_unpack(&execlists->port[idx], &count);
- if (rq) {
- char hdr[80];
-
- snprintf(hdr, sizeof(hdr),
- "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
- idx, count,
- i915_ggtt_offset(rq->ring->vma),
- rq->timeline->hwsp_offset,
- hwsp_seqno(rq));
- print_request(m, rq, hdr);
- } else {
- drm_printf(m, "\t\tELSP[%d] idle\n", idx);
- }
- }
- drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
- rcu_read_unlock();
- } else if (INTEL_GEN(dev_priv) > 6) {
- drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
- ENGINE_READ(engine, RING_PP_DIR_BASE));
- drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
- ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
- drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
- ENGINE_READ(engine, RING_PP_DIR_DCLV));
- }
-}
-
-static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
-{
- void *ring;
- int size;
-
- drm_printf(m,
- "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
- rq->head, rq->postfix, rq->tail,
- rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
- rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
-
- size = rq->tail - rq->head;
- if (rq->tail < rq->head)
- size += rq->ring->size;
-
- ring = kmalloc(size, GFP_ATOMIC);
- if (ring) {
- const void *vaddr = rq->ring->vaddr;
- unsigned int head = rq->head;
- unsigned int len = 0;
-
- if (rq->tail < head) {
- len = rq->ring->size - head;
- memcpy(ring, vaddr + head, len);
- head = 0;
- }
- memcpy(ring + len, vaddr + head, size - len);
-
- hexdump(m, ring, size);
- kfree(ring);
- }
-}
-
-void intel_engine_dump(struct intel_engine_cs *engine,
- struct drm_printer *m,
- const char *header, ...)
-{
- struct i915_gpu_error * const error = &engine->i915->gpu_error;
- struct i915_request *rq;
- intel_wakeref_t wakeref;
-
- if (header) {
- va_list ap;
-
- va_start(ap, header);
- drm_vprintf(m, header, &ap);
- va_end(ap);
- }
-
- if (i915_reset_failed(engine->i915))
- drm_printf(m, "*** WEDGED ***\n");
-
- drm_printf(m, "\tHangcheck %x:%x [%d ms]\n",
- engine->hangcheck.last_seqno,
- engine->hangcheck.next_seqno,
- jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
- drm_printf(m, "\tReset count: %d (global %d)\n",
- i915_reset_engine_count(error, engine),
- i915_reset_count(error));
-
- rcu_read_lock();
-
- drm_printf(m, "\tRequests:\n");
-
- rq = list_first_entry(&engine->timeline.requests,
- struct i915_request, link);
- if (&rq->link != &engine->timeline.requests)
- print_request(m, rq, "\t\tfirst ");
-
- rq = list_last_entry(&engine->timeline.requests,
- struct i915_request, link);
- if (&rq->link != &engine->timeline.requests)
- print_request(m, rq, "\t\tlast ");
-
- rq = intel_engine_find_active_request(engine);
- if (rq) {
- print_request(m, rq, "\t\tactive ");
-
- drm_printf(m, "\t\tring->start: 0x%08x\n",
- i915_ggtt_offset(rq->ring->vma));
- drm_printf(m, "\t\tring->head: 0x%08x\n",
- rq->ring->head);
- drm_printf(m, "\t\tring->tail: 0x%08x\n",
- rq->ring->tail);
- drm_printf(m, "\t\tring->emit: 0x%08x\n",
- rq->ring->emit);
- drm_printf(m, "\t\tring->space: 0x%08x\n",
- rq->ring->space);
- drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
- rq->timeline->hwsp_offset);
-
- print_request_ring(m, rq);
- }
-
- rcu_read_unlock();
-
- wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
- if (wakeref) {
- intel_engine_print_registers(engine, m);
- intel_runtime_pm_put(engine->i915, wakeref);
- } else {
- drm_printf(m, "\tDevice is asleep; skipping register dump\n");
- }
-
- intel_execlists_show_requests(engine, m, print_request, 8);
-
- drm_printf(m, "HWSP:\n");
- hexdump(m, engine->status_page.addr, PAGE_SIZE);
-
- drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
-
- intel_engine_print_breadcrumbs(engine, m);
-}
-
-static u8 user_class_map[] = {
- [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
- [I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
- [I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
- [I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
-};
-
-struct intel_engine_cs *
-intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
-{
- if (class >= ARRAY_SIZE(user_class_map))
- return NULL;
-
- class = user_class_map[class];
-
- GEM_BUG_ON(class > MAX_ENGINE_CLASS);
-
- if (instance > MAX_ENGINE_INSTANCE)
- return NULL;
-
- return i915->engine_class[class][instance];
-}
-
-/**
- * intel_enable_engine_stats() - Enable engine busy tracking on engine
- * @engine: engine to enable stats collection
- *
- * Start collecting the engine busyness data for @engine.
- *
- * Returns 0 on success or a negative error code.
- */
-int intel_enable_engine_stats(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- unsigned long flags;
- int err = 0;
-
- if (!intel_engine_supports_stats(engine))
- return -ENODEV;
-
- spin_lock_irqsave(&engine->timeline.lock, flags);
- write_seqlock(&engine->stats.lock);
-
- if (unlikely(engine->stats.enabled == ~0)) {
- err = -EBUSY;
- goto unlock;
- }
-
- if (engine->stats.enabled++ == 0) {
- const struct execlist_port *port = execlists->port;
- unsigned int num_ports = execlists_num_ports(execlists);
-
- engine->stats.enabled_at = ktime_get();
-
- /* XXX submission method oblivious? */
- while (num_ports-- && port_isset(port)) {
- engine->stats.active++;
- port++;
- }
-
- if (engine->stats.active)
- engine->stats.start = engine->stats.enabled_at;
- }
-
-unlock:
- write_sequnlock(&engine->stats.lock);
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
- return err;
-}
-
-static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
-{
- ktime_t total = engine->stats.total;
-
- /*
- * If the engine is executing something at the moment
- * add it to the total.
- */
- if (engine->stats.active)
- total = ktime_add(total,
- ktime_sub(ktime_get(), engine->stats.start));
-
- return total;
-}
-
-/**
- * intel_engine_get_busy_time() - Return current accumulated engine busyness
- * @engine: engine to report on
- *
- * Returns accumulated time @engine was busy since engine stats were enabled.
- */
-ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
-{
- unsigned int seq;
- ktime_t total;
-
- do {
- seq = read_seqbegin(&engine->stats.lock);
- total = __intel_engine_get_busy_time(engine);
- } while (read_seqretry(&engine->stats.lock, seq));
-
- return total;
-}
-
-/**
- * intel_disable_engine_stats() - Disable engine busy tracking on engine
- * @engine: engine to disable stats collection
- *
- * Stops collecting the engine busyness data for @engine.
- */
-void intel_disable_engine_stats(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (!intel_engine_supports_stats(engine))
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
- WARN_ON_ONCE(engine->stats.enabled == 0);
- if (--engine->stats.enabled == 0) {
- engine->stats.total = __intel_engine_get_busy_time(engine);
- engine->stats.active = 0;
- }
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
-static bool match_ring(struct i915_request *rq)
-{
- u32 ring = ENGINE_READ(rq->engine, RING_START);
-
- return ring == i915_ggtt_offset(rq->ring->vma);
-}
-
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine)
-{
- struct i915_request *request, *active = NULL;
- unsigned long flags;
-
- /*
- * We are called by the error capture, reset and to dump engine
- * state at random points in time. In particular, note that neither is
- * crucially ordered with an interrupt. After a hang, the GPU is dead
- * and we assume that no more writes can happen (we waited long enough
- * for all writes that were in transaction to be flushed) - adding an
- * extra delay for a recent interrupt is pointless. Hence, we do
- * not need an engine->irq_seqno_barrier() before the seqno reads.
- * At all other times, we must assume the GPU is still running, but
- * we only care about the snapshot of this moment.
- */
- spin_lock_irqsave(&engine->timeline.lock, flags);
- list_for_each_entry(request, &engine->timeline.requests, link) {
- if (i915_request_completed(request))
- continue;
-
- if (!i915_request_started(request))
- break;
-
- /* More than one preemptible request may match! */
- if (!match_ring(request))
- break;
-
- active = request;
- break;
- }
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
- return active;
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_engine.c"
-#include "selftests/intel_engine_cs.c"
-#endif
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __INTEL_ENGINE_TYPES__
-#define __INTEL_ENGINE_TYPES__
-
-#include <linux/hashtable.h>
-#include <linux/irq_work.h>
-#include <linux/kref.h>
-#include <linux/list.h>
-#include <linux/types.h>
-
-#include "i915_gem.h"
-#include "i915_priolist_types.h"
-#include "i915_selftest.h"
-#include "i915_timeline_types.h"
-#include "intel_sseu.h"
-#include "intel_workarounds_types.h"
-
-#include "i915_gem_batch_pool.h"
-#include "i915_pmu.h"
-
-#define I915_MAX_SLICES 3
-#define I915_MAX_SUBSLICES 8
-
-#define I915_CMD_HASH_ORDER 9
-
-struct dma_fence;
-struct drm_i915_reg_table;
-struct i915_gem_context;
-struct i915_request;
-struct i915_sched_attr;
-struct intel_uncore;
-
-typedef u8 intel_engine_mask_t;
-#define ALL_ENGINES ((intel_engine_mask_t)~0ul)
-
-struct intel_hw_status_page {
- struct i915_vma *vma;
- u32 *addr;
-};
-
-struct intel_instdone {
- u32 instdone;
- /* The following exist only in the RCS engine */
- u32 slice_common;
- u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
- u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
-};
-
-struct intel_engine_hangcheck {
- u64 acthd;
- u32 last_seqno;
- u32 next_seqno;
- unsigned long action_timestamp;
- struct intel_instdone instdone;
-};
-
-struct intel_ring {
- struct kref ref;
- struct i915_vma *vma;
- void *vaddr;
-
- struct i915_timeline *timeline;
- struct list_head request_list;
- struct list_head active_link;
-
- u32 head;
- u32 tail;
- u32 emit;
-
- u32 space;
- u32 size;
- u32 effective_size;
-};
-
-/*
- * we use a single page to load ctx workarounds so all of these
- * values are referred in terms of dwords
- *
- * struct i915_wa_ctx_bb:
- * offset: specifies batch starting position, also helpful in case
- * if we want to have multiple batches at different offsets based on
- * some criteria. It is not a requirement at the moment but provides
- * an option for future use.
- * size: size of the batch in DWORDS
- */
-struct i915_ctx_workarounds {
- struct i915_wa_ctx_bb {
- u32 offset;
- u32 size;
- } indirect_ctx, per_ctx;
- struct i915_vma *vma;
-};
-
-#define I915_MAX_VCS 4
-#define I915_MAX_VECS 2
-
-/*
- * Engine IDs definitions.
- * Keep instances of the same type engine together.
- */
-enum intel_engine_id {
- RCS0 = 0,
- BCS0,
- VCS0,
- VCS1,
- VCS2,
- VCS3,
-#define _VCS(n) (VCS0 + (n))
- VECS0,
- VECS1,
-#define _VECS(n) (VECS0 + (n))
- I915_NUM_ENGINES
-};
-
-struct st_preempt_hang {
- struct completion completion;
- unsigned int count;
- bool inject_hang;
-};
-
-/**
- * struct intel_engine_execlists - execlist submission queue and port state
- *
- * The struct intel_engine_execlists represents the combined logical state of
- * driver and the hardware state for execlist mode of submission.
- */
-struct intel_engine_execlists {
- /**
- * @tasklet: softirq tasklet for bottom handler
- */
- struct tasklet_struct tasklet;
-
- /**
- * @default_priolist: priority list for I915_PRIORITY_NORMAL
- */
- struct i915_priolist default_priolist;
-
- /**
- * @no_priolist: priority lists disabled
- */
- bool no_priolist;
-
- /**
- * @submit_reg: gen-specific execlist submission register
- * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
- * the ExecList Submission Queue Contents register array for Gen11+
- */
- u32 __iomem *submit_reg;
-
- /**
- * @ctrl_reg: the enhanced execlists control register, used to load the
- * submit queue on the HW and to request preemptions to idle
- */
- u32 __iomem *ctrl_reg;
-
- /**
- * @port: execlist port states
- *
- * For each hardware ELSP (ExecList Submission Port) we keep
- * track of the last request and the number of times we submitted
- * that port to hw. We then count the number of times the hw reports
- * a context completion or preemption. As only one context can
- * be active on hw, we limit resubmission of context to port[0]. This
- * is called Lite Restore, of the context.
- */
- struct execlist_port {
- /**
- * @request_count: combined request and submission count
- */
- struct i915_request *request_count;
-#define EXECLIST_COUNT_BITS 2
-#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
-#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
-#define port_set(p, packed) ((p)->request_count = (packed))
-#define port_isset(p) ((p)->request_count)
-#define port_index(p, execlists) ((p) - (execlists)->port)
-
- /**
- * @context_id: context ID for port
- */
- GEM_DEBUG_DECL(u32 context_id);
-
-#define EXECLIST_MAX_PORTS 2
- } port[EXECLIST_MAX_PORTS];
-
- /**
- * @active: is the HW active? We consider the HW as active after
- * submitting any context for execution and until we have seen the
- * last context completion event. After that, we do not expect any
- * more events until we submit, and so can park the HW.
- *
- * As we have a small number of different sources from which we feed
- * the HW, we track the state of each inside a single bitfield.
- */
- unsigned int active;
-#define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
-
- /**
- * @port_mask: number of execlist ports - 1
- */
- unsigned int port_mask;
-
- /**
- * @queue_priority_hint: Highest pending priority.
- *
- * When we add requests into the queue, or adjust the priority of
- * executing requests, we compute the maximum priority of those
- * pending requests. We can then use this value to determine if
- * we need to preempt the executing requests to service the queue.
- * However, since the we may have recorded the priority of an inflight
- * request we wanted to preempt but since completed, at the time of
- * dequeuing the priority hint may no longer may match the highest
- * available request priority.
- */
- int queue_priority_hint;
-
- /**
- * @queue: queue of requests, in priority lists
- */
- struct rb_root_cached queue;
-
- /**
- * @csb_write: control register for Context Switch buffer
- *
- * Note this register may be either mmio or HWSP shadow.
- */
- u32 *csb_write;
-
- /**
- * @csb_status: status array for Context Switch buffer
- *
- * Note these register may be either mmio or HWSP shadow.
- */
- u32 *csb_status;
-
- /**
- * @preempt_complete_status: expected CSB upon completing preemption
- */
- u32 preempt_complete_status;
-
- /**
- * @csb_size: context status buffer FIFO size
- */
- u8 csb_size;
-
- /**
- * @csb_head: context status buffer head
- */
- u8 csb_head;
-
- I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
-};
-
-#define INTEL_ENGINE_CS_MAX_NAME 8
-
-struct intel_engine_cs {
- struct drm_i915_private *i915;
- struct intel_uncore *uncore;
- char name[INTEL_ENGINE_CS_MAX_NAME];
-
- enum intel_engine_id id;
- unsigned int hw_id;
- unsigned int guc_id;
- intel_engine_mask_t mask;
-
- u8 uabi_class;
-
- u8 class;
- u8 instance;
- u32 context_size;
- u32 mmio_base;
-
- struct intel_sseu sseu;
-
- struct intel_ring *buffer;
-
- struct i915_timeline timeline;
-
- struct intel_context *kernel_context; /* pinned */
- struct intel_context *preempt_context; /* pinned; optional */
-
- struct drm_i915_gem_object *default_state;
- void *pinned_default_state;
-
- /* Rather than have every client wait upon all user interrupts,
- * with the herd waking after every interrupt and each doing the
- * heavyweight seqno dance, we delegate the task (of being the
- * bottom-half of the user interrupt) to the first client. After
- * every interrupt, we wake up one client, who does the heavyweight
- * coherent seqno read and either goes back to sleep (if incomplete),
- * or wakes up all the completed clients in parallel, before then
- * transferring the bottom-half status to the next client in the queue.
- *
- * Compared to walking the entire list of waiters in a single dedicated
- * bottom-half, we reduce the latency of the first waiter by avoiding
- * a context switch, but incur additional coherent seqno reads when
- * following the chain of request breadcrumbs. Since it is most likely
- * that we have a single client waiting on each seqno, then reducing
- * the overhead of waking that client is much preferred.
- */
- struct intel_breadcrumbs {
- spinlock_t irq_lock;
- struct list_head signalers;
-
- struct irq_work irq_work; /* for use from inside irq_lock */
-
- unsigned int irq_enabled;
-
- bool irq_armed;
- } breadcrumbs;
-
- struct intel_engine_pmu {
- /**
- * @enable: Bitmask of enable sample events on this engine.
- *
- * Bits correspond to sample event types, for instance
- * I915_SAMPLE_QUEUED is bit 0 etc.
- */
- u32 enable;
- /**
- * @enable_count: Reference count for the enabled samplers.
- *
- * Index number corresponds to @enum drm_i915_pmu_engine_sample.
- */
- unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
- /**
- * @sample: Counter values for sampling events.
- *
- * Our internal timer stores the current counters in this field.
- *
- * Index number corresponds to @enum drm_i915_pmu_engine_sample.
- */
- struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
- } pmu;
-
- /*
- * A pool of objects to use as shadow copies of client batch buffers
- * when the command parser is enabled. Prevents the client from
- * modifying the batch contents after software parsing.
- */
- struct i915_gem_batch_pool batch_pool;
-
- struct intel_hw_status_page status_page;
- struct i915_ctx_workarounds wa_ctx;
- struct i915_wa_list ctx_wa_list;
- struct i915_wa_list wa_list;
- struct i915_wa_list whitelist;
-
- u32 irq_keep_mask; /* always keep these interrupts */
- u32 irq_enable_mask; /* bitmask to enable ring interrupt */
- void (*irq_enable)(struct intel_engine_cs *engine);
- void (*irq_disable)(struct intel_engine_cs *engine);
-
- int (*init_hw)(struct intel_engine_cs *engine);
-
- struct {
- void (*prepare)(struct intel_engine_cs *engine);
- void (*reset)(struct intel_engine_cs *engine, bool stalled);
- void (*finish)(struct intel_engine_cs *engine);
- } reset;
-
- void (*park)(struct intel_engine_cs *engine);
- void (*unpark)(struct intel_engine_cs *engine);
-
- void (*set_default_submission)(struct intel_engine_cs *engine);
-
- const struct intel_context_ops *cops;
-
- int (*request_alloc)(struct i915_request *rq);
- int (*init_context)(struct i915_request *rq);
-
- int (*emit_flush)(struct i915_request *request, u32 mode);
-#define EMIT_INVALIDATE BIT(0)
-#define EMIT_FLUSH BIT(1)
-#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
- int (*emit_bb_start)(struct i915_request *rq,
- u64 offset, u32 length,
- unsigned int dispatch_flags);
-#define I915_DISPATCH_SECURE BIT(0)
-#define I915_DISPATCH_PINNED BIT(1)
- int (*emit_init_breadcrumb)(struct i915_request *rq);
- u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
- u32 *cs);
- unsigned int emit_fini_breadcrumb_dw;
-
- /* Pass the request to the hardware queue (e.g. directly into
- * the legacy ringbuffer or to the end of an execlist).
- *
- * This is called from an atomic context with irqs disabled; must
- * be irq safe.
- */
- void (*submit_request)(struct i915_request *rq);
-
- /*
- * Call when the priority on a request has changed and it and its
- * dependencies may need rescheduling. Note the request itself may
- * not be ready to run!
- */
- void (*schedule)(struct i915_request *request,
- const struct i915_sched_attr *attr);
-
- /*
- * Cancel all requests on the hardware, or queued for execution.
- * This should only cancel the ready requests that have been
- * submitted to the engine (via the engine->submit_request callback).
- * This is called when marking the device as wedged.
- */
- void (*cancel_requests)(struct intel_engine_cs *engine);
-
- void (*cleanup)(struct intel_engine_cs *engine);
-
- struct intel_engine_execlists execlists;
-
- /* Contexts are pinned whilst they are active on the GPU. The last
- * context executed remains active whilst the GPU is idle - the
- * switch away and write to the context object only occurs on the
- * next execution. Contexts are only unpinned on retirement of the
- * following request ensuring that we can always write to the object
- * on the context switch even after idling. Across suspend, we switch
- * to the kernel context and trash it as the save may not happen
- * before the hardware is powered down.
- */
- struct intel_context *last_retired_context;
-
- /* status_notifier: list of callbacks for context-switch changes */
- struct atomic_notifier_head context_status_notifier;
-
- struct intel_engine_hangcheck hangcheck;
-
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
-#define I915_ENGINE_SUPPORTS_STATS BIT(1)
-#define I915_ENGINE_HAS_PREEMPTION BIT(2)
-#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
- unsigned int flags;
-
- /*
- * Table of commands the command parser needs to know about
- * for this engine.
- */
- DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);
-
- /*
- * Table of registers allowed in commands that read/write registers.
- */
- const struct drm_i915_reg_table *reg_tables;
- int reg_table_count;
-
- /*
- * Returns the bitmask for the length field of the specified command.
- * Return 0 for an unrecognized/invalid command.
- *
- * If the command parser finds an entry for a command in the engine's
- * cmd_tables, it gets the command's length based on the table entry.
- * If not, it calls this function to determine the per-engine length
- * field encoding for the command (i.e. different opcode ranges use
- * certain bits to encode the command length in the header).
- */
- u32 (*get_cmd_length_mask)(u32 cmd_header);
-
- struct {
- /**
- * @lock: Lock protecting the below fields.
- */
- seqlock_t lock;
- /**
- * @enabled: Reference count indicating number of listeners.
- */
- unsigned int enabled;
- /**
- * @active: Number of contexts currently scheduled in.
- */
- unsigned int active;
- /**
- * @enabled_at: Timestamp when busy stats were enabled.
- */
- ktime_t enabled_at;
- /**
- * @start: Timestamp of the last idle to active transition.
- *
- * Idle is defined as active == 0, active is active > 0.
- */
- ktime_t start;
- /**
- * @total: Total time this engine was busy.
- *
- * Accumulated time not counting the most recent block in cases
- * where engine is currently busy (active > 0).
- */
- ktime_t total;
- } stats;
-};
-
-static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
-{
- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
-}
-
-static inline bool
-intel_engine_supports_stats(const struct intel_engine_cs *engine)
-{
- return engine->flags & I915_ENGINE_SUPPORTS_STATS;
-}
-
-static inline bool
-intel_engine_has_preemption(const struct intel_engine_cs *engine)
-{
- return engine->flags & I915_ENGINE_HAS_PREEMPTION;
-}
-
-static inline bool
-intel_engine_has_semaphores(const struct intel_engine_cs *engine)
-{
- return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
-}
-
-static inline bool
-intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
-{
- return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
-}
-
-#define instdone_slice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
-
-#define instdone_subslice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
-
-#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
- for ((slice__) = 0, (subslice__) = 0; \
- (slice__) < I915_MAX_SLICES; \
- (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
- (slice__) += ((subslice__) == 0)) \
- for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
- (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
-
-#endif /* __INTEL_ENGINE_TYPES_H__ */
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2003-2018 Intel Corporation
- */
-
-#ifndef _INTEL_GPU_COMMANDS_H_
-#define _INTEL_GPU_COMMANDS_H_
-
-/*
- * Instruction field definitions used by the command parser
- */
-#define INSTR_CLIENT_SHIFT 29
-#define INSTR_MI_CLIENT 0x0
-#define INSTR_BC_CLIENT 0x2
-#define INSTR_RC_CLIENT 0x3
-#define INSTR_SUBCLIENT_SHIFT 27
-#define INSTR_SUBCLIENT_MASK 0x18000000
-#define INSTR_MEDIA_SUBCLIENT 0x2
-#define INSTR_26_TO_24_MASK 0x7000000
-#define INSTR_26_TO_24_SHIFT 24
-
-/*
- * Memory interface instructions used by the kernel
- */
-#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
-/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */
-#define MI_GLOBAL_GTT (1<<22)
-
-#define MI_NOOP MI_INSTR(0, 0)
-#define MI_USER_INTERRUPT MI_INSTR(0x02, 0)
-#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0)
-#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16)
-#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
-#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
-#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1)
-#define MI_FLUSH MI_INSTR(0x04, 0)
-#define MI_READ_FLUSH (1 << 0)
-#define MI_EXE_FLUSH (1 << 1)
-#define MI_NO_WRITE_FLUSH (1 << 2)
-#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */
-#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */
-#define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */
-#define MI_REPORT_HEAD MI_INSTR(0x07, 0)
-#define MI_ARB_ON_OFF MI_INSTR(0x08, 0)
-#define MI_ARB_ENABLE (1<<0)
-#define MI_ARB_DISABLE (0<<0)
-#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0)
-#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0)
-#define MI_SUSPEND_FLUSH_EN (1<<0)
-#define MI_SET_APPID MI_INSTR(0x0e, 0)
-#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0)
-#define MI_OVERLAY_CONTINUE (0x0<<21)
-#define MI_OVERLAY_ON (0x1<<21)
-#define MI_OVERLAY_OFF (0x2<<21)
-#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0)
-#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2)
-#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1)
-#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20)
-/* IVB has funny definitions for which plane to flip. */
-#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19)
-#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19)
-#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19)
-#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
-#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19)
-#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
-/* SKL ones */
-#define MI_DISPLAY_FLIP_SKL_PLANE_1_A (0 << 8)
-#define MI_DISPLAY_FLIP_SKL_PLANE_1_B (1 << 8)
-#define MI_DISPLAY_FLIP_SKL_PLANE_1_C (2 << 8)
-#define MI_DISPLAY_FLIP_SKL_PLANE_2_A (4 << 8)
-#define MI_DISPLAY_FLIP_SKL_PLANE_2_B (5 << 8)
-#define MI_DISPLAY_FLIP_SKL_PLANE_2_C (6 << 8)
-#define MI_DISPLAY_FLIP_SKL_PLANE_3_A (7 << 8)
-#define MI_DISPLAY_FLIP_SKL_PLANE_3_B (8 << 8)
-#define MI_DISPLAY_FLIP_SKL_PLANE_3_C (9 << 8)
-#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */
-#define MI_SEMAPHORE_GLOBAL_GTT (1<<22)
-#define MI_SEMAPHORE_UPDATE (1<<21)
-#define MI_SEMAPHORE_COMPARE (1<<20)
-#define MI_SEMAPHORE_REGISTER (1<<18)
-#define MI_SEMAPHORE_SYNC_VR (0<<16) /* RCS wait for VCS (RVSYNC) */
-#define MI_SEMAPHORE_SYNC_VER (1<<16) /* RCS wait for VECS (RVESYNC) */
-#define MI_SEMAPHORE_SYNC_BR (2<<16) /* RCS wait for BCS (RBSYNC) */
-#define MI_SEMAPHORE_SYNC_BV (0<<16) /* VCS wait for BCS (VBSYNC) */
-#define MI_SEMAPHORE_SYNC_VEV (1<<16) /* VCS wait for VECS (VVESYNC) */
-#define MI_SEMAPHORE_SYNC_RV (2<<16) /* VCS wait for RCS (VRSYNC) */
-#define MI_SEMAPHORE_SYNC_RB (0<<16) /* BCS wait for RCS (BRSYNC) */
-#define MI_SEMAPHORE_SYNC_VEB (1<<16) /* BCS wait for VECS (BVESYNC) */
-#define MI_SEMAPHORE_SYNC_VB (2<<16) /* BCS wait for VCS (BVSYNC) */
-#define MI_SEMAPHORE_SYNC_BVE (0<<16) /* VECS wait for BCS (VEBSYNC) */
-#define MI_SEMAPHORE_SYNC_VVE (1<<16) /* VECS wait for VCS (VEVSYNC) */
-#define MI_SEMAPHORE_SYNC_RVE (2<<16) /* VECS wait for RCS (VERSYNC) */
-#define MI_SEMAPHORE_SYNC_INVALID (3<<16)
-#define MI_SEMAPHORE_SYNC_MASK (3<<16)
-#define MI_SET_CONTEXT MI_INSTR(0x18, 0)
-#define MI_MM_SPACE_GTT (1<<8)
-#define MI_MM_SPACE_PHYSICAL (0<<8)
-#define MI_SAVE_EXT_STATE_EN (1<<3)
-#define MI_RESTORE_EXT_STATE_EN (1<<2)
-#define MI_FORCE_RESTORE (1<<1)
-#define MI_RESTORE_INHIBIT (1<<0)
-#define HSW_MI_RS_SAVE_STATE_EN (1<<3)
-#define HSW_MI_RS_RESTORE_STATE_EN (1<<2)
-#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */
-#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
-#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
-#define MI_SEMAPHORE_POLL (1 << 15)
-#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
-#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
-#define MI_SEMAPHORE_SAD_LT_SDD (2 << 12)
-#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
-#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
-#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
-#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
-#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
-#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
-#define MI_USE_GGTT (1 << 22) /* g4x+ */
-#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1)
-/*
- * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
- * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
- * simply ignores the register load under certain conditions.
- * - One can actually load arbitrary many arbitrary registers: Simply issue x
- * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
- */
-#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
-#define MI_LRI_FORCE_POSTED (1<<12)
-#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
-#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
-#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
-#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
-#define MI_FLUSH_DW_STORE_INDEX (1<<21)
-#define MI_INVALIDATE_TLB (1<<18)
-#define MI_FLUSH_DW_OP_STOREDW (1<<14)
-#define MI_FLUSH_DW_OP_MASK (3<<14)
-#define MI_FLUSH_DW_NOTIFY (1<<8)
-#define MI_INVALIDATE_BSD (1<<7)
-#define MI_FLUSH_DW_USE_GTT (1<<2)
-#define MI_FLUSH_DW_USE_PPGTT (0<<2)
-#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
-#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
-#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
-#define MI_BATCH_NON_SECURE (1)
-/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
-#define MI_BATCH_NON_SECURE_I965 (1<<8)
-#define MI_BATCH_PPGTT_HSW (1<<8)
-#define MI_BATCH_NON_SECURE_HSW (1<<13)
-#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0)
-#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
-#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
-#define MI_BATCH_RESOURCE_STREAMER (1<<10)
-
-/*
- * 3D instructions used by the kernel
- */
-#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags))
-
-#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4)
-#define GEN9_MEDIA_POOL_ENABLE (1 << 31)
-#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24))
-#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19))
-#define SC_UPDATE_SCISSOR (0x1<<1)
-#define SC_ENABLE_MASK (0x1<<0)
-#define SC_ENABLE (0x1<<0)
-#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16))
-#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1))
-#define SCI_YMIN_MASK (0xffff<<16)
-#define SCI_XMIN_MASK (0xffff<<0)
-#define SCI_YMAX_MASK (0xffff<<16)
-#define SCI_XMAX_MASK (0xffff<<0)
-#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19))
-#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1)
-#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0)
-#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16))
-#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4)
-#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0)
-#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
-#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
-#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)
-
-#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2))
-#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4)
-#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
-#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5)
-#define BLT_WRITE_A (2<<20)
-#define BLT_WRITE_RGB (1<<20)
-#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A)
-#define BLT_DEPTH_8 (0<<24)
-#define BLT_DEPTH_16_565 (1<<24)
-#define BLT_DEPTH_16_1555 (2<<24)
-#define BLT_DEPTH_32 (3<<24)
-#define BLT_ROP_SRC_COPY (0xcc<<16)
-#define BLT_ROP_COLOR_COPY (0xf0<<16)
-#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */
-#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */
-#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2)
-#define ASYNC_FLIP (1<<22)
-#define DISPLAY_PLANE_A (0<<20)
-#define DISPLAY_PLANE_B (1<<20)
-#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
-#define PIPE_CONTROL_FLUSH_L3 (1<<27)
-#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */
-#define PIPE_CONTROL_MMIO_WRITE (1<<23)
-#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21)
-#define PIPE_CONTROL_CS_STALL (1<<20)
-#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
-#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
-#define PIPE_CONTROL_QW_WRITE (1<<14)
-#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
-#define PIPE_CONTROL_DEPTH_STALL (1<<13)
-#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
-#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
-#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
-#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
-#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
-#define PIPE_CONTROL_NOTIFY (1<<8)
-#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
-#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
-#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4)
-#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3)
-#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2)
-#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1)
-#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0)
-#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
-
-/*
- * Commands used only by the command parser
- */
-#define MI_SET_PREDICATE MI_INSTR(0x01, 0)
-#define MI_ARB_CHECK MI_INSTR(0x05, 0)
-#define MI_RS_CONTROL MI_INSTR(0x06, 0)
-#define MI_URB_ATOMIC_ALLOC MI_INSTR(0x09, 0)
-#define MI_PREDICATE MI_INSTR(0x0C, 0)
-#define MI_RS_CONTEXT MI_INSTR(0x0F, 0)
-#define MI_TOPOLOGY_FILTER MI_INSTR(0x0D, 0)
-#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0)
-#define MI_URB_CLEAR MI_INSTR(0x19, 0)
-#define MI_UPDATE_GTT MI_INSTR(0x23, 0)
-#define MI_CLFLUSH MI_INSTR(0x27, 0)
-#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0)
-#define MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0)
-#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
-#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
-#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
-#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
-
-#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16))
-#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16))
-#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16))
-#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
-#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16))
-#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16))
-#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
- ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16))
-#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \
- ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16))
-#define GFX_OP_3DSTATE_SO_DECL_LIST \
- ((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16))
-
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \
- ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16))
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \
- ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16))
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \
- ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16))
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \
- ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16))
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
- ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
-
-#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16))
-
-#define COLOR_BLT ((0x2<<29)|(0x40<<22))
-#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22))
-
-#endif /* _INTEL_GPU_COMMANDS_H_ */
#include <linux/circ_buf.h>
#include <trace/events/dma_fence.h>
+#include "gt/intel_lrc_reg.h"
+
#include "intel_guc_submission.h"
-#include "intel_lrc_reg.h"
#include "i915_drv.h"
#define GUC_PREEMPT_FINISHED 0x1
#include <linux/spinlock.h>
+#include "gt/intel_engine_types.h"
+
#include "i915_gem.h"
#include "i915_selftest.h"
-#include "intel_engine_types.h"
struct drm_i915_private;
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "i915_drv.h"
-#include "i915_reset.h"
-
-struct hangcheck {
- u64 acthd;
- u32 seqno;
- enum intel_engine_hangcheck_action action;
- unsigned long action_timestamp;
- int deadlock;
- struct intel_instdone instdone;
- bool wedged:1;
- bool stalled:1;
-};
-
-static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
-{
- u32 tmp = current_instdone | *old_instdone;
- bool unchanged;
-
- unchanged = tmp == *old_instdone;
- *old_instdone |= tmp;
-
- return unchanged;
-}
-
-static bool subunits_stuck(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct intel_instdone instdone;
- struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
- bool stuck;
- int slice;
- int subslice;
-
- if (engine->id != RCS0)
- return true;
-
- intel_engine_get_instdone(engine, &instdone);
-
- /* There might be unstable subunit states even when
- * actual head is not moving. Filter out the unstable ones by
- * accumulating the undone -> done transitions and only
- * consider those as progress.
- */
- stuck = instdone_unchanged(instdone.instdone,
- &accu_instdone->instdone);
- stuck &= instdone_unchanged(instdone.slice_common,
- &accu_instdone->slice_common);
-
- for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
- stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
- &accu_instdone->sampler[slice][subslice]);
- stuck &= instdone_unchanged(instdone.row[slice][subslice],
- &accu_instdone->row[slice][subslice]);
- }
-
- return stuck;
-}
-
-static enum intel_engine_hangcheck_action
-head_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
- if (acthd != engine->hangcheck.acthd) {
-
- /* Clear subunit states on head movement */
- memset(&engine->hangcheck.instdone, 0,
- sizeof(engine->hangcheck.instdone));
-
- return ENGINE_ACTIVE_HEAD;
- }
-
- if (!subunits_stuck(engine))
- return ENGINE_ACTIVE_SUBUNITS;
-
- return ENGINE_DEAD;
-}
-
-static enum intel_engine_hangcheck_action
-engine_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- enum intel_engine_hangcheck_action ha;
- u32 tmp;
-
- ha = head_stuck(engine, acthd);
- if (ha != ENGINE_DEAD)
- return ha;
-
- if (IS_GEN(dev_priv, 2))
- return ENGINE_DEAD;
-
- /* Is the chip hanging on a WAIT_FOR_EVENT?
- * If so we can simply poke the RB_WAIT bit
- * and break the hang. This should work on
- * all but the second generation chipsets.
- */
- tmp = ENGINE_READ(engine, RING_CTL);
- if (tmp & RING_WAIT) {
- i915_handle_error(dev_priv, engine->mask, 0,
- "stuck wait on %s", engine->name);
- ENGINE_WRITE(engine, RING_CTL, tmp);
- return ENGINE_WAIT_KICK;
- }
-
- return ENGINE_DEAD;
-}
-
-static void hangcheck_load_sample(struct intel_engine_cs *engine,
- struct hangcheck *hc)
-{
- hc->acthd = intel_engine_get_active_head(engine);
- hc->seqno = intel_engine_get_hangcheck_seqno(engine);
-}
-
-static void hangcheck_store_sample(struct intel_engine_cs *engine,
- const struct hangcheck *hc)
-{
- engine->hangcheck.acthd = hc->acthd;
- engine->hangcheck.last_seqno = hc->seqno;
-}
-
-static enum intel_engine_hangcheck_action
-hangcheck_get_action(struct intel_engine_cs *engine,
- const struct hangcheck *hc)
-{
- if (engine->hangcheck.last_seqno != hc->seqno)
- return ENGINE_ACTIVE_SEQNO;
-
- if (intel_engine_is_idle(engine))
- return ENGINE_IDLE;
-
- return engine_stuck(engine, hc->acthd);
-}
-
-static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
- struct hangcheck *hc)
-{
- unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
-
- hc->action = hangcheck_get_action(engine, hc);
-
- /* We always increment the progress
- * if the engine is busy and still processing
- * the same request, so that no single request
- * can run indefinitely (such as a chain of
- * batches). The only time we do not increment
-	 * the hangcheck score on this ring is if this
- * engine is in a legitimate wait for another
- * engine. In that case the waiting engine is a
- * victim and we want to be sure we catch the
- * right culprit. Then every time we do kick
-	 * the ring, mark it as progress, as seqno
-	 * advancement might ensue and if not, it
- * will catch the hanging engine.
- */
-
- switch (hc->action) {
- case ENGINE_IDLE:
- case ENGINE_ACTIVE_SEQNO:
- /* Clear head and subunit states on seqno movement */
- hc->acthd = 0;
-
- memset(&engine->hangcheck.instdone, 0,
- sizeof(engine->hangcheck.instdone));
-
- /* Intentional fall through */
- case ENGINE_WAIT_KICK:
- case ENGINE_WAIT:
- engine->hangcheck.action_timestamp = jiffies;
- break;
-
- case ENGINE_ACTIVE_HEAD:
- case ENGINE_ACTIVE_SUBUNITS:
- /*
- * Seqno stuck with still active engine gets leeway,
- * in hopes that it is just a long shader.
- */
- timeout = I915_SEQNO_DEAD_TIMEOUT;
- break;
-
- case ENGINE_DEAD:
- break;
-
- default:
- MISSING_CASE(hc->action);
- }
-
- hc->stalled = time_after(jiffies,
- engine->hangcheck.action_timestamp + timeout);
- hc->wedged = time_after(jiffies,
- engine->hangcheck.action_timestamp +
- I915_ENGINE_WEDGED_TIMEOUT);
-}
-
-static void hangcheck_declare_hang(struct drm_i915_private *i915,
- unsigned int hung,
- unsigned int stuck)
-{
- struct intel_engine_cs *engine;
- intel_engine_mask_t tmp;
- char msg[80];
- int len;
-
- /* If some rings hung but others were still busy, only
- * blame the hanging rings in the synopsis.
- */
- if (stuck != hung)
- hung &= ~stuck;
- len = scnprintf(msg, sizeof(msg),
- "%s on ", stuck == hung ? "no progress" : "hang");
- for_each_engine_masked(engine, i915, hung, tmp)
- len += scnprintf(msg + len, sizeof(msg) - len,
- "%s, ", engine->name);
- msg[len-2] = '\0';
-
- return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg);
-}
-
-/*
- * This is called when the chip hasn't reported back with completed
- * batchbuffers in a long time. We keep track per ring seqno progress and
- * if there is no progress, hangcheck score for that ring is increased.
- * Further, acthd is inspected to see if the ring is stuck. On stuck case
- * we kick the ring. If we see no progress on three subsequent calls
- * we assume chip is wedged and try to fix it by resetting the chip.
- */
-static void i915_hangcheck_elapsed(struct work_struct *work)
-{
- struct drm_i915_private *dev_priv =
- container_of(work, typeof(*dev_priv),
- gpu_error.hangcheck_work.work);
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- unsigned int hung = 0, stuck = 0, wedged = 0;
-
- if (!i915_modparams.enable_hangcheck)
- return;
-
- if (!READ_ONCE(dev_priv->gt.awake))
- return;
-
- if (i915_terminally_wedged(dev_priv))
- return;
-
- /* As enabling the GPU requires fairly extensive mmio access,
- * periodically arm the mmio checker to see if we are triggering
- * any invalid access.
- */
- intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore);
-
- for_each_engine(engine, dev_priv, id) {
- struct hangcheck hc;
-
- intel_engine_signal_breadcrumbs(engine);
-
- hangcheck_load_sample(engine, &hc);
- hangcheck_accumulate_sample(engine, &hc);
- hangcheck_store_sample(engine, &hc);
-
- if (hc.stalled) {
- hung |= engine->mask;
- if (hc.action != ENGINE_DEAD)
- stuck |= engine->mask;
- }
-
- if (hc.wedged)
- wedged |= engine->mask;
- }
-
- if (GEM_SHOW_DEBUG() && (hung | stuck)) {
- struct drm_printer p = drm_debug_printer("hangcheck");
-
- for_each_engine(engine, dev_priv, id) {
- if (intel_engine_is_idle(engine))
- continue;
-
- intel_engine_dump(engine, &p, "%s\n", engine->name);
- }
- }
-
- if (wedged) {
- dev_err(dev_priv->drm.dev,
- "GPU recovery timed out,"
- " cancelling all in-flight rendering.\n");
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(dev_priv);
- }
-
- if (hung)
- hangcheck_declare_hang(dev_priv, hung, stuck);
-
- /* Reset timer in case GPU hangs without another request being added */
- i915_queue_hangcheck(dev_priv);
-}
-
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
-{
- memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
- engine->hangcheck.action_timestamp = jiffies;
-}
-
-void intel_hangcheck_init(struct drm_i915_private *i915)
-{
- INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work,
- i915_hangcheck_elapsed);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_hangcheck.c"
-#endif
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Ben Widawsky <ben@bwidawsk.net>
- * Michel Thierry <michel.thierry@intel.com>
- * Thomas Daniel <thomas.daniel@intel.com>
- * Oscar Mateo <oscar.mateo@intel.com>
- *
- */
-
-/**
- * DOC: Logical Rings, Logical Ring Contexts and Execlists
- *
- * Motivation:
- * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
- * These expanded contexts enable a number of new abilities, especially
- * "Execlists" (also implemented in this file).
- *
- * One of the main differences with the legacy HW contexts is that logical
- * ring contexts incorporate many more things to the context's state, like
- * PDPs or ringbuffer control registers:
- *
- * The reason why PDPs are included in the context is straightforward: as
- * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
- * contained there mean you don't need to do a ppgtt->switch_mm yourself,
- * instead, the GPU will do it for you on the context switch.
- *
- * But, what about the ringbuffer control registers (head, tail, etc..)?
- * shouldn't we just need a set of those per engine command streamer? This is
- * where the name "Logical Rings" starts to make sense: by virtualizing the
- * rings, the engine cs shifts to a new "ring buffer" with every context
- * switch. When you want to submit a workload to the GPU you: A) choose your
- * context, B) find its appropriate virtualized ring, C) write commands to it
- * and then, finally, D) tell the GPU to switch to that context.
- *
- * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
- * to a context is via a context execution list, ergo "Execlists".
- *
- * LRC implementation:
- * Regarding the creation of contexts, we have:
- *
- * - One global default context.
- * - One local default context for each opened fd.
- * - One local extra context for each context create ioctl call.
- *
- * Now that ringbuffers belong per-context (and not per-engine, like before)
- * and that contexts are uniquely tied to a given engine (and not reusable,
- * like before) we need:
- *
- * - One ringbuffer per-engine inside each context.
- * - One backing object per-engine inside each context.
- *
- * The global default context starts its life with these new objects fully
- * allocated and populated. The local default context for each opened fd is
- * more complex, because we don't know at creation time which engine is going
- * to use them. To handle this, we have implemented a deferred creation of LR
- * contexts:
- *
- * The local context starts its life as a hollow or blank holder, that only
- * gets populated for a given engine once we receive an execbuffer. If later
- * on we receive another execbuffer ioctl for the same context but a different
- * engine, we allocate/populate a new ringbuffer and context backing object and
- * so on.
- *
- * Finally, regarding local contexts created using the ioctl call: as they are
- * only allowed with the render ring, we can allocate & populate them right
- * away (no need to defer anything, at least for now).
- *
- * Execlists implementation:
- * Execlists are the new method by which, on gen8+ hardware, workloads are
- * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
- * This method works as follows:
- *
- * When a request is committed, its commands (the BB start and any leading or
- * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
- * for the appropriate context. The tail pointer in the hardware context is not
- * updated at this time, but instead, kept by the driver in the ringbuffer
- * structure. A structure representing this request is added to a request queue
- * for the appropriate engine: this structure contains a copy of the context's
- * tail after the request was written to the ring buffer and a pointer to the
- * context itself.
- *
- * If the engine's request queue was empty before the request was added, the
- * queue is processed immediately. Otherwise the queue will be processed during
- * a context switch interrupt. In any case, elements on the queue will get sent
- * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
- * globally unique 20-bits submission ID.
- *
- * When execution of a request completes, the GPU updates the context status
- * buffer with a context complete event and generates a context switch interrupt.
- * During the interrupt handling, the driver examines the events in the buffer:
- * for each context complete event, if the announced ID matches that on the head
- * of the request queue, then that request is retired and removed from the queue.
- *
- * After processing, if any requests were retired and the queue is not empty
- * then a new execution list can be submitted. The two requests at the front of
- * the queue are next to be submitted but since a context may not occur twice in
- * an execution list, if subsequent requests have the same ID as the first then
- * the two requests must be combined. This is done simply by discarding requests
- * at the head of the queue until either only one request is left (in which case
- * we use a NULL second context) or the first two requests have unique IDs.
- *
- * By always executing the first two requests in the queue the driver ensures
- * that the GPU is kept as busy as possible. In the case where a single context
- * completes but a second context is still executing, the request for this second
- * context will be at the head of the queue when we remove the first one. This
- * request will then be resubmitted along with a new request for a different context,
- * which will cause the hardware to continue executing the second request and queue
- * the new request (the GPU detects the condition of a context getting preempted
- * with the same context and optimizes the context switch flow by not doing
- * preemption, but just sampling the new tail pointer).
- *
- */
-#include <linux/interrupt.h>
-
-#include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_render_state.h"
-#include "i915_reset.h"
-#include "i915_vgpu.h"
-#include "intel_lrc_reg.h"
-#include "intel_mocs.h"
-#include "intel_workarounds.h"
-
-#define RING_EXECLIST_QFULL (1 << 0x2)
-#define RING_EXECLIST1_VALID (1 << 0x3)
-#define RING_EXECLIST0_VALID (1 << 0x4)
-#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE)
-#define RING_EXECLIST1_ACTIVE (1 << 0x11)
-#define RING_EXECLIST0_ACTIVE (1 << 0x12)
-
-#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0)
-#define GEN8_CTX_STATUS_PREEMPTED (1 << 1)
-#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2)
-#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
-#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
-#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
-
-#define GEN8_CTX_STATUS_COMPLETED_MASK \
- (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
-
-/* Typical size of the average request (2 pipecontrols and a MI_BB) */
-#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-#define WA_TAIL_DWORDS 2
-#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
-
-#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
-
-static int execlists_context_deferred_alloc(struct intel_context *ce,
- struct intel_engine_cs *engine);
-static void execlists_init_reg_state(u32 *reg_state,
- struct intel_context *ce,
- struct intel_engine_cs *engine,
- struct intel_ring *ring);
-
-static inline struct i915_priolist *to_priolist(struct rb_node *rb)
-{
- return rb_entry(rb, struct i915_priolist, node);
-}
-
-static inline int rq_prio(const struct i915_request *rq)
-{
- return rq->sched.attr.priority;
-}
-
-static int effective_prio(const struct i915_request *rq)
-{
- int prio = rq_prio(rq);
-
- /*
- * On unwinding the active request, we give it a priority bump
- * equivalent to a freshly submitted request. This protects it from
- * being gazumped again, but it would be preferable if we didn't
- * let it be gazumped in the first place!
- *
- * See __unwind_incomplete_requests()
- */
- if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(rq)) {
- /*
- * After preemption, we insert the active request at the
- * end of the new priority level. This means that we will be
- * _lower_ priority than the preemptee all things equal (and
- * so the preemption is valid), so adjust our comparison
- * accordingly.
- */
- prio |= ACTIVE_PRIORITY;
- prio--;
- }
-
- /* Restrict mere WAIT boosts from triggering preemption */
- return prio | __NO_PREEMPTION;
-}
-
-static int queue_prio(const struct intel_engine_execlists *execlists)
-{
- struct i915_priolist *p;
- struct rb_node *rb;
-
- rb = rb_first_cached(&execlists->queue);
- if (!rb)
- return INT_MIN;
-
- /*
- * As the priolist[] are inverted, with the highest priority in [0],
- * we have to flip the index value to become priority.
- */
- p = to_priolist(rb);
- return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
-}
-
-static inline bool need_preempt(const struct intel_engine_cs *engine,
- const struct i915_request *rq)
-{
- int last_prio;
-
- if (!engine->preempt_context)
- return false;
-
- if (i915_request_completed(rq))
- return false;
-
- /*
- * Check if the current priority hint merits a preemption attempt.
- *
- * We record the highest value priority we saw during rescheduling
- * prior to this dequeue, therefore we know that if it is strictly
-	 * less than the current tail of ELSP[0], we do not need to force
- * a preempt-to-idle cycle.
- *
- * However, the priority hint is a mere hint that we may need to
- * preempt. If that hint is stale or we may be trying to preempt
- * ourselves, ignore the request.
- */
- last_prio = effective_prio(rq);
- if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
- last_prio))
- return false;
-
- /*
- * Check against the first request in ELSP[1], it will, thanks to the
- * power of PI, be the highest priority of that context.
- */
- if (!list_is_last(&rq->link, &engine->timeline.requests) &&
- rq_prio(list_next_entry(rq, link)) > last_prio)
- return true;
-
- /*
- * If the inflight context did not trigger the preemption, then maybe
- * it was the set of queued requests? Pick the highest priority in
- * the queue (the first active priolist) and see if it deserves to be
- * running instead of ELSP[0].
- *
- * The highest priority request in the queue can not be either
- * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
-	 * context, its priority would not exceed ELSP[0] aka last_prio.
- */
- return queue_prio(&engine->execlists) > last_prio;
-}
-
-__maybe_unused static inline bool
-assert_priority_queue(const struct i915_request *prev,
- const struct i915_request *next)
-{
- const struct intel_engine_execlists *execlists =
- &prev->engine->execlists;
-
- /*
- * Without preemption, the prev may refer to the still active element
- * which we refuse to let go.
- *
- * Even with preemption, there are times when we think it is better not
- * to preempt and leave an ostensibly lower priority request in flight.
- */
- if (port_request(execlists->port) == prev)
- return true;
-
- return rq_prio(prev) >= rq_prio(next);
-}
-
-/*
- * The context descriptor encodes various attributes of a context,
- * including its GTT address and some flags. Because it's fairly
- * expensive to calculate, we'll just do it once and cache the result,
- * which remains valid until the context is unpinned.
- *
- * This is what a descriptor looks like, from LSB to MSB::
- *
- * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
- * bits 12-31: LRCA, GTT address of (the HWSP of) this context
- * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
- * bits 53-54: mbz, reserved for use by hardware
- * bits 55-63: group ID, currently unused and set to 0
- *
- * Starting from Gen11, the upper dword of the descriptor has a new format:
- *
- * bits 32-36: reserved
- * bits 37-47: SW context ID
- *      bits 48-53:    engine instance
- * bit 54: mbz, reserved for use by hardware
- * bits 55-60: SW counter
- * bits 61-63: engine class
- *
- * engine info, SW context ID and SW counter need to form a unique number
- * (Context ID) per lrc.
- */
-static u64
-lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
-{
- struct i915_gem_context *ctx = ce->gem_context;
- u64 desc;
-
- BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
- BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
-
- desc = ctx->desc_template; /* bits 0-11 */
- GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
-
- desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
- /* bits 12-31 */
- GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
-
- /*
- * The following 32bits are copied into the OA reports (dword 2).
- * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
- * anything below.
- */
- if (INTEL_GEN(engine->i915) >= 11) {
- GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
- desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
- /* bits 37-47 */
-
- desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
- /* bits 48-53 */
-
- /* TODO: decide what to do with SW counter (bits 55-60) */
-
- desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
- /* bits 61-63 */
- } else {
- GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
- desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
- }
-
- return desc;
-}
-
-static void unwind_wa_tail(struct i915_request *rq)
-{
- rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
- assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
-static struct i915_request *
-__unwind_incomplete_requests(struct intel_engine_cs *engine)
-{
- struct i915_request *rq, *rn, *active = NULL;
- struct list_head *uninitialized_var(pl);
- int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
-
- lockdep_assert_held(&engine->timeline.lock);
-
- list_for_each_entry_safe_reverse(rq, rn,
- &engine->timeline.requests,
- link) {
- if (i915_request_completed(rq))
- break;
-
- __i915_request_unsubmit(rq);
- unwind_wa_tail(rq);
-
- GEM_BUG_ON(rq->hw_context->active);
-
- GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
- if (rq_prio(rq) != prio) {
- prio = rq_prio(rq);
- pl = i915_sched_lookup_priolist(engine, prio);
- }
- GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-
- list_add(&rq->sched.link, pl);
-
- active = rq;
- }
-
- /*
- * The active request is now effectively the start of a new client
- * stream, so give it the equivalent small priority bump to prevent
- * it being gazumped a second time by another peer.
- *
- * Note we have to be careful not to apply a priority boost to a request
- * still spinning on its semaphores. If the request hasn't started, that
- * means it is still waiting for its dependencies to be signaled, and
- * if we apply a priority boost to this request, we will boost it past
- * its signalers and so break PI.
- *
- * One consequence of this preemption boost is that we may jump
- * over lesser priorities (such as I915_PRIORITY_WAIT), effectively
- * making those priorities non-preemptible. They will be moved forward
- * in the priority queue, but they will not gain immediate access to
- * the GPU.
- */
- if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
- prio |= ACTIVE_PRIORITY;
- active->sched.attr.priority = prio;
- list_move_tail(&active->sched.link,
- i915_sched_lookup_priolist(engine, prio));
- }
-
- return active;
-}
-
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
-{
- struct intel_engine_cs *engine =
- container_of(execlists, typeof(*engine), execlists);
-
- return __unwind_incomplete_requests(engine);
-}
-
-static inline void
-execlists_context_status_change(struct i915_request *rq, unsigned long status)
-{
- /*
- * Only used when GVT-g is enabled now. When GVT-g is disabled,
- * The compiler should eliminate this function as dead-code.
- */
- if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
- return;
-
- atomic_notifier_call_chain(&rq->engine->context_status_notifier,
- status, rq);
-}
-
-inline void
-execlists_user_begin(struct intel_engine_execlists *execlists,
- const struct execlist_port *port)
-{
- execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
-}
-
-inline void
-execlists_user_end(struct intel_engine_execlists *execlists)
-{
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
-}
-
-static inline void
-execlists_context_schedule_in(struct i915_request *rq)
-{
- GEM_BUG_ON(rq->hw_context->active);
-
- execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
- intel_engine_context_in(rq->engine);
- rq->hw_context->active = rq->engine;
-}
-
-static inline void
-execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
-{
- rq->hw_context->active = NULL;
- intel_engine_context_out(rq->engine);
- execlists_context_status_change(rq, status);
- trace_i915_request_out(rq);
-}
-
-static u64 execlists_update_context(struct i915_request *rq)
-{
- struct intel_context *ce = rq->hw_context;
-
- ce->lrc_reg_state[CTX_RING_TAIL + 1] =
- intel_ring_set_tail(rq->ring, rq->tail);
-
- /*
- * Make sure the context image is complete before we submit it to HW.
- *
- * Ostensibly, writes (including the WCB) should be flushed prior to
- * an uncached write such as our mmio register access, the empirical
- * evidence (esp. on Braswell) suggests that the WC write into memory
- * may not be visible to the HW prior to the completion of the UC
- * register write and that we may begin execution from the context
- * before its image is complete leading to invalid PD chasing.
- *
- * Furthermore, Braswell, at least, wants a full mb to be sure that
- * the writes are coherent in memory (visible to the GPU) prior to
- * execution, and not just visible to other CPUs (as is the result of
- * wmb).
- */
- mb();
- return ce->lrc_desc;
-}
-
-static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
-{
- if (execlists->ctrl_reg) {
- writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
- writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
- } else {
- writel(upper_32_bits(desc), execlists->submit_reg);
- writel(lower_32_bits(desc), execlists->submit_reg);
- }
-}
-
-static void execlists_submit_ports(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
- unsigned int n;
-
- /*
- * We can skip acquiring intel_runtime_pm_get() here as it was taken
- * on our behalf by the request (see i915_gem_mark_busy()) and it will
- * not be relinquished until the device is idle (see
- * i915_gem_idle_work_handler()). As a precaution, we make sure
- * that all ELSP are drained i.e. we have processed the CSB,
- * before allowing ourselves to idle and calling intel_runtime_pm_put().
- */
- GEM_BUG_ON(!engine->i915->gt.awake);
-
- /*
- * ELSQ note: the submit queue is not cleared after being submitted
- * to the HW so we need to make sure we always clean it up. This is
- * currently ensured by the fact that we always write the same number
- * of elsq entries, keep this in mind before changing the loop below.
- */
- for (n = execlists_num_ports(execlists); n--; ) {
- struct i915_request *rq;
- unsigned int count;
- u64 desc;
-
- rq = port_unpack(&port[n], &count);
- if (rq) {
- GEM_BUG_ON(count > !n);
- if (!count++)
- execlists_context_schedule_in(rq);
- port_set(&port[n], port_pack(rq, count));
- desc = execlists_update_context(rq);
- GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
-
- GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
- engine->name, n,
- port[n].context_id, count,
- rq->fence.context, rq->fence.seqno,
- hwsp_seqno(rq),
- rq_prio(rq));
- } else {
- GEM_BUG_ON(!n);
- desc = 0;
- }
-
- write_desc(execlists, desc, n);
- }
-
- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
-}
-
-static bool ctx_single_port_submission(const struct intel_context *ce)
-{
- return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
- i915_gem_context_force_single_submission(ce->gem_context));
-}
-
-static bool can_merge_ctx(const struct intel_context *prev,
- const struct intel_context *next)
-{
- if (prev != next)
- return false;
-
- if (ctx_single_port_submission(prev))
- return false;
-
- return true;
-}
-
-static bool can_merge_rq(const struct i915_request *prev,
- const struct i915_request *next)
-{
- GEM_BUG_ON(!assert_priority_queue(prev, next));
-
- if (!can_merge_ctx(prev->hw_context, next->hw_context))
- return false;
-
- return true;
-}
-
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
- GEM_BUG_ON(rq == port_request(port));
-
- if (port_isset(port))
- i915_request_put(port_request(port));
-
- port_set(port, port_pack(i915_request_get(rq), port_count(port)));
-}
-
-static void inject_preempt_context(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce = engine->preempt_context;
- unsigned int n;
-
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
-
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
-
- write_desc(execlists, ce->lrc_desc, n);
-
- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
- execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
-
- (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
-}
-
-static void complete_preempt_context(struct intel_engine_execlists *execlists)
-{
- GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
- if (inject_preempt_hang(execlists))
- return;
-
- execlists_cancel_port_requests(execlists);
- __unwind_incomplete_requests(container_of(execlists,
- struct intel_engine_cs,
- execlists));
-}
-
-static void execlists_dequeue(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
- const struct execlist_port * const last_port =
- &execlists->port[execlists->port_mask];
- struct i915_request *last = port_request(port);
- struct rb_node *rb;
- bool submit = false;
-
- /*
- * Hardware submission is through 2 ports. Conceptually each port
- * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
- * static for a context, and unique to each, so we only execute
- * requests belonging to a single context from each ring. RING_HEAD
- * is maintained by the CS in the context image, it marks the place
- * where it got up to last time, and through RING_TAIL we tell the CS
- * where we want to execute up to this time.
- *
- * In this list the requests are in order of execution. Consecutive
- * requests from the same context are adjacent in the ringbuffer. We
- * can combine these requests into a single RING_TAIL update:
- *
- * RING_HEAD...req1...req2
- * ^- RING_TAIL
- * since to execute req2 the CS must first execute req1.
- *
- * Our goal then is to point each port to the end of a consecutive
- * sequence of requests as being the most optimal (fewest wake ups
- * and context switches) submission.
- */
-
- if (last) {
- /*
- * Don't resubmit or switch until all outstanding
- * preemptions (lite-restore) are seen. Then we
- * know the next preemption status we see corresponds
- * to this ELSP update.
- */
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
- GEM_BUG_ON(!port_count(&port[0]));
-
- /*
- * If we write to ELSP a second time before the HW has had
- * a chance to respond to the previous write, we can confuse
- * the HW and hit "undefined behaviour". After writing to ELSP,
- * we must then wait until we see a context-switch event from
- * the HW to indicate that it has had a chance to respond.
- */
- if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
- return;
-
- if (need_preempt(engine, last)) {
- inject_preempt_context(engine);
- return;
- }
-
- /*
- * In theory, we could coalesce more requests onto
- * the second port (the first port is active, with
- * no preemptions pending). However, that means we
- * then have to deal with the possible lite-restore
- * of the second port (as we submit the ELSP, there
- * may be a context-switch) but also we may complete
- * the resubmission before the context-switch. Ergo,
- * coalescing onto the second port will cause a
- * preemption event, but we cannot predict whether
- * that will affect port[0] or port[1].
- *
- * If the second port is already active, we can wait
- * until the next context-switch before contemplating
- * new requests. The GPU will be busy and we should be
- * able to resubmit the new ELSP before it idles,
- * avoiding pipeline bubbles (momentary pauses where
- * the driver is unable to keep up the supply of new
- * work). However, we have to double check that the
- * priorities of the ports haven't been switch.
- */
- if (port_count(&port[1]))
- return;
-
- /*
- * WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_fini_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
- }
-
- while ((rb = rb_first_cached(&execlists->queue))) {
- struct i915_priolist *p = to_priolist(rb);
- struct i915_request *rq, *rn;
- int i;
-
- priolist_for_each_request_consume(rq, rn, p, i) {
- /*
- * Can we combine this request with the current port?
- * It has to be the same context/ringbuffer and not
- * have any exceptions (e.g. GVT saying never to
- * combine contexts).
- *
- * If we can combine the requests, we can execute both
- * by updating the RING_TAIL to point to the end of the
- * second request, and so we never need to tell the
- * hardware about the first.
- */
- if (last && !can_merge_rq(last, rq)) {
- /*
- * If we are on the second port and cannot
- * combine this request with the last, then we
- * are done.
- */
- if (port == last_port)
- goto done;
-
- /*
- * We must not populate both ELSP[] with the
- * same LRCA, i.e. we must submit 2 different
- * contexts if we submit 2 ELSP.
- */
- if (last->hw_context == rq->hw_context)
- goto done;
-
- /*
- * If GVT overrides us we only ever submit
- * port[0], leaving port[1] empty. Note that we
- * also have to be careful that we don't queue
- * the same context (even though a different
- * request) to the second port.
- */
- if (ctx_single_port_submission(last->hw_context) ||
- ctx_single_port_submission(rq->hw_context))
- goto done;
-
-
- if (submit)
- port_assign(port, last);
- port++;
-
- GEM_BUG_ON(port_isset(port));
- }
-
- list_del_init(&rq->sched.link);
-
- __i915_request_submit(rq);
- trace_i915_request_in(rq, port_index(port, execlists));
-
- last = rq;
- submit = true;
- }
-
- rb_erase_cached(&p->node, &execlists->queue);
- i915_priolist_free(p);
- }
-
-done:
- /*
- * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
- *
- * We choose the priority hint such that if we add a request of greater
- * priority than this, we kick the submission tasklet to decide on
- * the right order of submitting the requests to hardware. We must
- * also be prepared to reorder requests as they are in-flight on the
- * HW. We derive the priority hint then as the first "hole" in
- * the HW submission ports and if there are no available slots,
- * the priority of the lowest executing request, i.e. last.
- *
- * When we do receive a higher priority request ready to run from the
- * user, see queue_request(), the priority hint is bumped to that
- * request triggering preemption on the next dequeue (or subsequent
- * interrupt for secondary ports).
- */
- execlists->queue_priority_hint = queue_prio(execlists);
-
- if (submit) {
- port_assign(port, last);
- execlists_submit_ports(engine);
- }
-
- /* We must always keep the beast fed if we have work piled up */
- GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
- !port_isset(execlists->port));
-
- /* Re-evaluate the executing context setup after each preemptive kick */
- if (last)
- execlists_user_begin(execlists, execlists->port);
-
- /* If the engine is now idle, so should be the flag; and vice versa. */
- GEM_BUG_ON(execlists_is_active(&engine->execlists,
- EXECLISTS_ACTIVE_USER) ==
- !port_isset(engine->execlists.port));
-}
-
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
-{
- struct execlist_port *port = execlists->port;
- unsigned int num_ports = execlists_num_ports(execlists);
-
- while (num_ports-- && port_isset(port)) {
- struct i915_request *rq = port_request(port);
-
- GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
- rq->engine->name,
- (unsigned int)(port - execlists->port),
- rq->fence.context, rq->fence.seqno,
- hwsp_seqno(rq));
-
- GEM_BUG_ON(!execlists->active);
- execlists_context_schedule_out(rq,
- i915_request_completed(rq) ?
- INTEL_CONTEXT_SCHEDULE_OUT :
- INTEL_CONTEXT_SCHEDULE_PREEMPTED);
-
- i915_request_put(rq);
-
- memset(port, 0, sizeof(*port));
- port++;
- }
-
- execlists_clear_all_active(execlists);
-}
-
-static inline void
-invalidate_csb_entries(const u32 *first, const u32 *last)
-{
- clflush((void *)first);
- clflush((void *)last);
-}
-
-static inline bool
-reset_in_progress(const struct intel_engine_execlists *execlists)
-{
- return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
-}
-
-static void process_csb(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
- const u32 * const buf = execlists->csb_status;
- const u8 num_entries = execlists->csb_size;
- u8 head, tail;
-
- lockdep_assert_held(&engine->timeline.lock);
-
- /*
- * Note that csb_write, csb_status may be either in HWSP or mmio.
- * When reading from the csb_write mmio register, we have to be
- * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
- * the low 4bits. As it happens we know the next 4bits are always
- * zero and so we can simply masked off the low u8 of the register
- * and treat it identically to reading from the HWSP (without having
- * to use explicit shifting and masking, and probably bifurcating
- * the code to handle the legacy mmio read).
- */
- head = execlists->csb_head;
- tail = READ_ONCE(*execlists->csb_write);
- GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
- if (unlikely(head == tail))
- return;
-
- /*
- * Hopefully paired with a wmb() in HW!
- *
- * We must complete the read of the write pointer before any reads
- * from the CSB, so that we do not see stale values. Without an rmb
- * (lfence) the HW may speculatively perform the CSB[] reads *before*
- * we perform the READ_ONCE(*csb_write).
- */
- rmb();
-
- do {
- struct i915_request *rq;
- unsigned int status;
- unsigned int count;
-
- if (++head == num_entries)
- head = 0;
-
- /*
- * We are flying near dragons again.
- *
- * We hold a reference to the request in execlist_port[]
- * but no more than that. We are operating in softirq
- * context and so cannot hold any mutex or sleep. That
- * prevents us stopping the requests we are processing
- * in port[] from being retired simultaneously (the
- * breadcrumb will be complete before we see the
- * context-switch). As we only hold the reference to the
- * request, any pointer chasing underneath the request
- * is subject to a potential use-after-free. Thus we
- * store all of the bookkeeping within port[] as
- * required, and avoid using unguarded pointers beneath
- * request itself. The same applies to the atomic
- * status notifier.
- */
-
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
- engine->name, head,
- buf[2 * head + 0], buf[2 * head + 1],
- execlists->active);
-
- status = buf[2 * head];
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
-
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
-
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
- GEM_TRACE("%s preempt-idle\n", engine->name);
- complete_preempt_context(execlists);
- continue;
- }
-
- if (status & GEN8_CTX_STATUS_PREEMPTED &&
- execlists_is_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT))
- continue;
-
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
-
- rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
- engine->name,
- port->context_id, count,
- rq ? rq->fence.context : 0,
- rq ? rq->fence.seqno : 0,
- rq ? hwsp_seqno(rq) : 0,
- rq ? rq_prio(rq) : 0);
-
- /* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
-
- GEM_BUG_ON(count == 0);
- if (--count == 0) {
- /*
- * On the final event corresponding to the
- * submission of this context, we expect either
- * an element-switch event or a completion
- * event (and on completion, the active-idle
- * marker). No more preemptions, lite-restore
- * or otherwise.
- */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
- GEM_BUG_ON(port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
- GEM_BUG_ON(!port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-
- /*
- * We rely on the hardware being strongly
- * ordered, that the breadcrumb write is
- * coherent (visible from the CPU) before the
- * user interrupt and CSB is processed.
- */
- GEM_BUG_ON(!i915_request_completed(rq));
-
- execlists_context_schedule_out(rq,
- INTEL_CONTEXT_SCHEDULE_OUT);
- i915_request_put(rq);
-
- GEM_TRACE("%s completed ctx=%d\n",
- engine->name, port->context_id);
-
- port = execlists_port_complete(execlists, port);
- if (port_isset(port))
- execlists_user_begin(execlists, port);
- else
- execlists_user_end(execlists);
- } else {
- port_set(port, port_pack(rq, count));
- }
- } while (head != tail);
-
- execlists->csb_head = head;
-
- /*
- * Gen11 has proven to fail wrt global observation point between
- * entry and tail update, failing on the ordering and thus
- * we see an old entry in the context status buffer.
- *
- * Forcibly evict out entries for the next gpu csb update,
- * to increase the odds that we get a fresh entries with non
- * working hardware. The cost for doing so comes out mostly with
- * the wash as hardware, working or not, will need to do the
- * invalidation before.
- */
- invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
-}
-
-static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
-{
- lockdep_assert_held(&engine->timeline.lock);
-
- process_csb(engine);
- if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
- execlists_dequeue(engine);
-}
-
-/*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
-static void execlists_submission_tasklet(unsigned long data)
-{
- struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
- unsigned long flags;
-
- GEM_TRACE("%s awake?=%d, active=%x\n",
- engine->name,
- !!engine->i915->gt.awake,
- engine->execlists.active);
-
- spin_lock_irqsave(&engine->timeline.lock, flags);
- __execlists_submission_tasklet(engine);
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-static void queue_request(struct intel_engine_cs *engine,
- struct i915_sched_node *node,
- int prio)
-{
- list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
-}
-
-static void __submit_queue_imm(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
-
- if (reset_in_progress(execlists))
- return; /* defer until we restart the engine following reset */
-
- if (execlists->tasklet.func == execlists_submission_tasklet)
- __execlists_submission_tasklet(engine);
- else
- tasklet_hi_schedule(&execlists->tasklet);
-}
-
-static void submit_queue(struct intel_engine_cs *engine, int prio)
-{
- if (prio > engine->execlists.queue_priority_hint) {
- engine->execlists.queue_priority_hint = prio;
- __submit_queue_imm(engine);
- }
-}
-
-static void execlists_submit_request(struct i915_request *request)
-{
- struct intel_engine_cs *engine = request->engine;
- unsigned long flags;
-
- /* Will be called from irq-context when using foreign fences. */
- spin_lock_irqsave(&engine->timeline.lock, flags);
-
- queue_request(engine, &request->sched, rq_prio(request));
-
- GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
- GEM_BUG_ON(list_empty(&request->sched.link));
-
- submit_queue(engine, rq_prio(request));
-
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-static void __execlists_context_fini(struct intel_context *ce)
-{
- intel_ring_put(ce->ring);
-
- GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
- i915_gem_object_put(ce->state->obj);
-}
-
-static void execlists_context_destroy(struct kref *kref)
-{
- struct intel_context *ce = container_of(kref, typeof(*ce), ref);
-
- GEM_BUG_ON(intel_context_is_pinned(ce));
-
- if (ce->state)
- __execlists_context_fini(ce);
-
- intel_context_free(ce);
-}
-
-static int __context_pin(struct i915_vma *vma)
-{
- unsigned int flags;
- int err;
-
- flags = PIN_GLOBAL | PIN_HIGH;
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
- err = i915_vma_pin(vma, 0, 0, flags);
- if (err)
- return err;
-
- vma->obj->pin_global++;
- vma->obj->mm.dirty = true;
-
- return 0;
-}
-
-static void __context_unpin(struct i915_vma *vma)
-{
- vma->obj->pin_global--;
- __i915_vma_unpin(vma);
-}
-
-static void execlists_context_unpin(struct intel_context *ce)
-{
- struct intel_engine_cs *engine;
-
- /*
- * The tasklet may still be using a pointer to our state, via an
- * old request. However, since we know we only unpin the context
- * on retirement of the following request, we know that the last
- * request referencing us will have had a completion CS interrupt.
- * If we see that it is still active, it means that the tasklet hasn't
- * had the chance to run yet; let it run before we teardown the
- * reference it may use.
- */
- engine = READ_ONCE(ce->active);
- if (unlikely(engine)) {
- unsigned long flags;
-
- spin_lock_irqsave(&engine->timeline.lock, flags);
- process_csb(engine);
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
- GEM_BUG_ON(READ_ONCE(ce->active));
- }
-
- i915_gem_context_unpin_hw_id(ce->gem_context);
-
- intel_ring_unpin(ce->ring);
-
- i915_gem_object_unpin_map(ce->state->obj);
- __context_unpin(ce->state);
-}
-
-static void
-__execlists_update_reg_state(struct intel_context *ce,
- struct intel_engine_cs *engine)
-{
- struct intel_ring *ring = ce->ring;
- u32 *regs = ce->lrc_reg_state;
-
- GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
- GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
-
- regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
- regs[CTX_RING_HEAD + 1] = ring->head;
- regs[CTX_RING_TAIL + 1] = ring->tail;
-
- /* RPCS */
- if (engine->class == RENDER_CLASS)
- regs[CTX_R_PWR_CLK_STATE + 1] =
- intel_sseu_make_rpcs(engine->i915, &ce->sseu);
-}
-
-static int
-__execlists_context_pin(struct intel_context *ce,
- struct intel_engine_cs *engine)
-{
- void *vaddr;
- int ret;
-
- GEM_BUG_ON(!ce->gem_context->ppgtt);
-
- ret = execlists_context_deferred_alloc(ce, engine);
- if (ret)
- goto err;
- GEM_BUG_ON(!ce->state);
-
- ret = __context_pin(ce->state);
- if (ret)
- goto err;
-
- vaddr = i915_gem_object_pin_map(ce->state->obj,
- i915_coherent_map_type(engine->i915) |
- I915_MAP_OVERRIDE);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- goto unpin_vma;
- }
-
- ret = intel_ring_pin(ce->ring);
- if (ret)
- goto unpin_map;
-
- ret = i915_gem_context_pin_hw_id(ce->gem_context);
- if (ret)
- goto unpin_ring;
-
- ce->lrc_desc = lrc_descriptor(ce, engine);
- ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
- __execlists_update_reg_state(ce, engine);
-
- return 0;
-
-unpin_ring:
- intel_ring_unpin(ce->ring);
-unpin_map:
- i915_gem_object_unpin_map(ce->state->obj);
-unpin_vma:
- __context_unpin(ce->state);
-err:
- return ret;
-}
-
-static int execlists_context_pin(struct intel_context *ce)
-{
- return __execlists_context_pin(ce, ce->engine);
-}
-
-static void execlists_context_reset(struct intel_context *ce)
-{
- /*
- * Because we emit WA_TAIL_DWORDS there may be a disparity
- * between our bookkeeping in ce->ring->head and ce->ring->tail and
- * that stored in context. As we only write new commands from
- * ce->ring->tail onwards, everything before that is junk. If the GPU
- * starts reading from its RING_HEAD from the context, it may try to
- * execute that junk and die.
- *
- * The contexts that are stilled pinned on resume belong to the
- * kernel, and are local to each engine. All other contexts will
- * have their head/tail sanitized upon pinning before use, so they
- * will never see garbage,
- *
- * So to avoid that we reset the context images upon resume. For
- * simplicity, we just zero everything out.
- */
- intel_ring_reset(ce->ring, 0);
- __execlists_update_reg_state(ce, ce->engine);
-}
-
-static const struct intel_context_ops execlists_context_ops = {
- .pin = execlists_context_pin,
- .unpin = execlists_context_unpin,
-
- .reset = execlists_context_reset,
- .destroy = execlists_context_destroy,
-};
-
-static int gen8_emit_init_breadcrumb(struct i915_request *rq)
-{
- u32 *cs;
-
- GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
-
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /*
- * Check if we have been preempted before we even get started.
- *
- * After this point i915_request_started() reports true, even if
- * we get preempted and so are no longer running.
- */
- *cs++ = MI_ARB_CHECK;
- *cs++ = MI_NOOP;
-
- *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = rq->timeline->hwsp_offset;
- *cs++ = 0;
- *cs++ = rq->fence.seqno - 1;
-
- intel_ring_advance(rq, cs);
-
- /* Record the updated position of the request's payload */
- rq->infix = intel_ring_offset(rq, cs);
-
- return 0;
-}
-
-static int emit_pdps(struct i915_request *rq)
-{
- const struct intel_engine_cs * const engine = rq->engine;
- struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt;
- int err, i;
- u32 *cs;
-
- GEM_BUG_ON(intel_vgpu_active(rq->i915));
-
- /*
- * Beware ye of the dragons, this sequence is magic!
- *
- * Small changes to this sequence can cause anything from
- * GPU hangs to forcewake errors and machine lockups!
- */
-
- /* Flush any residual operations from the context load */
- err = engine->emit_flush(rq, EMIT_FLUSH);
- if (err)
- return err;
-
- /* Magic required to prevent forcewake errors! */
- err = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (err)
- return err;
-
- cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
- for (i = GEN8_3LVL_PDPES; i--; ) {
- const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- u32 base = engine->mmio_base;
-
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
- *cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
- *cs++ = lower_32_bits(pd_daddr);
- }
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- /* Be doubly sure the LRI have landed before proceeding */
- err = engine->emit_flush(rq, EMIT_FLUSH);
- if (err)
- return err;
-
- /* Re-invalidate the TLB for luck */
- return engine->emit_flush(rq, EMIT_INVALIDATE);
-}
-
-static int execlists_request_alloc(struct i915_request *request)
-{
- int ret;
-
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
-
- /*
- * Flush enough space to reduce the likelihood of waiting after
- * we start building the request - in which case we will just
- * have to repeat work.
- */
- request->reserved_space += EXECLISTS_REQUEST_SIZE;
-
- /*
- * Note that after this point, we have committed to using
- * this request as it is being used to both track the
- * state of engine initialisation and liveness of the
- * golden renderstate above. Think twice before you try
- * to cancel/unwind this request now.
- */
-
- /* Unconditionally invalidate GPU caches and TLBs. */
- if (i915_vm_is_4lvl(&request->gem_context->ppgtt->vm))
- ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
- else
- ret = emit_pdps(request);
- if (ret)
- return ret;
-
- request->reserved_space -= EXECLISTS_REQUEST_SIZE;
- return 0;
-}
-
-/*
- * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
- * PIPE_CONTROL instruction. This is required for the flush to happen correctly
- * but there is a slight complication as this is applied in WA batch where the
- * values are only initialized once so we cannot take register value at the
- * beginning and reuse it further; hence we save its value to memory, upload a
- * constant value with bit21 set and then we restore it back with the saved value.
- * To simplify the WA, a constant value is formed by using the default value
- * of this register. This shouldn't be a problem because we are only modifying
- * it for a short period and this batch in non-premptible. We can ofcourse
- * use additional instructions that read the actual value of the register
- * at that time and set our bit of interest but it makes the WA complicated.
- *
- * This WA is also required for Gen9 so extracting as a function avoids
- * code duplication.
- */
-static u32 *
-gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
-{
- /* NB no one else is allowed to scribble over scratch + 256! */
- *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = i915_scratch_offset(engine->i915) + 256;
- *batch++ = 0;
-
- *batch++ = MI_LOAD_REGISTER_IMM(1);
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
-
- batch = gen8_emit_pipe_control(batch,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_DC_FLUSH_ENABLE,
- 0);
-
- *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = i915_scratch_offset(engine->i915) + 256;
- *batch++ = 0;
-
- return batch;
-}
-
-/*
- * Typically we only have one indirect_ctx and per_ctx batch buffer which are
- * initialized at the beginning and shared across all contexts but this field
- * helps us to have multiple batches at different offsets and select them based
- * on a criteria. At the moment this batch always start at the beginning of the page
- * and at this point we don't have multiple wa_ctx batch buffers.
- *
- * The number of WA applied are not known at the beginning; we use this field
- * to return the no of DWORDS written.
- *
- * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
- * so it adds NOOPs as padding to make it cacheline aligned.
- * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
- * makes a complete batch buffer.
- */
-static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- /* WaDisableCtxRestoreArbitration:bdw,chv */
- *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
- /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
- if (IS_BROADWELL(engine->i915))
- batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
- /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
- /* Actual scratch location is at 128 bytes offset */
- batch = gen8_emit_pipe_control(batch,
- PIPE_CONTROL_FLUSH_L3 |
- PIPE_CONTROL_GLOBAL_GTT_IVB |
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_QW_WRITE,
- i915_scratch_offset(engine->i915) +
- 2 * CACHELINE_BYTES);
-
- *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
- /* Pad to end of cacheline */
- while ((unsigned long)batch % CACHELINE_BYTES)
- *batch++ = MI_NOOP;
-
- /*
- * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
- * execution depends on the length specified in terms of cache lines
- * in the register CTX_RCS_INDIRECT_CTX
- */
-
- return batch;
-}
-
-struct lri {
- i915_reg_t reg;
- u32 value;
-};
-
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
-{
- GEM_BUG_ON(!count || count > 63);
-
- *batch++ = MI_LOAD_REGISTER_IMM(count);
- do {
- *batch++ = i915_mmio_reg_offset(lri->reg);
- *batch++ = lri->value;
- } while (lri++, --count);
- *batch++ = MI_NOOP;
-
- return batch;
-}
-
-static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- static const struct lri lri[] = {
- /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
- {
- COMMON_SLICE_CHICKEN2,
- __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
- 0),
- },
-
- /* BSpec: 11391 */
- {
- FF_SLICE_CHICKEN,
- __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
- FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
- },
-
- /* BSpec: 11299 */
- {
- _3D_CHICKEN3,
- __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
- _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
- }
- };
-
- *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
- /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
- batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
-
- /* WaMediaPoolStateCmdInWABB:bxt,glk */
- if (HAS_POOLED_EU(engine->i915)) {
- /*
- * EU pool configuration is setup along with golden context
- * during context initialization. This value depends on
- * device type (2x6 or 3x6) and needs to be updated based
- * on which subslice is disabled especially for 2x6
- * devices, however it is safe to load default
- * configuration of 3x6 device instead of masking off
- * corresponding bits because HW ignores bits of a disabled
- * subslice and drops down to appropriate config. Please
- * see render_state_setup() in i915_gem_render_state.c for
- * possible configurations, to avoid duplication they are
- * not shown here again.
- */
- *batch++ = GEN9_MEDIA_POOL_STATE;
- *batch++ = GEN9_MEDIA_POOL_ENABLE;
- *batch++ = 0x00777000;
- *batch++ = 0;
- *batch++ = 0;
- *batch++ = 0;
- }
-
- *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
- /* Pad to end of cacheline */
- while ((unsigned long)batch % CACHELINE_BYTES)
- *batch++ = MI_NOOP;
-
- return batch;
-}
-
-static u32 *
-gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- int i;
-
- /*
- * WaPipeControlBefore3DStateSamplePattern: cnl
- *
- * Ensure the engine is idle prior to programming a
- * 3DSTATE_SAMPLE_PATTERN during a context restore.
- */
- batch = gen8_emit_pipe_control(batch,
- PIPE_CONTROL_CS_STALL,
- 0);
- /*
- * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
- * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
- * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
- * confusing. Since gen8_emit_pipe_control() already advances the
- * batch by 6 dwords, we advance the other 10 here, completing a
- * cacheline. It's not clear if the workaround requires this padding
- * before other commands, or if it's just the regular padding we would
- * already have for the workaround bb, so leave it here for now.
- */
- for (i = 0; i < 10; i++)
- *batch++ = MI_NOOP;
-
- /* Pad to end of cacheline */
- while ((unsigned long)batch % CACHELINE_BYTES)
- *batch++ = MI_NOOP;
-
- return batch;
-}
-
-#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
-
-static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
-{
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- int err;
-
- obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err;
- }
-
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
- if (err)
- goto err;
-
- engine->wa_ctx.vma = vma;
- return 0;
-
-err:
- i915_gem_object_put(obj);
- return err;
-}
-
-static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
-{
- i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
-}
-
-typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
-
-static int intel_init_workaround_bb(struct intel_engine_cs *engine)
-{
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
- &wa_ctx->per_ctx };
- wa_bb_func_t wa_bb_fn[2];
- struct page *page;
- void *batch, *batch_ptr;
- unsigned int i;
- int ret;
-
- if (GEM_DEBUG_WARN_ON(engine->id != RCS0))
- return -EINVAL;
-
- switch (INTEL_GEN(engine->i915)) {
- case 11:
- return 0;
- case 10:
- wa_bb_fn[0] = gen10_init_indirectctx_bb;
- wa_bb_fn[1] = NULL;
- break;
- case 9:
- wa_bb_fn[0] = gen9_init_indirectctx_bb;
- wa_bb_fn[1] = NULL;
- break;
- case 8:
- wa_bb_fn[0] = gen8_init_indirectctx_bb;
- wa_bb_fn[1] = NULL;
- break;
- default:
- MISSING_CASE(INTEL_GEN(engine->i915));
- return 0;
- }
-
- ret = lrc_setup_wa_ctx(engine);
- if (ret) {
- DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
- return ret;
- }
-
- page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
- batch = batch_ptr = kmap_atomic(page);
-
- /*
- * Emit the two workaround batch buffers, recording the offset from the
- * start of the workaround batch buffer object for each and their
- * respective sizes.
- */
- for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
- wa_bb[i]->offset = batch_ptr - batch;
- if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
- CACHELINE_BYTES))) {
- ret = -EINVAL;
- break;
- }
- if (wa_bb_fn[i])
- batch_ptr = wa_bb_fn[i](engine, batch_ptr);
- wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
- }
-
- BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
-
- kunmap_atomic(batch);
- if (ret)
- lrc_destroy_wa_ctx(engine);
-
- return ret;
-}
-
-static void enable_execlists(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
-
- if (INTEL_GEN(dev_priv) >= 11)
- I915_WRITE(RING_MODE_GEN7(engine),
- _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
- else
- I915_WRITE(RING_MODE_GEN7(engine),
- _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
-
- I915_WRITE(RING_MI_MODE(engine->mmio_base),
- _MASKED_BIT_DISABLE(STOP_RING));
-
- I915_WRITE(RING_HWS_PGA(engine->mmio_base),
- i915_ggtt_offset(engine->status_page.vma));
- POSTING_READ(RING_HWS_PGA(engine->mmio_base));
-}
-
-static bool unexpected_starting_state(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- bool unexpected = false;
-
- if (I915_READ(RING_MI_MODE(engine->mmio_base)) & STOP_RING) {
- DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
- unexpected = true;
- }
-
- return unexpected;
-}
-
-static int gen8_init_common_ring(struct intel_engine_cs *engine)
-{
- intel_engine_apply_workarounds(engine);
- intel_engine_apply_whitelist(engine);
-
- intel_mocs_init_engine(engine);
-
- intel_engine_reset_breadcrumbs(engine);
-
- if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
- struct drm_printer p = drm_debug_printer(__func__);
-
- intel_engine_dump(engine, &p, NULL);
- }
-
- enable_execlists(engine);
-
- return 0;
-}
-
-static void execlists_reset_prepare(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- unsigned long flags;
-
- GEM_TRACE("%s: depth<-%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
-
- /*
- * Prevent request submission to the hardware until we have
- * completed the reset in i915_gem_reset_finish(). If a request
- * is completed by one engine, it may then queue a request
- * to a second via its execlists->tasklet *just* as we are
- * calling engine->init_hw() and also writing the ELSP.
- * Turning off the execlists->tasklet until the reset is over
- * prevents the race.
- */
- __tasklet_disable_sync_once(&execlists->tasklet);
- GEM_BUG_ON(!reset_in_progress(execlists));
-
- intel_engine_stop_cs(engine);
-
- /* And flush any current direct submission. */
- spin_lock_irqsave(&engine->timeline.lock, flags);
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-static bool lrc_regs_ok(const struct i915_request *rq)
-{
- const struct intel_ring *ring = rq->ring;
- const u32 *regs = rq->hw_context->lrc_reg_state;
-
- /* Quick spot check for the common signs of context corruption */
-
- if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
- (RING_CTL_SIZE(ring->size) | RING_VALID))
- return false;
-
- if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
- return false;
-
- return true;
-}
-
-static void reset_csb_pointers(struct intel_engine_execlists *execlists)
-{
- const unsigned int reset_value = execlists->csb_size - 1;
-
- /*
- * After a reset, the HW starts writing into CSB entry [0]. We
- * therefore have to set our HEAD pointer back one entry so that
- * the *first* entry we check is entry 0. To complicate this further,
- * as we don't wait for the first interrupt after reset, we have to
- * fake the HW write to point back to the last entry so that our
- * inline comparison of our cached head position against the last HW
- * write works even before the first interrupt.
- */
- execlists->csb_head = reset_value;
- WRITE_ONCE(*execlists->csb_write, reset_value);
- wmb(); /* Make sure this is visible to HW (paranoia?) */
-
- invalidate_csb_entries(&execlists->csb_status[0],
- &execlists->csb_status[reset_value]);
-}
-
-static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct intel_context *ce;
- struct i915_request *rq;
- u32 *regs;
-
- process_csb(engine); /* drain preemption events */
-
- /* Following the reset, we need to reload the CSB read/write pointers */
- reset_csb_pointers(&engine->execlists);
-
- /*
- * Save the currently executing context, even if we completed
- * its request, it was still running at the time of the
- * reset and will have been clobbered.
- */
- if (!port_isset(execlists->port))
- goto out_clear;
-
- ce = port_request(execlists->port)->hw_context;
-
- /*
- * Catch up with any missed context-switch interrupts.
- *
- * Ideally we would just read the remaining CSB entries now that we
- * know the gpu is idle. However, the CSB registers are sometimes^W
- * often trashed across a GPU reset! Instead we have to rely on
- * guessing the missed context-switch events by looking at what
- * requests were completed.
- */
- execlists_cancel_port_requests(execlists);
-
- /* Push back any incomplete requests for replay after the reset. */
- rq = __unwind_incomplete_requests(engine);
- if (!rq)
- goto out_replay;
-
- if (rq->hw_context != ce) { /* caught just before a CS event */
- rq = NULL;
- goto out_replay;
- }
-
- /*
- * If this request hasn't started yet, e.g. it is waiting on a
- * semaphore, we need to avoid skipping the request or else we
- * break the signaling chain. However, if the context is corrupt
- * the request will not restart and we will be stuck with a wedged
- * device. It is quite often the case that if we issue a reset
- * while the GPU is loading the context image, that the context
- * image becomes corrupt.
- *
- * Otherwise, if we have not started yet, the request should replay
- * perfectly and we do not need to flag the result as being erroneous.
- */
- if (!i915_request_started(rq) && lrc_regs_ok(rq))
- goto out_replay;
-
- /*
- * If the request was innocent, we leave the request in the ELSP
- * and will try to replay it on restarting. The context image may
- * have been corrupted by the reset, in which case we may have
- * to service a new GPU hang, but more likely we can continue on
- * without impact.
- *
- * If the request was guilty, we presume the context is corrupt
- * and have to at least restore the RING register in the context
- * image back to the expected values to skip over the guilty request.
- */
- i915_reset_request(rq, stalled);
- if (!stalled && lrc_regs_ok(rq))
- goto out_replay;
-
- /*
- * We want a simple context + ring to execute the breadcrumb update.
- * We cannot rely on the context being intact across the GPU hang,
- * so clear it and rebuild just what we need for the breadcrumb.
- * All pending requests for this context will be zapped, and any
- * future request will be after userspace has had the opportunity
- * to recreate its own state.
- */
- regs = ce->lrc_reg_state;
- if (engine->pinned_default_state) {
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
- }
- execlists_init_reg_state(regs, ce, engine, ce->ring);
-
- /* Rerun the request; its payload has been neutered (if guilty). */
-out_replay:
- ce->ring->head =
- rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
- intel_ring_update_space(ce->ring);
- __execlists_update_reg_state(ce, engine);
-
-out_clear:
- execlists_clear_all_active(execlists);
-}
-
-static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
-{
- unsigned long flags;
-
- GEM_TRACE("%s\n", engine->name);
-
- spin_lock_irqsave(&engine->timeline.lock, flags);
-
- __execlists_reset(engine, stalled);
-
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-static void nop_submission_tasklet(unsigned long data)
-{
- /* The driver is wedged; don't process any more events. */
-}
-
-static void execlists_cancel_requests(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct i915_request *rq, *rn;
- struct rb_node *rb;
- unsigned long flags;
-
- GEM_TRACE("%s\n", engine->name);
-
- /*
- * Before we call engine->cancel_requests(), we should have exclusive
- * access to the submission state. This is arranged for us by the
- * caller disabling the interrupt generation, the tasklet and other
- * threads that may then access the same state, giving us a free hand
- * to reset state. However, we still need to let lockdep be aware that
- * we know this state may be accessed in hardirq context, so we
- * disable the irq around this manipulation and we want to keep
- * the spinlock focused on its duties and not accidentally conflate
- * coverage to the submission's irq state. (Similarly, although we
- * shouldn't need to disable irq around the manipulation of the
- * submission's irq state, we also wish to remind ourselves that
- * it is irq state.)
- */
- spin_lock_irqsave(&engine->timeline.lock, flags);
-
- __execlists_reset(engine, true);
-
- /* Mark all executing requests as skipped. */
- list_for_each_entry(rq, &engine->timeline.requests, link) {
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
-
- i915_request_mark_complete(rq);
- }
-
- /* Flush the queued requests to the timeline list (for retiring). */
- while ((rb = rb_first_cached(&execlists->queue))) {
- struct i915_priolist *p = to_priolist(rb);
- int i;
-
- priolist_for_each_request_consume(rq, rn, p, i) {
- list_del_init(&rq->sched.link);
- __i915_request_submit(rq);
- dma_fence_set_error(&rq->fence, -EIO);
- i915_request_mark_complete(rq);
- }
-
- rb_erase_cached(&p->node, &execlists->queue);
- i915_priolist_free(p);
- }
-
- /* Remaining _unready_ requests will be nop'ed when submitted */
-
- execlists->queue_priority_hint = INT_MIN;
- execlists->queue = RB_ROOT_CACHED;
- GEM_BUG_ON(port_isset(execlists->port));
-
- GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
- execlists->tasklet.func = nop_submission_tasklet;
-
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-static void execlists_reset_finish(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
-
- /*
- * After a GPU reset, we may have requests to replay. Do so now while
- * we still have the forcewake to be sure that the GPU is not allowed
- * to sleep before we restart and reload a context.
- */
- GEM_BUG_ON(!reset_in_progress(execlists));
- if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
- execlists->tasklet.func(execlists->tasklet.data);
-
- if (__tasklet_enable(&execlists->tasklet))
- /* And kick in case we missed a new request submission. */
- tasklet_hi_schedule(&execlists->tasklet);
- GEM_TRACE("%s: depth->%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
-}
-
-static int gen8_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /*
- * WaDisableCtxRestoreArbitration:bdw,chv
- *
- * We don't need to perform MI_ARB_ENABLE as often as we do (in
- * particular all the gen that do not need the w/a at all!), if we
- * took care to make sure that on every switch into this context
- * (both ordinary and for preemption) that arbitrartion was enabled
- * we would be fine. However, for gen8 there is another w/a that
- * requires us to not preempt inside GPGPU execution, so we keep
- * arbitration disabled for gen8 batches. Arbitration will be
- * re-enabled before we close the request
- * (engine->emit_fini_breadcrumb).
- */
- *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
- /* FIXME(BDW+): Address space and security selectors. */
- *cs++ = MI_BATCH_BUFFER_START_GEN8 |
- (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
- *cs++ = lower_32_bits(offset);
- *cs++ = upper_32_bits(offset);
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int gen9_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
- *cs++ = MI_BATCH_BUFFER_START_GEN8 |
- (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
- *cs++ = lower_32_bits(offset);
- *cs++ = upper_32_bits(offset);
-
- *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR,
- ~(engine->irq_enable_mask | engine->irq_keep_mask));
- ENGINE_POSTING_READ(engine, RING_IMR);
-}
-
-static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
-}
-
-static int gen8_emit_flush(struct i915_request *request, u32 mode)
-{
- u32 cmd, *cs;
-
- cs = intel_ring_begin(request, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- cmd = MI_FLUSH_DW + 1;
-
- /* We always require a command barrier so that subsequent
- * commands, such as breadcrumb interrupts, are strictly ordered
- * wrt the contents of the write cache being flushed to memory
- * (and thus being coherent from the CPU).
- */
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
- if (mode & EMIT_INVALIDATE) {
- cmd |= MI_INVALIDATE_TLB;
- if (request->engine->class == VIDEO_DECODE_CLASS)
- cmd |= MI_INVALIDATE_BSD;
- }
-
- *cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = 0; /* upper addr */
- *cs++ = 0; /* value */
- intel_ring_advance(request, cs);
-
- return 0;
-}
-
-static int gen8_emit_flush_render(struct i915_request *request,
- u32 mode)
-{
- struct intel_engine_cs *engine = request->engine;
- u32 scratch_addr =
- i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
- bool vf_flush_wa = false, dc_flush_wa = false;
- u32 *cs, flags = 0;
- int len;
-
- flags |= PIPE_CONTROL_CS_STALL;
-
- if (mode & EMIT_FLUSH) {
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
- }
-
- if (mode & EMIT_INVALIDATE) {
- flags |= PIPE_CONTROL_TLB_INVALIDATE;
- flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
- flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
- /*
- * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
- * pipe control.
- */
- if (IS_GEN(request->i915, 9))
- vf_flush_wa = true;
-
- /* WaForGAMHang:kbl */
- if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
- dc_flush_wa = true;
- }
-
- len = 6;
-
- if (vf_flush_wa)
- len += 6;
-
- if (dc_flush_wa)
- len += 12;
-
- cs = intel_ring_begin(request, len);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- if (vf_flush_wa)
- cs = gen8_emit_pipe_control(cs, 0, 0);
-
- if (dc_flush_wa)
- cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
- 0);
-
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
-
- if (dc_flush_wa)
- cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
-
- intel_ring_advance(request, cs);
-
- return 0;
-}
-
-/*
- * Reserve space for 2 NOOPs at the end of each request to be
- * used as a workaround for not being allowed to do lite
- * restore with HEAD==TAIL (WaIdleLiteRestore).
- */
-static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
-{
- /* Ensure there's always at least one preemption point per-request. */
- *cs++ = MI_ARB_CHECK;
- *cs++ = MI_NOOP;
- request->wa_tail = intel_ring_offset(request, cs);
-
- return cs;
-}
-
-static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
-{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- request->timeline->hwsp_offset,
- 0);
-
- cs = gen8_emit_ggtt_write(cs,
- intel_engine_next_hangcheck_seqno(request->engine),
- I915_GEM_HWS_HANGCHECK_ADDR,
- MI_FLUSH_DW_STORE_INDEX);
-
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
- request->tail = intel_ring_offset(request, cs);
- assert_ring_tail_valid(request->ring, request->tail);
-
- return gen8_emit_wa_tail(request, cs);
-}
-
-static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
-{
- cs = gen8_emit_ggtt_write_rcs(cs,
- request->fence.seqno,
- request->timeline->hwsp_offset,
- PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_CS_STALL);
-
- cs = gen8_emit_ggtt_write_rcs(cs,
- intel_engine_next_hangcheck_seqno(request->engine),
- I915_GEM_HWS_HANGCHECK_ADDR,
- PIPE_CONTROL_STORE_DATA_INDEX);
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
- request->tail = intel_ring_offset(request, cs);
- assert_ring_tail_valid(request->ring, request->tail);
-
- return gen8_emit_wa_tail(request, cs);
-}
-
-static int gen8_init_rcs_context(struct i915_request *rq)
-{
- int ret;
-
- ret = intel_engine_emit_ctx_wa(rq);
- if (ret)
- return ret;
-
- ret = intel_rcs_context_init_mocs(rq);
- /*
- * Failing to program the MOCS is non-fatal.The system will not
- * run at peak performance. So generate an error and carry on.
- */
- if (ret)
- DRM_ERROR("MOCS failed to program: expect performance issues.\n");
-
- return i915_gem_render_state_emit(rq);
-}
-
-/**
- * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
- * @engine: Engine Command Streamer.
- */
-void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv;
-
- /*
- * Tasklet cannot be active at this point due intel_mark_active/idle
- * so this is just for documentation.
- */
- if (WARN_ON(test_bit(TASKLET_STATE_SCHED,
- &engine->execlists.tasklet.state)))
- tasklet_kill(&engine->execlists.tasklet);
-
- dev_priv = engine->i915;
-
- if (engine->buffer) {
- WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
- }
-
- if (engine->cleanup)
- engine->cleanup(engine);
-
- intel_engine_cleanup_common(engine);
-
- lrc_destroy_wa_ctx(engine);
-
- engine->i915 = NULL;
- dev_priv->engine[engine->id] = NULL;
- kfree(engine);
-}
-
-void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
-{
- engine->submit_request = execlists_submit_request;
- engine->cancel_requests = execlists_cancel_requests;
- engine->schedule = i915_schedule;
- engine->execlists.tasklet.func = execlists_submission_tasklet;
-
- engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
- engine->reset.finish = execlists_reset_finish;
-
- engine->park = NULL;
- engine->unpark = NULL;
-
- engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (!intel_vgpu_active(engine->i915))
- engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
- if (engine->preempt_context &&
- HAS_LOGICAL_RING_PREEMPTION(engine->i915))
- engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-}
-
-static void
-logical_ring_default_vfuncs(struct intel_engine_cs *engine)
-{
- /* Default vfuncs which can be overriden by each engine. */
- engine->init_hw = gen8_init_common_ring;
-
- engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
- engine->reset.finish = execlists_reset_finish;
-
- engine->cops = &execlists_context_ops;
- engine->request_alloc = execlists_request_alloc;
-
- engine->emit_flush = gen8_emit_flush;
- engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
- engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
-
- engine->set_default_submission = intel_execlists_set_default_submission;
-
- if (INTEL_GEN(engine->i915) < 11) {
- engine->irq_enable = gen8_logical_ring_enable_irq;
- engine->irq_disable = gen8_logical_ring_disable_irq;
- } else {
- /*
- * TODO: On Gen11 interrupt masks need to be clear
- * to allow C6 entry. Keep interrupts enabled at
- * and take the hit of generating extra interrupts
- * until a more refined solution exists.
- */
- }
- if (IS_GEN(engine->i915, 8))
- engine->emit_bb_start = gen8_emit_bb_start;
- else
- engine->emit_bb_start = gen9_emit_bb_start;
-}
-
-static inline void
-logical_ring_default_irqs(struct intel_engine_cs *engine)
-{
- unsigned int shift = 0;
-
- if (INTEL_GEN(engine->i915) < 11) {
- const u8 irq_shifts[] = {
- [RCS0] = GEN8_RCS_IRQ_SHIFT,
- [BCS0] = GEN8_BCS_IRQ_SHIFT,
- [VCS0] = GEN8_VCS0_IRQ_SHIFT,
- [VCS1] = GEN8_VCS1_IRQ_SHIFT,
- [VECS0] = GEN8_VECS_IRQ_SHIFT,
- };
-
- shift = irq_shifts[engine->id];
- }
-
- engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
- engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
-}
-
-static int
-logical_ring_setup(struct intel_engine_cs *engine)
-{
- int err;
-
- err = intel_engine_setup_common(engine);
- if (err)
- return err;
-
- /* Intentionally left blank. */
- engine->buffer = NULL;
-
- tasklet_init(&engine->execlists.tasklet,
- execlists_submission_tasklet, (unsigned long)engine);
-
- logical_ring_default_vfuncs(engine);
- logical_ring_default_irqs(engine);
-
- return 0;
-}
-
-static int logical_ring_init(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct intel_engine_execlists * const execlists = &engine->execlists;
- u32 base = engine->mmio_base;
- int ret;
-
- ret = intel_engine_init_common(engine);
- if (ret)
- return ret;
-
- intel_engine_init_workarounds(engine);
-
- if (HAS_LOGICAL_RING_ELSQ(i915)) {
- execlists->submit_reg = i915->uncore.regs +
- i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
- execlists->ctrl_reg = i915->uncore.regs +
- i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
- } else {
- execlists->submit_reg = i915->uncore.regs +
- i915_mmio_reg_offset(RING_ELSP(base));
- }
-
- execlists->preempt_complete_status = ~0u;
- if (engine->preempt_context)
- execlists->preempt_complete_status =
- upper_32_bits(engine->preempt_context->lrc_desc);
-
- execlists->csb_status =
- &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
-
- execlists->csb_write =
- &engine->status_page.addr[intel_hws_csb_write_index(i915)];
-
- if (INTEL_GEN(engine->i915) < 11)
- execlists->csb_size = GEN8_CSB_ENTRIES;
- else
- execlists->csb_size = GEN11_CSB_ENTRIES;
-
- reset_csb_pointers(execlists);
-
- return 0;
-}
-
-int logical_render_ring_init(struct intel_engine_cs *engine)
-{
- int ret;
-
- ret = logical_ring_setup(engine);
- if (ret)
- return ret;
-
- /* Override some for render ring. */
- engine->init_context = gen8_init_rcs_context;
- engine->emit_flush = gen8_emit_flush_render;
- engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
-
- ret = logical_ring_init(engine);
- if (ret)
- return ret;
-
- ret = intel_init_workaround_bb(engine);
- if (ret) {
- /*
- * We continue even if we fail to initialize WA batch
- * because we only expect rare glitches but nothing
- * critical to prevent us from using GPU
- */
- DRM_ERROR("WA batch buffer initialization failed: %d\n",
- ret);
- }
-
- intel_engine_init_whitelist(engine);
-
- return 0;
-}
-
-int logical_xcs_ring_init(struct intel_engine_cs *engine)
-{
- int err;
-
- err = logical_ring_setup(engine);
- if (err)
- return err;
-
- return logical_ring_init(engine);
-}
-
-static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
-{
- u32 indirect_ctx_offset;
-
- switch (INTEL_GEN(engine->i915)) {
- default:
- MISSING_CASE(INTEL_GEN(engine->i915));
- /* fall through */
- case 11:
- indirect_ctx_offset =
- GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 10:
- indirect_ctx_offset =
- GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 9:
- indirect_ctx_offset =
- GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- case 8:
- indirect_ctx_offset =
- GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- break;
- }
-
- return indirect_ctx_offset;
-}
-
-static void execlists_init_reg_state(u32 *regs,
- struct intel_context *ce,
- struct intel_engine_cs *engine,
- struct intel_ring *ring)
-{
- struct i915_hw_ppgtt *ppgtt = ce->gem_context->ppgtt;
- bool rcs = engine->class == RENDER_CLASS;
- u32 base = engine->mmio_base;
-
- /* A context is actually a big batch buffer with several
- * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
- * values we are setting here are only for the first context restore:
- * on a subsequent save, the GPU will recreate this batchbuffer with new
- * values (including all the missing MI_LOAD_REGISTER_IMM commands that
- * we are not initializing here).
- */
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
-
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
- if (INTEL_GEN(engine->i915) < 11) {
- regs[CTX_CONTEXT_CONTROL + 1] |=
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
- CTX_CTRL_RS_CTX_ENABLE);
- }
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
- RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
- if (rcs) {
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
- RING_INDIRECT_CTX_OFFSET(base), 0);
- if (wa_ctx->indirect_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
- regs[CTX_RCS_INDIRECT_CTX + 1] =
- (ggtt_offset + wa_ctx->indirect_ctx.offset) |
- (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
-
- regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
- intel_lr_indirect_ctx_offset(engine) << 6;
- }
-
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
- if (wa_ctx->per_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
- regs[CTX_BB_PER_CTX_PTR + 1] =
- (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
- }
- }
-
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
-
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
- /* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
-
- if (i915_vm_is_4lvl(&ppgtt->vm)) {
- /* 64b PPGTT (48bit canonical)
- * PDP0_DESCRIPTOR contains the base address to PML4 and
- * other PDP Descriptors are ignored.
- */
- ASSIGN_CTX_PML4(ppgtt, regs);
- } else {
- ASSIGN_CTX_PDP(ppgtt, regs, 3);
- ASSIGN_CTX_PDP(ppgtt, regs, 2);
- ASSIGN_CTX_PDP(ppgtt, regs, 1);
- ASSIGN_CTX_PDP(ppgtt, regs, 0);
- }
-
- if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
-
- i915_oa_init_reg_state(engine, ce, regs);
- }
-
- regs[CTX_END] = MI_BATCH_BUFFER_END;
- if (INTEL_GEN(engine->i915) >= 10)
- regs[CTX_END] |= BIT(0);
-}
-
-static int
-populate_lr_context(struct intel_context *ce,
- struct drm_i915_gem_object *ctx_obj,
- struct intel_engine_cs *engine,
- struct intel_ring *ring)
-{
- void *vaddr;
- u32 *regs;
- int ret;
-
- vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
- return ret;
- }
-
- if (engine->default_state) {
- /*
- * We only want to copy over the template context state;
- * skipping over the headers reserved for GuC communication,
- * leaving those as zero.
- */
- const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;
- void *defaults;
-
- defaults = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (IS_ERR(defaults)) {
- ret = PTR_ERR(defaults);
- goto err_unpin_ctx;
- }
-
- memcpy(vaddr + start, defaults + start, engine->context_size);
- i915_gem_object_unpin_map(engine->default_state);
- }
-
- /* The second page of the context object contains some fields which must
- * be set up prior to the first execution. */
- regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
- execlists_init_reg_state(regs, ce, engine, ring);
- if (!engine->default_state)
- regs[CTX_CONTEXT_CONTROL + 1] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
- if (ce->gem_context == engine->i915->preempt_context &&
- INTEL_GEN(engine->i915) < 11)
- regs[CTX_CONTEXT_CONTROL + 1] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
-
- ret = 0;
-err_unpin_ctx:
- __i915_gem_object_flush_map(ctx_obj,
- LRC_HEADER_PAGES * PAGE_SIZE,
- engine->context_size);
- i915_gem_object_unpin_map(ctx_obj);
- return ret;
-}
-
-static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
-{
- if (ctx->timeline)
- return i915_timeline_get(ctx->timeline);
- else
- return i915_timeline_create(ctx->i915, NULL);
-}
-
-static int execlists_context_deferred_alloc(struct intel_context *ce,
- struct intel_engine_cs *engine)
-{
- struct drm_i915_gem_object *ctx_obj;
- struct i915_vma *vma;
- u32 context_size;
- struct intel_ring *ring;
- struct i915_timeline *timeline;
- int ret;
-
- if (ce->state)
- return 0;
-
- context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
-
- /*
- * Before the actual start of the context image, we insert a few pages
- * for our own use and for sharing with the GuC.
- */
- context_size += LRC_HEADER_PAGES * PAGE_SIZE;
-
- ctx_obj = i915_gem_object_create(engine->i915, context_size);
- if (IS_ERR(ctx_obj))
- return PTR_ERR(ctx_obj);
-
- vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto error_deref_obj;
- }
-
- timeline = get_timeline(ce->gem_context);
- if (IS_ERR(timeline)) {
- ret = PTR_ERR(timeline);
- goto error_deref_obj;
- }
-
- ring = intel_engine_create_ring(engine,
- timeline,
- ce->gem_context->ring_size);
- i915_timeline_put(timeline);
- if (IS_ERR(ring)) {
- ret = PTR_ERR(ring);
- goto error_deref_obj;
- }
-
- ret = populate_lr_context(ce, ctx_obj, engine, ring);
- if (ret) {
- DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
- goto error_ring_free;
- }
-
- ce->ring = ring;
- ce->state = vma;
-
- return 0;
-
-error_ring_free:
- intel_ring_put(ring);
-error_deref_obj:
- i915_gem_object_put(ctx_obj);
- return ret;
-}
-
-/*
- * Print the requests on the engine timeline (prefix "E") and then the
- * requests waiting in the execlists priority queue (prefix "Q") through the
- * caller supplied @show_request callback.
- *
- * For each of the two lists the first @max - 1 requests and the very last
- * one are shown; if more than @max requests were seen, a line reporting the
- * number skipped is printed before the last one. The whole walk is done with
- * engine->timeline.lock held and interrupts disabled.
- *
- * NOTE(review): @max is unsigned, so @max == 0 makes "max - 1" wrap and
- * every request is printed.
- */
-void intel_execlists_show_requests(struct intel_engine_cs *engine,
- struct drm_printer *m,
- void (*show_request)(struct drm_printer *m,
- struct i915_request *rq,
- const char *prefix),
- unsigned int max)
-{
- const struct intel_engine_execlists *execlists = &engine->execlists;
- struct i915_request *rq, *last;
- unsigned long flags;
- unsigned int count;
- struct rb_node *rb;
-
- spin_lock_irqsave(&engine->timeline.lock, flags);
-
- /* Pass 1: requests on the engine timeline. */
- last = NULL;
- count = 0;
- list_for_each_entry(rq, &engine->timeline.requests, link) {
- if (count++ < max - 1)
- show_request(m, rq, "\t\tE ");
- else
- last = rq; /* remember only the most recent overflow */
- }
- if (last) {
- if (count > max) {
- drm_printf(m,
- "\t\t...skipping %d executing requests...\n",
- count - max);
- }
- show_request(m, last, "\t\tE ");
- }
-
- /* Pass 2: requests in the priority queue, same truncation rule. */
- last = NULL;
- count = 0;
- if (execlists->queue_priority_hint != INT_MIN)
- drm_printf(m, "\t\tQueue priority hint: %d\n",
- execlists->queue_priority_hint);
- for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
- int i;
-
- priolist_for_each_request(rq, p, i) {
- if (count++ < max - 1)
- show_request(m, rq, "\t\tQ ");
- else
- last = rq;
- }
- }
- if (last) {
- if (count > max) {
- drm_printf(m,
- "\t\t...skipping %d queued requests...\n",
- count - max);
- }
- show_request(m, last, "\t\tQ ");
- }
-
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-/*
- * Rewind @ce so that its ring resumes from @head. With @scrub set, the
- * register state of the context image is rebuilt first.
- */
-void intel_lr_context_reset(struct intel_engine_cs *engine,
-			    struct intel_context *ce,
-			    u32 head,
-			    bool scrub)
-{
-	if (scrub) {
-		/*
-		 * The context image cannot be trusted after a GPU hang, so
-		 * wipe it and rebuild only what is needed for a plain
-		 * context + ring to execute the breadcrumb update. Pending
-		 * requests of this context get zapped, and later requests
-		 * arrive only after userspace had the chance to recreate
-		 * its own state.
-		 */
-		u32 *reg_state = ce->lrc_reg_state;
-
-		/* skip restoring the vanilla PPHWSP */
-		if (engine->pinned_default_state)
-			memcpy(reg_state,
-			       engine->pinned_default_state +
-			       LRC_STATE_PN * PAGE_SIZE,
-			       engine->context_size - PAGE_SIZE);
-
-		execlists_init_reg_state(reg_state, ce, engine, ce->ring);
-	}
-
-	/* Rerun the request; its payload has been neutered (if guilty). */
-	ce->ring->head = head;
-	intel_ring_update_space(ce->ring);
-
-	__execlists_update_reg_state(ce, engine);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_lrc.c"
-#endif
+++ /dev/null
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _INTEL_LRC_H_
-#define _INTEL_LRC_H_
-
-#include "intel_ringbuffer.h"
-#include "i915_gem_context.h"
-
-/* Execlists regs */
-#define RING_ELSP(base) _MMIO((base) + 0x230)
-#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234)
-#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4)
-#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244)
-#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3)
-#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0)
-#define CTX_CTRL_RS_CTX_ENABLE (1 << 1)
-#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2)
-#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
-#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510)
-#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550)
-
-#define EL_CTRL_LOAD (1 << 0)
-
-/* The docs specify that the write pointer wraps around after 5h, "After status
- * is written out to the last available status QW at offset 5h, this pointer
- * wraps to 0."
- *
- * Therefore, one must infer that even though there are 3 bits available, 6 and
- * 7 appear to be reserved.
- */
-#define GEN8_CSB_ENTRIES 6
-#define GEN8_CSB_PTR_MASK 0x7
-#define GEN8_CSB_READ_PTR_MASK (GEN8_CSB_PTR_MASK << 8)
-#define GEN8_CSB_WRITE_PTR_MASK (GEN8_CSB_PTR_MASK << 0)
-
-#define GEN11_CSB_ENTRIES 12
-#define GEN11_CSB_PTR_MASK 0xf
-#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8)
-#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0)
-
-enum {
- INTEL_CONTEXT_SCHEDULE_IN = 0,
- INTEL_CONTEXT_SCHEDULE_OUT,
- INTEL_CONTEXT_SCHEDULE_PREEMPTED,
-};
-
-/* Logical Rings */
-void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
-int logical_render_ring_init(struct intel_engine_cs *engine);
-int logical_xcs_ring_init(struct intel_engine_cs *engine);
-
-/* Logical Ring Contexts */
-
-/*
- * We allocate a header at the start of the context image for our own
- * use, therefore the actual location of the logical state is offset
- * from the start of the VMA. The layout is
- *
- * | [guc] | [hwsp] [logical state] |
- * |<- our header ->|<- context image ->|
- *
- */
-/* The first page is used for sharing data with the GuC */
-#define LRC_GUCSHR_PN (0)
-#define LRC_GUCSHR_SZ (1)
-/* At the start of the context image is its per-process HWS page */
-#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ)
-#define LRC_PPHWSP_SZ (1)
-/* Finally we have the logical state for the context */
-#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
-
-/*
- * Currently we include the PPHWSP in __intel_engine_context_size() so
- * the size of the header is synonymous with the start of the PPHWSP.
- */
-#define LRC_HEADER_PAGES LRC_PPHWSP_PN
-
-struct drm_printer;
-
-struct drm_i915_private;
-struct i915_gem_context;
-
-void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
-
-void intel_lr_context_reset(struct intel_engine_cs *engine,
- struct intel_context *ce,
- u32 head,
- bool scrub);
-
-void intel_execlists_show_requests(struct intel_engine_cs *engine,
- struct drm_printer *m,
- void (*show_request)(struct drm_printer *m,
- struct i915_request *rq,
- const char *prefix),
- unsigned int max);
-
-#endif /* _INTEL_LRC_H_ */
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#ifndef _INTEL_LRC_REG_H_
-#define _INTEL_LRC_REG_H_
-
-#include <linux/types.h>
-
-/* GEN8+ Reg State Context */
-#define CTX_LRI_HEADER_0 0x01
-#define CTX_CONTEXT_CONTROL 0x02
-#define CTX_RING_HEAD 0x04
-#define CTX_RING_TAIL 0x06
-#define CTX_RING_BUFFER_START 0x08
-#define CTX_RING_BUFFER_CONTROL 0x0a
-#define CTX_BB_HEAD_U 0x0c
-#define CTX_BB_HEAD_L 0x0e
-#define CTX_BB_STATE 0x10
-#define CTX_SECOND_BB_HEAD_U 0x12
-#define CTX_SECOND_BB_HEAD_L 0x14
-#define CTX_SECOND_BB_STATE 0x16
-#define CTX_BB_PER_CTX_PTR 0x18
-#define CTX_RCS_INDIRECT_CTX 0x1a
-#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
-#define CTX_LRI_HEADER_1 0x21
-#define CTX_CTX_TIMESTAMP 0x22
-#define CTX_PDP3_UDW 0x24
-#define CTX_PDP3_LDW 0x26
-#define CTX_PDP2_UDW 0x28
-#define CTX_PDP2_LDW 0x2a
-#define CTX_PDP1_UDW 0x2c
-#define CTX_PDP1_LDW 0x2e
-#define CTX_PDP0_UDW 0x30
-#define CTX_PDP0_LDW 0x32
-#define CTX_LRI_HEADER_2 0x41
-#define CTX_R_PWR_CLK_STATE 0x42
-#define CTX_END 0x44
-
-#define CTX_REG(reg_state, pos, reg, val) do { \
- u32 *reg_state__ = (reg_state); \
- const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
- (reg_state__)[(pos__) + 1] = (val); \
-} while (0)
-
-#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
- u32 *reg_state__ = (reg_state); \
- const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
- (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \
- (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \
-} while (0)
-
-/*
- * Store the address returned by px_dma() for the PML4 of @ppgtt into the
- * value slots of the PDP0 upper/lower dword registers in @reg_state.
- * @ppgtt is parenthesised so that any pointer expression may be passed.
- */
-#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
-	u32 *reg_state__ = (reg_state); \
-	const u64 addr__ = px_dma(&(ppgtt)->pml4); \
-	(reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
-	(reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
-} while (0)
-
-#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
-#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
-#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19
-#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A
-
-#endif /* _INTEL_LRC_REG_H_ */
+++ /dev/null
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions: *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "intel_mocs.h"
-#include "intel_lrc.h"
-#include "intel_ringbuffer.h"
-
-/* structures required */
-
-/* One MOCS table entry, as filled in by MOCS_ENTRY(). */
-struct drm_i915_mocs_entry {
- u32 control_value; /* value for the per-engine MOCS control register */
- u16 l3cc_value; /* 16-bit half of an LNCFCMOCS register */
- u16 used; /* non-zero if the entry was explicitly defined */
-};
-
-/* Per-platform MOCS table selected by get_mocs_settings(). */
-struct drm_i915_mocs_table {
- unsigned int size; /* number of elements in @table (ARRAY_SIZE) */
- unsigned int n_entries; /* number of entries that get programmed */
- const struct drm_i915_mocs_entry *table;
-};
-
-/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
-#define _LE_CACHEABILITY(value) ((value) << 0)
-#define _LE_TGT_CACHE(value) ((value) << 2)
-#define LE_LRUM(value) ((value) << 4)
-#define LE_AOM(value) ((value) << 6)
-#define LE_RSC(value) ((value) << 7)
-#define LE_SCC(value) ((value) << 8)
-#define LE_PFM(value) ((value) << 11)
-#define LE_SCF(value) ((value) << 14)
-#define LE_COS(value) ((value) << 15)
-#define LE_SSE(value) ((value) << 17)
-
-/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
-#define L3_ESC(value) ((value) << 0)
-#define L3_SCC(value) ((value) << 1)
-#define _L3_CACHEABILITY(value) ((value) << 4)
-
-/* Helper defines */
-#define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */
-#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
-
-/* (e)LLC caching options */
-#define LE_0_PAGETABLE _LE_CACHEABILITY(0)
-#define LE_1_UC _LE_CACHEABILITY(1)
-#define LE_2_WT _LE_CACHEABILITY(2)
-#define LE_3_WB _LE_CACHEABILITY(3)
-
-/* Target cache */
-#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0)
-#define LE_TC_1_LLC _LE_TGT_CACHE(1)
-#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2)
-#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3)
-
-/* L3 caching options */
-#define L3_0_DIRECT _L3_CACHEABILITY(0)
-#define L3_1_UC _L3_CACHEABILITY(1)
-#define L3_2_RESERVED _L3_CACHEABILITY(2)
-#define L3_3_WB _L3_CACHEABILITY(3)
-
-#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
- [__idx] = { \
- .control_value = __control_value, \
- .l3cc_value = __l3cc_value, \
- .used = 1, \
- }
-
-/*
- * MOCS tables
- *
- * These are the MOCS tables that are programmed across all the rings.
- * The control value is programmed to all the rings that support the
- * MOCS registers. While the l3cc_values are only programmed to the
- * LNCFCMOCS0 - LNCFCMOCS32 registers.
- *
- * These tables are intended to be kept reasonably consistent across
- * HW platforms, and for ICL+, be identical across OSes. To achieve
- * that, for Icelake and above, list of entries is published as part
- * of bspec.
- *
- * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon. For the time
- * being they will be initialized to PTE.
- *
- * The last two entries are reserved by the hardware. For ICL+ they
- * should be initialized according to bspec and never used, for older
- * platforms they should never be written to.
- *
- * NOTE: These tables are part of bspec and defined as part of hardware
- * interface for ICL+. For older platforms, they are part of kernel
- * ABI. It is expected that, for specific hardware platform, existing
- * entries will remain constant and the table will only be updated by
- * adding new entries, filling unused positions.
- */
-#define GEN9_MOCS_ENTRIES \
- MOCS_ENTRY(I915_MOCS_UNCACHED, \
- LE_1_UC | LE_TC_2_LLC_ELLC, \
- L3_1_UC), \
- MOCS_ENTRY(I915_MOCS_PTE, \
- LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
- L3_3_WB)
-
-static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
- GEN9_MOCS_ENTRIES,
- MOCS_ENTRY(I915_MOCS_CACHED,
- LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
- L3_3_WB)
-};
-
-/* NOTE: the LE_TGT_CACHE is not used on Broxton */
-static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
- GEN9_MOCS_ENTRIES,
- MOCS_ENTRY(I915_MOCS_CACHED,
- LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3),
- L3_3_WB)
-};
-
-#define GEN11_MOCS_ENTRIES \
- /* Base - Uncached (Deprecated) */ \
- MOCS_ENTRY(I915_MOCS_UNCACHED, \
- LE_1_UC | LE_TC_1_LLC, \
- L3_1_UC), \
- /* Base - L3 + LeCC:PAT (Deprecated) */ \
- MOCS_ENTRY(I915_MOCS_PTE, \
- LE_0_PAGETABLE | LE_TC_1_LLC, \
- L3_3_WB), \
- /* Base - L3 + LLC */ \
- MOCS_ENTRY(2, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
- L3_3_WB), \
- /* Base - Uncached */ \
- MOCS_ENTRY(3, \
- LE_1_UC | LE_TC_1_LLC, \
- L3_1_UC), \
- /* Base - L3 */ \
- MOCS_ENTRY(4, \
- LE_1_UC | LE_TC_1_LLC, \
- L3_3_WB), \
- /* Base - LLC */ \
- MOCS_ENTRY(5, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
- L3_1_UC), \
- /* Age 0 - LLC */ \
- MOCS_ENTRY(6, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
- L3_1_UC), \
- /* Age 0 - L3 + LLC */ \
- MOCS_ENTRY(7, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
- L3_3_WB), \
- /* Age: Don't Chg. - LLC */ \
- MOCS_ENTRY(8, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
- L3_1_UC), \
- /* Age: Don't Chg. - L3 + LLC */ \
- MOCS_ENTRY(9, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
- L3_3_WB), \
- /* No AOM - LLC */ \
- MOCS_ENTRY(10, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
- L3_1_UC), \
- /* No AOM - L3 + LLC */ \
- MOCS_ENTRY(11, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
- L3_3_WB), \
- /* No AOM; Age 0 - LLC */ \
- MOCS_ENTRY(12, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
- L3_1_UC), \
- /* No AOM; Age 0 - L3 + LLC */ \
- MOCS_ENTRY(13, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
- L3_3_WB), \
- /* No AOM; Age:DC - LLC */ \
- MOCS_ENTRY(14, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
- L3_1_UC), \
- /* No AOM; Age:DC - L3 + LLC */ \
- MOCS_ENTRY(15, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
- L3_3_WB), \
- /* Self-Snoop - L3 + LLC */ \
- MOCS_ENTRY(18, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
- L3_3_WB), \
- /* Skip Caching - L3 + LLC(12.5%) */ \
- MOCS_ENTRY(19, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
- L3_3_WB), \
- /* Skip Caching - L3 + LLC(25%) */ \
- MOCS_ENTRY(20, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
- L3_3_WB), \
- /* Skip Caching - L3 + LLC(50%) */ \
- MOCS_ENTRY(21, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
- L3_3_WB), \
- /* Skip Caching - L3 + LLC(75%) */ \
- MOCS_ENTRY(22, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
- L3_3_WB), \
- /* Skip Caching - L3 + LLC(87.5%) */ \
- MOCS_ENTRY(23, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
- L3_3_WB), \
- /* HW Reserved - SW program but never use */ \
- MOCS_ENTRY(62, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
- L3_1_UC), \
- /* HW Reserved - SW program but never use */ \
- MOCS_ENTRY(63, \
- LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
- L3_1_UC)
-
-static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
- GEN11_MOCS_ENTRIES
-};
-
-/**
- * get_mocs_settings() - select the MOCS table for the device
- * @dev_priv: i915 device.
- * @table: Output table that will be made to point at appropriate
- * MOCS values for the device.
- *
- * This function will return the values of the MOCS table that needs to
- * be programmed for the platform. It will return the values that need
- * to be programmed and if they need to be programmed.
- *
- * @table is only filled in when a table exists for the platform; callers
- * must not look at it when false is returned.
- *
- * Return: true if there are applicable MOCS settings for the device.
- */
-static bool get_mocs_settings(struct drm_i915_private *dev_priv,
-			      struct drm_i915_mocs_table *table)
-{
-	if (INTEL_GEN(dev_priv) >= 11) {
-		table->size = ARRAY_SIZE(icelake_mocs_table);
-		table->table = icelake_mocs_table;
-		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
-	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
-		table->size = ARRAY_SIZE(skylake_mocs_table);
-		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
-		table->table = skylake_mocs_table;
-	} else if (IS_GEN9_LP(dev_priv)) {
-		table->size = ARRAY_SIZE(broxton_mocs_table);
-		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
-		table->table = broxton_mocs_table;
-	} else {
-		WARN_ONCE(INTEL_GEN(dev_priv) >= 9,
-			  "Platform that should have a MOCS table does not.\n");
-
-		/*
-		 * @table was never written on this path, so bail out before
-		 * the validation below can read uninitialised fields.
-		 */
-		return false;
-	}
-
-	/* WaDisableSkipCaching:skl,bxt,kbl,glk */
-	if (IS_GEN(dev_priv, 9)) {
-		unsigned int i;
-
-		/* Reject any table that sets ESC or SCC bits in l3cc_value. */
-		for (i = 0; i < table->size; i++)
-			if (WARN_ON(table->table[i].l3cc_value &
-				    (L3_ESC(1) | L3_SCC(0x7))))
-				return false;
-	}
-
-	return true;
-}
-
-/*
- * Map an engine id and a MOCS index to the corresponding MOCS register.
- * Engines without a known register bank trigger MISSING_CASE() and yield
- * INVALID_MMIO_REG.
- */
-static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
-{
-	i915_reg_t reg = INVALID_MMIO_REG;
-
-	switch (engine_id) {
-	case RCS0:
-		reg = GEN9_GFX_MOCS(index);
-		break;
-	case VCS0:
-		reg = GEN9_MFX0_MOCS(index);
-		break;
-	case BCS0:
-		reg = GEN9_BLT_MOCS(index);
-		break;
-	case VECS0:
-		reg = GEN9_VEBOX_MOCS(index);
-		break;
-	case VCS1:
-		reg = GEN9_MFX1_MOCS(index);
-		break;
-	case VCS2:
-		reg = GEN11_MFX2_MOCS(index);
-		break;
-	default:
-		MISSING_CASE(engine_id);
-		break;
-	}
-
-	return reg;
-}
-
-/*
- * Look up the control_value for @index. Entries that are not marked as
- * used fall back to the control_value of the I915_MOCS_PTE entry.
- */
-static u32 get_entry_control(const struct drm_i915_mocs_table *table,
-			     unsigned int index)
-{
-	const struct drm_i915_mocs_entry *entry = &table->table[index];
-
-	if (!entry->used)
-		entry = &table->table[I915_MOCS_PTE];
-
-	return entry->control_value;
-}
-
-/**
- * intel_mocs_init_engine() - program the mocs control table of an engine
- * @engine: The engine whose MOCS control registers are written.
- *
- * This function writes the control value of every MOCS entry to the
- * engine's MOCS registers with I915_WRITE(). Entries that the platform
- * table does not define are set to the control value of I915_MOCS_PTE.
- * Nothing is written if get_mocs_settings() reports no table for the device.
- */
-void intel_mocs_init_engine(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct drm_i915_mocs_table table;
- unsigned int index;
- u32 unused_value;
-
- if (!get_mocs_settings(dev_priv, &table))
- return;
-
- /* Set unused values to PTE */
- unused_value = table.table[I915_MOCS_PTE].control_value;
-
- /* Entries covered by the table array (holes resolve to PTE too). */
- for (index = 0; index < table.size; index++) {
- u32 value = get_entry_control(&table, index);
-
- I915_WRITE(mocs_register(engine->id, index), value);
- }
-
- /* All remaining entries are also unused */
- for (; index < table.n_entries; index++)
- I915_WRITE(mocs_register(engine->id, index), unused_value);
-}
-
-/**
- * emit_mocs_control_table() - emit the mocs control table
- * @rq: Request to set up the MOCS table for.
- * @table: The values to program into the control regs.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
- *
- * Return: 0 on success, otherwise the error status.
- */
-static int emit_mocs_control_table(struct i915_request *rq,
- const struct drm_i915_mocs_table *table)
-{
- enum intel_engine_id engine = rq->engine->id;
- unsigned int index;
- u32 unused_value;
- u32 *cs;
-
- /* The loops below assume the table array fits within n_entries. */
- if (GEM_WARN_ON(table->size > table->n_entries))
- return -ENODEV;
-
- /* Set unused values to PTE */
- unused_value = table->table[I915_MOCS_PTE].control_value;
-
- /*
- * Dwords written below: one LRI header, an (offset, value) pair per
- * entry and a trailing MI_NOOP.
- */
- cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
-
- for (index = 0; index < table->size; index++) {
- u32 value = get_entry_control(table, index);
-
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
- *cs++ = value;
- }
-
- /* All remaining entries are also unused */
- for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
- *cs++ = unused_value;
- }
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-/*
- * Look up the l3cc_value for @index. Entries that are not marked as used
- * fall back to the l3cc_value of the I915_MOCS_PTE entry.
- */
-static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
-			  unsigned int index)
-{
-	const struct drm_i915_mocs_entry *entry = &table->table[index];
-
-	if (!entry->used)
-		entry = &table->table[I915_MOCS_PTE];
-
-	return entry->l3cc_value;
-}
-
-/*
- * Pack two 16-bit l3cc values into one 32-bit register value: @low goes
- * into bits 15:0 and @high into bits 31:16. @table is not used.
- */
-static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
-			       u16 low,
-			       u16 high)
-{
-	/*
-	 * Widen before shifting: a bare u16 is promoted to (signed) int, and
-	 * shifting a value with bit 15 set into the sign bit is undefined.
-	 */
-	return low | (u32)high << 16;
-}
-
-/**
- * emit_mocs_l3cc_table() - emit the mocs l3cc table
- * @rq: Request to set up the MOCS table for.
- * @table: The values to program into the l3cc regs.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address. This register set is
- * programmed in pairs.
- *
- * Return: 0 on success, otherwise the error status.
- */
-static int emit_mocs_l3cc_table(struct i915_request *rq,
- const struct drm_i915_mocs_table *table)
-{
- u16 unused_value;
- unsigned int i;
- u32 *cs;
-
- if (GEM_WARN_ON(table->size > table->n_entries))
- return -ENODEV;
-
- /* Set unused values to PTE */
- unused_value = table->table[I915_MOCS_PTE].l3cc_value;
-
- /*
- * Dwords written below: one LRI header, an (offset, value) pair per
- * register (two entries share a register) and a trailing MI_NOOP.
- * NOTE(review): the sum only matches for an even n_entries.
- */
- cs = intel_ring_begin(rq, 2 + table->n_entries);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
-
- /* Full pairs taken from the table array. */
- for (i = 0; i < table->size / 2; i++) {
- u16 low = get_entry_l3cc(table, 2 * i);
- u16 high = get_entry_l3cc(table, 2 * i + 1);
-
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, low, high);
- }
-
- /* Odd table size - 1 left over */
- if (table->size & 0x01) {
- u16 low = get_entry_l3cc(table, 2 * i);
-
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, low, unused_value);
- i++;
- }
-
- /* All remaining entries are also unused */
- for (; i < table->n_entries / 2; i++) {
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, unused_value, unused_value);
- }
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-/**
- * intel_mocs_init_l3cc_table() - program the mocs l3cc table
- * @dev_priv: i915 device private
- *
- * This function simply programs the LNCFCMOCS registers for the given table
- * with I915_WRITE(). This register set is programmed in pairs: two 16-bit
- * l3cc values are combined into each register.
- *
- * These registers may get programmed more than once, it is simpler to
- * re-program 32 registers than maintain the state of when they were programmed.
- * We are always reprogramming with the same values and this only on context
- * start.
- *
- * Return: Nothing.
- */
-void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
-{
- struct drm_i915_mocs_table table;
- unsigned int i;
- u16 unused_value;
-
- if (!get_mocs_settings(dev_priv, &table))
- return;
-
- /* Set unused values to PTE */
- unused_value = table.table[I915_MOCS_PTE].l3cc_value;
-
- /* Full pairs taken from the table array. */
- for (i = 0; i < table.size / 2; i++) {
- u16 low = get_entry_l3cc(&table, 2 * i);
- u16 high = get_entry_l3cc(&table, 2 * i + 1);
-
- I915_WRITE(GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, low, high));
- }
-
- /* Odd table size - 1 left over */
- if (table.size & 0x01) {
- u16 low = get_entry_l3cc(&table, 2 * i);
-
- I915_WRITE(GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, low, unused_value));
- i++;
- }
-
- /* All remaining entries are also unused */
- for (; i < table.n_entries / 2; i++)
- I915_WRITE(GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, unused_value, unused_value));
-}
-
-/**
- * intel_rcs_context_init_mocs() - program the MOCS register.
- * @rq: Request to set up the MOCS tables for.
- *
- * Emits the MOCS control table followed by the l3cc table into @rq. If the
- * device has no applicable MOCS settings nothing is emitted.
- *
- * These registers are partially stored in the RCS context, so they are
- * emitted at the same time so that when a context is created these registers
- * are set up. These registers have to be emitted into the start of the
- * context as setting the ELSP will re-init some of these registers back
- * to the hw values.
- *
- * Return: 0 on success, otherwise the error status.
- */
-int intel_rcs_context_init_mocs(struct i915_request *rq)
-{
-	struct drm_i915_mocs_table table;
-	int err;
-
-	if (!get_mocs_settings(rq->i915, &table))
-		return 0;
-
-	/* Program the RCS control registers */
-	err = emit_mocs_control_table(rq, &table);
-	if (err)
-		return err;
-
-	/* Now program the l3cc registers */
-	return emit_mocs_l3cc_table(rq, &table);
-}
+++ /dev/null
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef INTEL_MOCS_H
-#define INTEL_MOCS_H
-
-/**
- * DOC: Memory Objects Control State (MOCS)
- *
- * Motivation:
- * In previous Gens the MOCS setting was a value that was set by user land as
- * part of the batch. In Gen9 this has changed to be a single table (per ring)
- * that all batches now reference by index instead of programming the MOCS
- * directly.
- *
- * The one wrinkle in this is that only PART of the MOCS tables are included
- * in context (The GFX_MOCS_0 - GFX_MOCS_64 and the LNCFCMOCS0 - LNCFCMOCS32
- * registers). The rest are not (the settings for the other rings).
- *
- * This table needs to be set at system start-up because of the way the table
- * interacts with the contexts and the GmmLib interface.
- *
- *
- * Implementation:
- *
- * The tables (one per supported platform) are defined in intel_mocs.c
- * and are programmed in the first batch after the context is loaded
- * (with the hardware workarounds). This will then let the usual
- * context handling keep the MOCS in step.
- */
-
-#include "i915_drv.h"
-
-int intel_rcs_context_init_mocs(struct i915_request *rq);
-void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
-void intel_mocs_init_engine(struct intel_engine_cs *engine);
-
-#endif
+++ /dev/null
-/*
- * Copyright © 2008-2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- * Zou Nan hai <nanhai.zou@intel.com>
- * Xiang Hai hao<haihao.xiang@intel.com>
- *
- */
-
-#include <linux/log2.h>
-
-#include <drm/i915_drm.h>
-
-#include "i915_drv.h"
-#include "i915_gem_render_state.h"
-#include "i915_reset.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_workarounds.h"
-
-/* Rough estimate of the typical request size, performing a flush,
- * set-context and then emitting the batch.
- */
-#define LEGACY_REQUEST_SIZE 200
-
-/*
- * Recompute the space of @ring from its head, emit and size fields,
- * cache the result in ring->space and return it.
- */
-unsigned int intel_ring_update_space(struct intel_ring *ring)
-{
-	const unsigned int avail =
-		__intel_ring_space(ring->head, ring->emit, ring->size);
-
-	ring->space = avail;
-
-	return avail;
-}
-
-/*
- * Emit an MI_FLUSH based flush into @rq. EMIT_INVALIDATE adds
- * MI_READ_FLUSH to the first flush; EMIT_FLUSH inserts four dword stores
- * to the scratch offset between the leading and the trailing flush.
- *
- * Returns 0 on success or the error from intel_ring_begin().
- */
-static int
-gen2_render_ring_flush(struct i915_request *rq, u32 mode)
-{
-	unsigned int num_store_dw = 0;
-	u32 cmd = MI_FLUSH;
-	u32 *cs;
-
-	if (mode & EMIT_INVALIDATE)
-		cmd |= MI_READ_FLUSH;
-	if (mode & EMIT_FLUSH)
-		num_store_dw = 4;
-
-	/* Two flush dwords plus three dwords per store. */
-	cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = cmd;
-	for (; num_store_dw; num_store_dw--) {
-		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-		*cs++ = i915_scratch_offset(rq->i915);
-		*cs++ = 0;
-	}
-	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-/*
- * Emit an MI_FLUSH based flush into @rq. With EMIT_INVALIDATE the flush
- * command gains MI_EXE_FLUSH (plus MI_INVALIDATE_ISP on G4X and gen5) and a
- * delay sequence is inserted between the leading and trailing flush.
- *
- * Returns 0 on success or the error from intel_ring_begin().
- */
-static int
-gen4_render_ring_flush(struct i915_request *rq, u32 mode)
-{
- u32 cmd, *cs;
- int i;
-
- /*
- * read/write caches:
- *
- * I915_GEM_DOMAIN_RENDER is always invalidated, but is
- * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
- * also flushed at 2d versus 3d pipeline switches.
- *
- * read-only caches:
- *
- * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
- * MI_READ_FLUSH is set, and is always flushed on 965.
- *
- * I915_GEM_DOMAIN_COMMAND may not exist?
- *
- * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
- * invalidated when MI_EXE_FLUSH is set.
- *
- * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
- * invalidated with every MI_FLUSH.
- *
- * TLBs:
- *
- * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
- * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
- * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
- * are flushed at any MI_FLUSH.
- */
-
- cmd = MI_FLUSH;
- if (mode & EMIT_INVALIDATE) {
- cmd |= MI_EXE_FLUSH;
- if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5))
- cmd |= MI_INVALIDATE_ISP;
- }
-
- /*
- * Dword budget: the leading and trailing flush, plus for invalidation
- * two 4-dword PIPE_CONTROL packets and twelve MI_FLUSH (20 in total).
- */
- i = 2;
- if (mode & EMIT_INVALIDATE)
- i += 20;
-
- cs = intel_ring_begin(rq, i);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = cmd;
-
- /*
- * A random delay to let the CS invalidate take effect? Without this
- * delay, the GPU relocation path fails as the CS does not see
- * the updated contents. Just as important, if we apply the flushes
- * to the EMIT_FLUSH branch (i.e. immediately after the relocation
- * write and before the invalidate on the next batch), the relocations
- * still fail. This implies that it is a delay following invalidation
- * that is required to reset the caches as opposed to a delay to
- * ensure the memory is written.
- */
- if (mode & EMIT_INVALIDATE) {
- *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
- *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0;
- *cs++ = 0;
-
- for (i = 0; i < 12; i++)
- *cs++ = MI_FLUSH;
-
- *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
- *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0;
- *cs++ = 0;
- }
-
- *cs++ = cmd;
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-/*
- * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
- * implementing two workarounds on gen6. From section 1.4.7.1
- * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
- *
- * [DevSNB-C+{W/A}] Before any depth stall flush (including those
- * produced by non-pipelined state commands), software needs to first
- * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
- * 0.
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
- * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
- *
- * And the workaround for these two requires this workaround first:
- *
- * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
- * BEFORE the pipe-control with a post-sync op and no write-cache
- * flushes.
- *
- * And this last workaround is tricky because of the requirements on
- * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
- * volume 2 part 1:
- *
- * "1 of the following must also be set:
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)
- * - Stall at Pixel Scoreboard ([1] of DW1)
- * - Depth Stall ([13] of DW1)
- * - Post-Sync Operation ([13] of DW1)
- * - Notify Enable ([8] of DW1)"
- *
- * The cache flushes require the workaround flush that triggered this
- * one, so we can't use it. Depth stall would trigger the same.
- * Post-sync nonzero is what triggered this second workaround, so we
- * can't use that one either. Notify enable is IRQs, which aren't
- * really our business. That leaves only stall at scoreboard.
- */
-static int
-gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
-{
- /* Both PIPE_CONTROLs target the scratch page, two cachelines in */
- u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
- u32 *cs;
-
- /* First packet: CS stall paired with stall-at-scoreboard only */
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = GFX_OP_PIPE_CONTROL(5);
- *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
- *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0; /* low dword */
- *cs++ = 0; /* high dword */
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- /* Second packet: the non-zero post-sync op (a QW write to scratch) */
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = GFX_OP_PIPE_CONTROL(5);
- *cs++ = PIPE_CONTROL_QW_WRITE;
- *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0;
- *cs++ = 0;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-/*
- * Emit the gen6 render flush/invalidate PIPE_CONTROL for @rq, preceded by the
- * SNB post-sync-nonzero workaround sequence.
- *
- * Returns 0 on success or a negative error code if ring space could not be
- * reserved.
- */
-static int
-gen6_render_ring_flush(struct i915_request *rq, u32 mode)
-{
-	u32 pc_flags = 0;
-	u32 scratch;
-	u32 *out;
-	int err;
-
-	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	err = gen6_emit_post_sync_nonzero_flush(rq);
-	if (err)
-		return err;
-
-	/*
-	 * Flush everything rather than picking bits per write domain;
-	 * experiments showed the finer-grained approach buys little.
-	 * CS_STALL makes any following seqno write wait for the render
-	 * cache flush to land.
-	 */
-	if (mode & EMIT_FLUSH)
-		pc_flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
-			    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-			    PIPE_CONTROL_CS_STALL;
-
-	/* TLB invalidate requires a post-sync write, hence QW_WRITE. */
-	if (mode & EMIT_INVALIDATE)
-		pc_flags |= PIPE_CONTROL_TLB_INVALIDATE |
-			    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
-			    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
-			    PIPE_CONTROL_VF_CACHE_INVALIDATE |
-			    PIPE_CONTROL_CONST_CACHE_INVALIDATE |
-			    PIPE_CONTROL_STATE_CACHE_INVALIDATE |
-			    PIPE_CONTROL_QW_WRITE |
-			    PIPE_CONTROL_CS_STALL;
-
-	out = intel_ring_begin(rq, 4);
-	if (IS_ERR(out))
-		return PTR_ERR(out);
-
-	scratch = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
-
-	out[0] = GFX_OP_PIPE_CONTROL(4);
-	out[1] = pc_flags;
-	out[2] = scratch | PIPE_CONTROL_GLOBAL_GTT;
-	out[3] = 0;
-	intel_ring_advance(rq, out + 4);
-
-	return 0;
-}
-
-/*
- * Write the closing commands of @rq at @cs: the post-sync-nonzero workaround
- * PIPE_CONTROLs, a flushing PIPE_CONTROL that stores the fence seqno at the
- * timeline's HWSP offset, a hangcheck seqno store and MI_USER_INTERRUPT.
- * Records the resulting ring offset in rq->tail and returns the advanced
- * command pointer.
- */
-static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
- *cs++ = 0;
- *cs++ = 0;
-
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = PIPE_CONTROL_QW_WRITE;
- *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = 0;
-
- /* Finally we can flush and with it emit the breadcrumb */
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_CS_STALL);
- *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = rq->fence.seqno;
-
- /* Store the next hangcheck seqno via the status-page index */
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX;
- *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT;
- *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-
-/*
- * Emit a standalone PIPE_CONTROL carrying only CS_STALL and
- * STALL_AT_SCOREBOARD. Returns 0, or the error from intel_ring_begin().
- */
-static int
-gen7_render_ring_cs_stall_wa(struct i915_request *rq)
-{
-	u32 *out = intel_ring_begin(rq, 4);
-
-	if (IS_ERR(out))
-		return PTR_ERR(out);
-
-	out[0] = GFX_OP_PIPE_CONTROL(4);
-	out[1] = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
-	out[2] = 0;
-	out[3] = 0;
-	intel_ring_advance(rq, out + 4);
-
-	return 0;
-}
-
-/*
- * Emit the gen7 render flush/invalidate PIPE_CONTROL for @rq.
- *
- * @mode selects EMIT_FLUSH and/or EMIT_INVALIDATE. When invalidating, a
- * CS-stall workaround PIPE_CONTROL is emitted first; its failure is now
- * propagated instead of being silently dropped, matching how
- * gen6_render_ring_flush() treats its own workaround helper.
- *
- * Returns 0 on success or a negative error code if ring space could not be
- * reserved for either packet.
- */
-static int
-gen7_render_ring_flush(struct i915_request *rq, u32 mode)
-{
-	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
-	u32 *cs, flags = 0;
-	int ret;
-
-	/*
-	 * Ensure that any following seqno writes only happen when the render
-	 * cache is indeed flushed.
-	 *
-	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
-	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
-	 * don't try to be clever and just set it unconditionally.
-	 */
-	flags |= PIPE_CONTROL_CS_STALL;
-
-	/*
-	 * Just flush everything. Experiments have shown that reducing the
-	 * number of bits based on the write domains has little performance
-	 * impact.
-	 */
-	if (mode & EMIT_FLUSH) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-	}
-	if (mode & EMIT_INVALIDATE) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
-		/*
-		 * TLB invalidate requires a post-sync write.
-		 */
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
-		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
-
-		/*
-		 * Workaround: we must issue a pipe_control with CS-stall bit
-		 * set before a pipe_control command that has the state cache
-		 * invalidate bit set. If it cannot be emitted, bail out rather
-		 * than emit the invalidate without it.
-		 */
-		ret = gen7_render_ring_cs_stall_wa(rq);
-		if (ret)
-			return ret;
-	}
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = flags;
-	*cs++ = scratch_addr;
-	*cs++ = 0;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-/*
- * Write the closing commands of @rq at @cs: a flushing PIPE_CONTROL that
- * stores the fence seqno at the timeline's HWSP offset, a hangcheck seqno
- * store and MI_USER_INTERRUPT. Records the resulting ring offset in rq->tail
- * and returns the advanced command pointer.
- */
-static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_GLOBAL_GTT_IVB |
- PIPE_CONTROL_CS_STALL);
- *cs++ = rq->timeline->hwsp_offset;
- *cs++ = rq->fence.seqno;
-
- /* Store the next hangcheck seqno via the status-page index */
- *cs++ = GFX_OP_PIPE_CONTROL(4);
- *cs++ = (PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_STORE_DATA_INDEX |
- PIPE_CONTROL_GLOBAL_GTT_IVB);
- *cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
- *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-
-/*
- * Write the closing commands of @rq at @cs using MI_FLUSH_DW stores into the
- * status page: the fence seqno, then the hangcheck seqno, followed by
- * MI_USER_INTERRUPT. Records the resulting ring offset in rq->tail and
- * returns the advanced command pointer.
- */
-static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- /* The indexed stores below only work on the engine's own status page */
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = rq->fence.seqno;
-
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
- *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-
-/* Number of extra seqno stores emitted after the MI_FLUSH_DW writes */
-#define GEN7_XCS_WA 32
-/*
- * Write the closing commands of @rq at @cs: MI_FLUSH_DW stores of the fence
- * seqno and hangcheck seqno, GEN7_XCS_WA repeated MI_STORE_DWORD_INDEX writes
- * of the fence seqno, a final MI_FLUSH_DW and MI_USER_INTERRUPT. Records the
- * resulting ring offset in rq->tail and returns the advanced command pointer.
- */
-static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- int i;
-
- /* The indexed stores below only work on the engine's own status page */
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = rq->fence.seqno;
-
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
- *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
- /* Rewrite the same seqno GEN7_XCS_WA times */
- for (i = 0; i < GEN7_XCS_WA; i++) {
- *cs++ = MI_STORE_DWORD_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR;
- *cs++ = rq->fence.seqno;
- }
-
- *cs++ = MI_FLUSH_DW;
- *cs++ = 0;
- *cs++ = 0;
-
- *cs++ = MI_USER_INTERRUPT;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-#undef GEN7_XCS_WA
-
-/*
- * Program the engine's HWSP write mask with @mask, except that on the render
- * engine the user-interrupt bit is always left unmasked.
- */
-static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
-{
-	u32 keep_unmasked = 0;
-
-	/*
-	 * Keep the render interrupt unmasked as this papers over
-	 * lost interrupts following a reset.
-	 */
-	if (engine->class == RENDER_CLASS)
-		keep_unmasked = INTEL_GEN(engine->i915) >= 6 ?
-				BIT(0) : I915_USER_INTERRUPT;
-
-	intel_engine_set_hwsp_writemask(engine, mask & ~keep_unmasked);
-}
-
-/*
- * Write the physical address @phys of the status page into HWS_PGA.
- */
-static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- u32 addr;
-
- addr = lower_32_bits(phys);
- /* On gen4+, address bits 35:32 are folded into register bits 7:4 */
- if (INTEL_GEN(dev_priv) >= 4)
- addr |= (phys >> 28) & 0xf0;
-
- I915_WRITE(HWS_PGA, addr);
-}
-
-/*
- * Return the page at the head of the scatterlist backing the engine's status
- * page object. The object's pages must already be pinned.
- */
-static struct page *status_page(struct intel_engine_cs *engine)
-{
- struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
-
- GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
- return sg_page(obj->mm.pages->sgl);
-}
-
-/*
- * Point the hardware at the status page by physical address and program the
- * HWSP write mask with all bits set (subject to set_hwstam()'s adjustment).
- */
-static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
-{
- set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
- set_hwstam(engine, ~0u);
-}
-
-/*
- * Write @offset into the engine's status page address register, choosing the
- * register by generation (and by engine id on gen7), then read it back to
- * post the write.
- */
-static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- i915_reg_t hwsp;
-
- /*
- * The ring status page addresses are no longer next to the rest of
- * the ring registers as of gen7.
- */
- if (IS_GEN(dev_priv, 7)) {
- switch (engine->id) {
- /*
- * No more rings exist on Gen7. Default case is only to shut up
- * gcc switch check warning.
- */
- default:
- GEM_BUG_ON(engine->id);
- /* fallthrough */
- case RCS0:
- hwsp = RENDER_HWS_PGA_GEN7;
- break;
- case BCS0:
- hwsp = BLT_HWS_PGA_GEN7;
- break;
- case VCS0:
- hwsp = BSD_HWS_PGA_GEN7;
- break;
- case VECS0:
- hwsp = VEBOX_HWS_PGA_GEN7;
- break;
- }
- } else if (IS_GEN(dev_priv, 6)) {
- hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
- } else {
- hwsp = RING_HWS_PGA(engine->mmio_base);
- }
-
- I915_WRITE(hwsp, offset);
- POSTING_READ(hwsp);
-}
-
-/*
- * On gen6 and gen7 only: request a TLB invalidate with sync flush through
- * RING_INSTPM and wait for INSTPM_SYNC_FLUSH to clear, logging an error if
- * the wait times out. Does nothing on other generations.
- */
-static void flush_cs_tlb(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- if (!IS_GEN_RANGE(dev_priv, 6, 7))
- return;
-
- /* ring should be idle before issuing a sync flush */
- WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
-
- ENGINE_WRITE(engine, RING_INSTPM,
- _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
- INSTPM_SYNC_FLUSH));
- /* NOTE(review): timeout of 1000 is presumably in ms - confirm */
- if (intel_wait_for_register(engine->uncore,
- RING_INSTPM(engine->mmio_base),
- INSTPM_SYNC_FLUSH, 0,
- 1000))
- DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
- engine->name);
-}
-
-/*
- * Point the hardware at the status page by GGTT offset, program the HWSP
- * write mask, and flush the command streamer TLB where applicable.
- */
-static void ring_setup_status_page(struct intel_engine_cs *engine)
-{
- set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
- set_hwstam(engine, ~0u);
-
- flush_cs_tlb(engine);
-}
-
-/*
- * Stop the engine's ring and clear its HEAD, TAIL and CTL registers.
- *
- * Returns true if the HEAD address reads back as zero afterwards. Returns
- * false if that readback is non-zero, or if (on gen3+) the ring failed to
- * report idle while HEAD and TAIL still differ.
- */
-static bool stop_ring(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- if (INTEL_GEN(dev_priv) > 2) {
- ENGINE_WRITE(engine,
- RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING));
- if (intel_wait_for_register(engine->uncore,
- RING_MI_MODE(engine->mmio_base),
- MODE_IDLE,
- MODE_IDLE,
- 1000)) {
- DRM_ERROR("%s : timed out trying to stop ring\n",
- engine->name);
-
- /*
- * Sometimes we observe that the idle flag is not
- * set even though the ring is empty. So double
- * check before giving up.
- */
- if (ENGINE_READ(engine, RING_HEAD) !=
- ENGINE_READ(engine, RING_TAIL))
- return false;
- }
- }
-
- /* Make the ring look empty (HEAD == TAIL) before zeroing both */
- ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL));
-
- ENGINE_WRITE(engine, RING_HEAD, 0);
- ENGINE_WRITE(engine, RING_TAIL, 0);
-
- /* The ring must be empty before it is disabled */
- ENGINE_WRITE(engine, RING_CTL, 0);
-
- return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
-}
-
-/*
- * (Re)start the engine's legacy ring: stop it (retrying once), program the
- * status page, reset breadcrumbs, then program START/HEAD/TAIL/CTL and wait
- * for RING_VALID. FORCEWAKE_ALL is held for the whole sequence.
- *
- * Returns 0 on success or -EIO if the ring could not be stopped or did not
- * become valid.
- */
-static int init_ring_common(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct intel_ring *ring = engine->buffer;
- int ret = 0;
-
- intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
-
- if (!stop_ring(engine)) {
- /* G45 ring initialization often fails to reset head to zero */
- DRM_DEBUG_DRIVER("%s head not reset to zero "
- "ctl %08x head %08x tail %08x start %08x\n",
- engine->name,
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_HEAD),
- ENGINE_READ(engine, RING_TAIL),
- ENGINE_READ(engine, RING_START));
-
- /* Second and final attempt before giving up */
- if (!stop_ring(engine)) {
- DRM_ERROR("failed to set %s head to zero "
- "ctl %08x head %08x tail %08x start %08x\n",
- engine->name,
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_HEAD),
- ENGINE_READ(engine, RING_TAIL),
- ENGINE_READ(engine, RING_START));
- ret = -EIO;
- goto out;
- }
- }
-
- if (HWS_NEEDS_PHYSICAL(dev_priv))
- ring_setup_phys_status_page(engine);
- else
- ring_setup_status_page(engine);
-
- intel_engine_reset_breadcrumbs(engine);
-
- /* Enforce ordering by reading HEAD register back */
- ENGINE_READ(engine, RING_HEAD);
-
- /* Initialize the ring. This must happen _after_ we've cleared the ring
- * registers with the above sequence (the readback of the HEAD registers
- * also enforces ordering), otherwise the hw might lose the new ring
- * register values. */
- ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma));
-
- /* WaClearRingBufHeadRegAtInit:ctg,elk */
- if (ENGINE_READ(engine, RING_HEAD))
- DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n",
- engine->name, ENGINE_READ(engine, RING_HEAD));
-
- /* Check that the ring offsets point within the ring! */
- GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
- GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
- intel_ring_update_space(ring);
-
- /* First wake the ring up to an empty/idle ring */
- ENGINE_WRITE(engine, RING_HEAD, ring->head);
- ENGINE_WRITE(engine, RING_TAIL, ring->head);
- ENGINE_POSTING_READ(engine, RING_TAIL);
-
- ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID);
-
- /* If the head is still not zero, the ring is dead */
- if (intel_wait_for_register(engine->uncore,
- RING_CTL(engine->mmio_base),
- RING_VALID, RING_VALID,
- 50)) {
- DRM_ERROR("%s initialization failed "
- "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
- engine->name,
- ENGINE_READ(engine, RING_CTL),
- ENGINE_READ(engine, RING_CTL) & RING_VALID,
- ENGINE_READ(engine, RING_HEAD), ring->head,
- ENGINE_READ(engine, RING_TAIL), ring->tail,
- ENGINE_READ(engine, RING_START),
- i915_ggtt_offset(ring->vma));
- ret = -EIO;
- goto out;
- }
-
- /* Undo the STOP_RING set by stop_ring() on gen3+ */
- if (INTEL_GEN(dev_priv) > 2)
- ENGINE_WRITE(engine,
- RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
-
- /* Now awake, let it get started */
- if (ring->tail != ring->head) {
- ENGINE_WRITE(engine, RING_TAIL, ring->tail);
- ENGINE_POSTING_READ(engine, RING_TAIL);
- }
-
- /* Papering over lost _interrupts_ immediately following the restart */
- intel_engine_queue_breadcrumbs(engine);
-out:
- intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
-
- return ret;
-}
-
-/* Reset preparation hook: only stops the engine's command streamer. */
-static void reset_prepare(struct intel_engine_cs *engine)
-{
- intel_engine_stop_cs(engine);
-}
-
-/*
- * Under the engine timeline lock, find the first request that has not
- * completed, pass it to i915_reset_request() with @stalled, and move the
- * ring's software head to that request's start. If every request has
- * completed, the head is moved to the current ring tail instead.
- */
-static void reset_ring(struct intel_engine_cs *engine, bool stalled)
-{
- struct i915_timeline *tl = &engine->timeline;
- struct i915_request *pos, *rq;
- unsigned long flags;
- u32 head;
-
- rq = NULL;
- spin_lock_irqsave(&tl->lock, flags);
- /* rq stays NULL when no incomplete request is found */
- list_for_each_entry(pos, &tl->requests, link) {
- if (!i915_request_completed(pos)) {
- rq = pos;
- break;
- }
- }
-
- /*
- * The guilty request will get skipped on a hung engine.
- *
- * Users of client default contexts do not rely on logical
- * state preserved between batches so it is safe to execute
- * queued requests following the hang. Non default contexts
- * rely on preserved state, so skipping a batch loses the
- * evolution of the state and it needs to be considered corrupted.
- * Executing more queued batches on top of corrupted state is
- * risky. But we take the risk by trying to advance through
- * the queued requests in order to make the client behaviour
- * more predictable around resets, by not throwing away random
- * amount of batches it has prepared for execution. Sophisticated
- * clients can use gem_reset_stats_ioctl and dma fence status
- * (exported via sync_file info ioctl on explicit fences) to observe
- * when it loses the context state and should rebuild accordingly.
- *
- * The context ban, and ultimately the client ban, mechanism are safety
- * valves if client submission ends up resulting in nothing more than
- * subsequent hangs.
- */
-
- if (rq) {
- /*
- * Try to restore the logical GPU state to match the
- * continuation of the request queue. If we skip the
- * context/PD restore, then the next request may try to execute
- * assuming that its context is valid and loaded on the GPU and
- * so may try to access invalid memory, prompting repeated GPU
- * hangs.
- *
- * If the request was guilty, we still restore the logical
- * state in case the next request requires it (e.g. the
- * aliasing ppgtt), but skip over the hung batch.
- *
- * If the request was innocent, we try to replay the request
- * with the restored context.
- */
- i915_reset_request(rq, stalled);
-
- GEM_BUG_ON(rq->ring != engine->buffer);
- head = rq->head;
- } else {
- head = engine->buffer->tail;
- }
- engine->buffer->head = intel_ring_wrap(engine->buffer, head);
-
- spin_unlock_irqrestore(&tl->lock, flags);
-}
-
-/* Reset completion hook: intentionally empty for this submission backend. */
-static void reset_finish(struct intel_engine_cs *engine)
-{
-}
-
-/*
- * Emit the context workarounds followed by the render state for @rq.
- * Returns 0 on success or the first error encountered.
- */
-static int intel_rcs_ctx_init(struct i915_request *rq)
-{
-	int err = intel_engine_emit_ctx_wa(rq);
-
-	if (err)
-		return err;
-
-	return i915_gem_render_state_emit(rq);
-}
-
-/*
- * Apply the generation-specific render engine register workarounds below and
- * then start the ring via init_ring_common(), whose result is returned.
- */
-static int init_render_ring(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
- if (IS_GEN_RANGE(dev_priv, 4, 6))
- I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
-
- /* We need to disable the AsyncFlip performance optimisations in order
- * to use MI_WAIT_FOR_EVENT within the CS. It should already be
- * programmed to '1' on all products.
- *
- * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
- */
- if (IS_GEN_RANGE(dev_priv, 6, 7))
- I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
-
- /* Required for the hardware to program scanline values for waiting */
- /* WaEnableFlushTlbInvalidationMode:snb */
- if (IS_GEN(dev_priv, 6))
- I915_WRITE(GFX_MODE,
- _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
-
- /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
- if (IS_GEN(dev_priv, 7))
- I915_WRITE(GFX_MODE_GEN7,
- _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
- _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
-
- if (IS_GEN(dev_priv, 6)) {
- /* From the Sandybridge PRM, volume 1 part 3, page 24:
- * "If this bit is set, STCunit will have LRA as replacement
- * policy. [...] This bit must be reset. LRA replacement
- * policy is not supported."
- */
- I915_WRITE(CACHE_MODE_0,
- _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
- }
-
- if (IS_GEN_RANGE(dev_priv, 6, 7))
- I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
-
- return init_ring_common(engine);
-}
-
-/*
- * Walk every request on the engine timeline under its lock: flag those not
- * yet signaled with -EIO and mark all of them complete.
- */
-static void cancel_requests(struct intel_engine_cs *engine)
-{
-	struct i915_timeline *tl = &engine->timeline;
-	struct i915_request *rq;
-	unsigned long irqflags;
-
-	spin_lock_irqsave(&tl->lock, irqflags);
-
-	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(rq, &tl->requests, link) {
-		if (!i915_request_signaled(rq))
-			dma_fence_set_error(&rq->fence, -EIO);
-
-		i915_request_mark_complete(rq);
-	}
-
-	/* Remaining _unready_ requests will be nop'ed when submitted */
-
-	spin_unlock_irqrestore(&tl->lock, irqflags);
-}
-
-/*
- * Submit @request and then write the ring's new tail to RING_TAIL.
- */
-static void i9xx_submit_request(struct i915_request *request)
-{
- i915_request_submit(request);
-
- ENGINE_WRITE(request->engine, RING_TAIL,
- intel_ring_set_tail(request->ring, request->tail));
-}
-
-/*
- * Write the closing commands of @rq at @cs: MI_FLUSH, indexed status-page
- * stores of the fence seqno and the hangcheck seqno, then MI_USER_INTERRUPT.
- * Records the resulting ring offset in rq->tail and returns the advanced
- * command pointer.
- */
-static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- /* The indexed stores below only work on the engine's own status page */
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
- *cs++ = MI_FLUSH;
-
- *cs++ = MI_STORE_DWORD_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR;
- *cs++ = rq->fence.seqno;
-
- *cs++ = MI_STORE_DWORD_INDEX;
- *cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
- *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
- *cs++ = MI_USER_INTERRUPT;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-
-#define GEN5_WA_STORES 8 /* must be at least 1! */
-/*
- * Write the closing commands of @rq at @cs: MI_FLUSH, an indexed store of the
- * hangcheck seqno, GEN5_WA_STORES repeated indexed stores of the fence seqno,
- * then MI_USER_INTERRUPT and an MI_NOOP. Records the resulting ring offset in
- * rq->tail and returns the advanced command pointer.
- */
-static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
- int i;
-
- /* The indexed stores below only work on the engine's own status page */
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
-
- *cs++ = MI_FLUSH;
-
- *cs++ = MI_STORE_DWORD_INDEX;
- *cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
- *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
- /* The seqno itself is only written by this loop, hence at least one */
- BUILD_BUG_ON(GEN5_WA_STORES < 1);
- for (i = 0; i < GEN5_WA_STORES; i++) {
- *cs++ = MI_STORE_DWORD_INDEX;
- *cs++ = I915_GEM_HWS_SEQNO_ADDR;
- *cs++ = rq->fence.seqno;
- }
-
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_NOOP;
-
- rq->tail = intel_ring_offset(rq, cs);
- assert_ring_tail_valid(rq->ring, rq->tail);
-
- return cs;
-}
-#undef GEN5_WA_STORES
-
-/* Enable the engine's interrupt bits in the GT interrupt mask. */
-static void
-gen5_irq_enable(struct intel_engine_cs *engine)
-{
- gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
-}
-
-/* Disable the engine's interrupt bits in the GT interrupt mask. */
-static void
-gen5_irq_disable(struct intel_engine_cs *engine)
-{
- gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
-}
-
-/*
- * Clear the engine's bits in the cached interrupt mask, write the mask to
- * GEN2_IMR and read it back to post the write.
- */
-static void
-i9xx_irq_enable(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	i915->irq_mask &= ~engine->irq_enable_mask;
-
-	intel_uncore_write(engine->uncore, GEN2_IMR, i915->irq_mask);
-	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
-}
-
-/*
- * Set the engine's bits in the cached interrupt mask and write the mask to
- * GEN2_IMR.
- */
-static void
-i9xx_irq_disable(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	i915->irq_mask |= engine->irq_enable_mask;
-
-	intel_uncore_write(engine->uncore, GEN2_IMR, i915->irq_mask);
-}
-
-/*
- * 16-bit variant of the interrupt enable: clear the engine's bits in the
- * cached mask, write it to GEN2_IMR, then issue a posting read.
- */
-static void
-i8xx_irq_enable(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- dev_priv->irq_mask &= ~engine->irq_enable_mask;
- I915_WRITE16(GEN2_IMR, dev_priv->irq_mask);
- /* NOTE(review): posting read targets RING_IMR, not GEN2_IMR - confirm intent */
- POSTING_READ16(RING_IMR(engine->mmio_base));
-}
-
-/*
- * 16-bit variant of the interrupt disable: set the engine's bits in the
- * cached mask and write it to GEN2_IMR.
- */
-static void
-i8xx_irq_disable(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- dev_priv->irq_mask |= engine->irq_enable_mask;
- I915_WRITE16(GEN2_IMR, dev_priv->irq_mask);
-}
-
-/*
- * Emit a plain MI_FLUSH padded with MI_NOOP; @mode is not consulted.
- * Returns 0, or the error from intel_ring_begin().
- */
-static int
-bsd_ring_flush(struct i915_request *rq, u32 mode)
-{
-	u32 *out = intel_ring_begin(rq, 2);
-
-	if (IS_ERR(out))
-		return PTR_ERR(out);
-
-	out[0] = MI_FLUSH;
-	out[1] = MI_NOOP;
-	intel_ring_advance(rq, out + 2);
-
-	return 0;
-}
-
-/*
- * Unmask the engine's enable and keep bits in RING_IMR, post the write, and
- * then enable the engine's bits in the GT interrupt mask.
- */
-static void
-gen6_irq_enable(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR,
- ~(engine->irq_enable_mask | engine->irq_keep_mask));
-
- /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
- ENGINE_POSTING_READ(engine, RING_IMR);
-
- gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
-}
-
-/*
- * Mask everything in RING_IMR except the keep bits, then disable the engine's
- * bits in the GT interrupt mask.
- */
-static void
-gen6_irq_disable(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
- gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
-}
-
-/*
- * Unmask the engine's enable bits in RING_IMR, post the write, and then
- * unmask the same bits through the PM interrupt helper.
- */
-static void
-hsw_vebox_irq_enable(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);
-
- /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
- ENGINE_POSTING_READ(engine, RING_IMR);
-
- gen6_unmask_pm_irq(engine->i915, engine->irq_enable_mask);
-}
-
-/*
- * Mask every bit in RING_IMR and mask the engine's bits through the PM
- * interrupt helper.
- */
-static void
-hsw_vebox_irq_disable(struct intel_engine_cs *engine)
-{
- ENGINE_WRITE(engine, RING_IMR, ~0);
- gen6_mask_pm_irq(engine->i915, engine->irq_enable_mask);
-}
-
-/*
- * Emit MI_BATCH_BUFFER_START for the batch at @offset. The non-secure bit is
- * set unless @dispatch_flags contains I915_DISPATCH_SECURE; @length is not
- * used. Returns 0, or the error from intel_ring_begin().
- */
-static int
-i965_emit_bb_start(struct i915_request *rq,
-		   u64 offset, u32 length,
-		   unsigned int dispatch_flags)
-{
-	u32 bb_start = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
-	u32 *out;
-
-	if (!(dispatch_flags & I915_DISPATCH_SECURE))
-		bb_start |= MI_BATCH_NON_SECURE_I965;
-
-	out = intel_ring_begin(rq, 2);
-	if (IS_ERR(out))
-		return PTR_ERR(out);
-
-	out[0] = bb_start;
-	out[1] = offset;
-	intel_ring_advance(rq, out + 2);
-
-	return 0;
-}
-
-/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
-#define I830_BATCH_LIMIT SZ_256K
-#define I830_TLB_ENTRIES (2)
-#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
-/*
- * Emit a batch start for @rq. A COLOR_BLT over the scratch area is always
- * emitted first. Unless @dispatch_flags has I915_DISPATCH_PINNED, the batch
- * (at most I830_BATCH_LIMIT bytes) is then blitted into the scratch area and
- * executed from there instead of from @offset.
- *
- * Returns 0 on success, -ENOSPC if an unpinned batch exceeds
- * I830_BATCH_LIMIT, or the error from intel_ring_begin().
- */
-static int
-i830_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- unsigned int dispatch_flags)
-{
- u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
-
- GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
-
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Evict the invalid PTE TLBs */
- *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
- *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
- *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
- *cs++ = cs_offset;
- *cs++ = 0xdeadbeef;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
- if (len > I830_BATCH_LIMIT)
- return -ENOSPC;
-
- /* 6 dwords for the copy blit plus 2 for MI_FLUSH/MI_NOOP */
- cs = intel_ring_begin(rq, 6 + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Blit the batch (which has now all relocs applied) to the
- * stable batch scratch bo area (so that the CS never
- * stumbles over its tlb invalidation bug) ...
- */
- *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA;
- *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
- *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
- *cs++ = cs_offset;
- *cs++ = 4096;
- *cs++ = offset;
-
- *cs++ = MI_FLUSH;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- /* ... and execute it. */
- offset = cs_offset;
- }
-
- cs = intel_ring_begin(rq, 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
- *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
- MI_BATCH_NON_SECURE);
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-/*
- * Emit MI_BATCH_BUFFER_START for the batch at @offset. The non-secure bit is
- * OR'ed into the address dword unless @dispatch_flags contains
- * I915_DISPATCH_SECURE; @len is not used. Returns 0, or the error from
- * intel_ring_begin().
- */
-static int
-i915_emit_bb_start(struct i915_request *rq,
-		   u64 offset, u32 len,
-		   unsigned int dispatch_flags)
-{
-	u64 target = offset;
-	u32 *out;
-
-	out = intel_ring_begin(rq, 2);
-	if (IS_ERR(out))
-		return PTR_ERR(out);
-
-	if (!(dispatch_flags & I915_DISPATCH_SECURE))
-		target |= MI_BATCH_NON_SECURE;
-
-	out[0] = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
-	out[1] = target;
-	intel_ring_advance(rq, out + 2);
-
-	return 0;
-}
-
-/*
- * Pin @ring for use: pin its timeline, pin its vma into the global GTT and
- * map it for CPU access, storing the mapping in ring->vaddr.
- *
- * Returns 0 on success. On failure everything acquired so far is released
- * and a negative error code is returned.
- */
-int intel_ring_pin(struct intel_ring *ring)
-{
- struct i915_vma *vma = ring->vma;
- enum i915_map_type map = i915_coherent_map_type(vma->vm->i915);
- unsigned int flags;
- void *addr;
- int ret;
-
- /* Must not already be mapped */
- GEM_BUG_ON(ring->vaddr);
-
- ret = i915_timeline_pin(ring->timeline);
- if (ret)
- return ret;
-
- flags = PIN_GLOBAL;
-
- /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
- if (vma->obj->stolen)
- flags |= PIN_MAPPABLE;
- else
- flags |= PIN_HIGH;
-
- ret = i915_vma_pin(vma, 0, 0, flags);
- if (unlikely(ret))
- goto unpin_timeline;
-
- /* Map through the aperture when possible, else map the object pages */
- if (i915_vma_is_map_and_fenceable(vma))
- addr = (void __force *)i915_vma_pin_iomap(vma);
- else
- addr = i915_gem_object_pin_map(vma->obj, map);
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- goto unpin_ring;
- }
-
- vma->obj->pin_global++;
-
- ring->vaddr = addr;
- return 0;
-
-unpin_ring:
- i915_vma_unpin(vma);
-unpin_timeline:
- i915_timeline_unpin(ring->timeline);
- return ret;
-}
-
-/*
- * Collapse the ring's head, tail and emit offsets onto @tail, which must be a
- * valid ring offset, and recompute the available space.
- */
-void intel_ring_reset(struct intel_ring *ring, u32 tail)
-{
-	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
-	ring->emit = ring->head = ring->tail = tail;
-
-	intel_ring_update_space(ring);
-}
-
-/*
- * Undo intel_ring_pin(): reset the ring offsets to the current tail, drop the
- * CPU mapping, unpin the vma and unpin the timeline.
- */
-void intel_ring_unpin(struct intel_ring *ring)
-{
- GEM_BUG_ON(!ring->vma);
- GEM_BUG_ON(!ring->vaddr);
-
- /* Discard any unused bytes beyond that submitted to hw. */
- intel_ring_reset(ring, ring->tail);
-
- /* Release whichever kind of mapping intel_ring_pin() would have made */
- if (i915_vma_is_map_and_fenceable(ring->vma))
- i915_vma_unpin_iomap(ring->vma);
- else
- i915_gem_object_unpin_map(ring->vma->obj);
- ring->vaddr = NULL;
-
- ring->vma->obj->pin_global--;
- i915_vma_unpin(ring->vma);
-
- i915_timeline_unpin(ring->timeline);
-}
-
-/*
- * Allocate a @size byte object for a ring (stolen memory first, internal
- * memory as fallback) and return its vma in the global GTT address space.
- * Returns an ERR_PTR on failure; the object is released if no vma could be
- * created for it.
- */
-static struct i915_vma *
-intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
-{
-	struct i915_address_space *ggtt_vm = &dev_priv->ggtt.vm;
-	struct drm_i915_gem_object *bo;
-	struct i915_vma *vma;
-
-	bo = i915_gem_object_create_stolen(dev_priv, size);
-	if (!bo)
-		bo = i915_gem_object_create_internal(dev_priv, size);
-	if (IS_ERR(bo))
-		return ERR_CAST(bo);
-
-	/*
-	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
-	 * if supported by the platform's GGTT.
-	 */
-	if (ggtt_vm->has_read_only)
-		i915_gem_object_set_readonly(bo);
-
-	vma = i915_vma_instance(bo, ggtt_vm, NULL);
-	if (IS_ERR(vma))
-		i915_gem_object_put(bo);
-
-	return vma;
-}
-
-/*
- * Allocate and initialise a ring of @size bytes for @engine, bound to
- * @timeline.
- *
- * @size must be a power of two that fits the RING_CTL size field, and
- * @timeline must not be the engine's own timeline. The caller must hold
- * struct_mutex. The new ring holds a reference on @timeline, which is dropped
- * again if the backing vma cannot be created.
- *
- * Returns the new ring, or an ERR_PTR on allocation failure.
- */
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine,
-			 struct i915_timeline *timeline,
-			 int size)
-{
-	struct intel_ring *ring;
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!is_power_of_2(size));
-	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-	GEM_BUG_ON(timeline == &engine->timeline);
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-	if (!ring)
-		return ERR_PTR(-ENOMEM);
-
-	kref_init(&ring->ref);
-	INIT_LIST_HEAD(&ring->request_list);
-	ring->timeline = i915_timeline_get(timeline);
-
-	ring->size = size;
-	/*
-	 * Workaround an erratum on the i830 which causes a hang if
-	 * the TAIL pointer points to within the last 2 cachelines
-	 * of the buffer.
-	 */
-	ring->effective_size = size;
-	if (IS_I830(engine->i915) || IS_I845G(engine->i915))
-		ring->effective_size -= 2 * CACHELINE_BYTES;
-
-	intel_ring_update_space(ring);
-
-	vma = intel_ring_create_vma(engine->i915, size);
-	if (IS_ERR(vma)) {
-		/* Drop the timeline reference taken above before bailing */
-		i915_timeline_put(ring->timeline);
-		kfree(ring);
-		return ERR_CAST(vma);
-	}
-	ring->vma = vma;
-
-	return ring;
-}
-
-/*
- * kref release callback for a ring: close its vma, release the backing
- * object, drop the timeline reference and free the ring itself.
- */
-void intel_ring_free(struct kref *ref)
-{
- struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
- /* Grab the object now, before the vma is closed below */
- struct drm_i915_gem_object *obj = ring->vma->obj;
-
- i915_vma_close(ring->vma);
- __i915_gem_object_release_unless_active(obj);
-
- i915_timeline_put(ring->timeline);
- kfree(ring);
-}
-
-static void __ring_context_fini(struct intel_context *ce)
-{
- GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
- i915_gem_object_put(ce->state->obj);
-}
-
-static void ring_context_destroy(struct kref *ref)
-{
- struct intel_context *ce = container_of(ref, typeof(*ce), ref);
-
- GEM_BUG_ON(intel_context_is_pinned(ce));
-
- if (ce->state)
- __ring_context_fini(ce);
-
- intel_context_free(ce);
-}
-
-static int __context_pin_ppgtt(struct i915_gem_context *ctx)
-{
- struct i915_hw_ppgtt *ppgtt;
- int err = 0;
-
- ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt;
- if (ppgtt)
- err = gen6_ppgtt_pin(ppgtt);
-
- return err;
-}
-
-static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
-{
- struct i915_hw_ppgtt *ppgtt;
-
- ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt;
- if (ppgtt)
- gen6_ppgtt_unpin(ppgtt);
-}
-
-static int __context_pin(struct intel_context *ce)
-{
- struct i915_vma *vma;
- int err;
-
- vma = ce->state;
- if (!vma)
- return 0;
-
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
- if (err)
- return err;
-
- /*
- * And mark is as a globally pinned object to let the shrinker know
- * it cannot reclaim the object until we release it.
- */
- vma->obj->pin_global++;
- vma->obj->mm.dirty = true;
-
- return 0;
-}
-
-static void __context_unpin(struct intel_context *ce)
-{
- struct i915_vma *vma;
-
- vma = ce->state;
- if (!vma)
- return;
-
- vma->obj->pin_global--;
- i915_vma_unpin(vma);
-}
-
-static void ring_context_unpin(struct intel_context *ce)
-{
- __context_unpin_ppgtt(ce->gem_context);
- __context_unpin(ce);
-}
-
-static struct i915_vma *
-alloc_context_vma(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- int err;
-
- obj = i915_gem_object_create(i915, engine->context_size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- /*
- * Try to make the context utilize L3 as well as LLC.
- *
- * On VLV we don't have L3 controls in the PTEs so we
- * shouldn't touch the cache level, especially as that
- * would make the object snooped which might have a
- * negative performance impact.
- *
- * Snooping is required on non-llc platforms in execlist
- * mode, but since all GGTT accesses use PAT entry 0 we
- * get snooping anyway regardless of cache_level.
- *
- * This is only applicable for Ivy Bridge devices since
- * later platforms don't have L3 control bits in the PTE.
- */
- if (IS_IVYBRIDGE(i915))
- i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
-
- if (engine->default_state) {
- void *defaults, *vaddr;
-
- vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
- goto err_obj;
- }
-
- defaults = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (IS_ERR(defaults)) {
- err = PTR_ERR(defaults);
- goto err_map;
- }
-
- memcpy(vaddr, defaults, engine->context_size);
- i915_gem_object_unpin_map(engine->default_state);
-
- i915_gem_object_flush_map(obj);
- i915_gem_object_unpin_map(obj);
- }
-
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_obj;
- }
-
- return vma;
-
-err_map:
- i915_gem_object_unpin_map(obj);
-err_obj:
- i915_gem_object_put(obj);
- return ERR_PTR(err);
-}
-
-static int ring_context_pin(struct intel_context *ce)
-{
- struct intel_engine_cs *engine = ce->engine;
- int err;
-
- /* One ringbuffer to rule them all */
- GEM_BUG_ON(!engine->buffer);
- ce->ring = engine->buffer;
-
- if (!ce->state && engine->context_size) {
- struct i915_vma *vma;
-
- vma = alloc_context_vma(engine);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- ce->state = vma;
- }
-
- err = __context_pin(ce);
- if (err)
- return err;
-
- err = __context_pin_ppgtt(ce->gem_context);
- if (err)
- goto err_unpin;
-
- return 0;
-
-err_unpin:
- __context_unpin(ce);
- return err;
-}
-
-static void ring_context_reset(struct intel_context *ce)
-{
- intel_ring_reset(ce->ring, 0);
-}
-
-static const struct intel_context_ops ring_context_ops = {
- .pin = ring_context_pin,
- .unpin = ring_context_unpin,
-
- .reset = ring_context_reset,
- .destroy = ring_context_destroy,
-};
-
-static int intel_init_ring_buffer(struct intel_engine_cs *engine)
-{
- struct i915_timeline *timeline;
- struct intel_ring *ring;
- int err;
-
- err = intel_engine_setup_common(engine);
- if (err)
- return err;
-
- timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
- if (IS_ERR(timeline)) {
- err = PTR_ERR(timeline);
- goto err;
- }
- GEM_BUG_ON(timeline->has_initial_breadcrumb);
-
- ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
- i915_timeline_put(timeline);
- if (IS_ERR(ring)) {
- err = PTR_ERR(ring);
- goto err;
- }
-
- err = intel_ring_pin(ring);
- if (err)
- goto err_ring;
-
- GEM_BUG_ON(engine->buffer);
- engine->buffer = ring;
-
- err = intel_engine_init_common(engine);
- if (err)
- goto err_unpin;
-
- GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
-
- return 0;
-
-err_unpin:
- intel_ring_unpin(ring);
-err_ring:
- intel_ring_put(ring);
-err:
- intel_engine_cleanup_common(engine);
- return err;
-}
-
-void intel_engine_cleanup(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- WARN_ON(INTEL_GEN(dev_priv) > 2 &&
- (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
-
- intel_ring_unpin(engine->buffer);
- intel_ring_put(engine->buffer);
-
- if (engine->cleanup)
- engine->cleanup(engine);
-
- intel_engine_cleanup_common(engine);
-
- dev_priv->engine[engine->id] = NULL;
- kfree(engine);
-}
-
-static int load_pd_dir(struct i915_request *rq,
- const struct i915_hw_ppgtt *ppgtt)
-{
- const struct intel_engine_cs * const engine = rq->engine;
- u32 *cs;
-
- cs = intel_ring_begin(rq, 6);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
- *cs++ = PP_DIR_DCLV_2G;
-
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
- *cs++ = ppgtt->pd.base.ggtt_offset << 10;
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int flush_pd_dir(struct i915_request *rq)
-{
- const struct intel_engine_cs * const engine = rq->engine;
- u32 *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Stall until the page table load is complete */
- *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
- *cs++ = i915_scratch_offset(rq->i915);
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
- return 0;
-}
-
-static inline int mi_set_context(struct i915_request *rq, u32 flags)
-{
- struct drm_i915_private *i915 = rq->i915;
- struct intel_engine_cs *engine = rq->engine;
- enum intel_engine_id id;
- const int num_engines =
- IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
- bool force_restore = false;
- int len;
- u32 *cs;
-
- flags |= MI_MM_SPACE_GTT;
- if (IS_HASWELL(i915))
- /* These flags are for resource streamer on HSW+ */
- flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
- else
- flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
-
- len = 4;
- if (IS_GEN(i915, 7))
- len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
- if (flags & MI_FORCE_RESTORE) {
- GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
- flags &= ~MI_FORCE_RESTORE;
- force_restore = true;
- len += 2;
- }
-
- cs = intel_ring_begin(rq, len);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
- if (IS_GEN(i915, 7)) {
- *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
- if (num_engines) {
- struct intel_engine_cs *signaller;
-
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
- for_each_engine(signaller, i915, id) {
- if (signaller == engine)
- continue;
-
- *cs++ = i915_mmio_reg_offset(
- RING_PSMI_CTL(signaller->mmio_base));
- *cs++ = _MASKED_BIT_ENABLE(
- GEN6_PSMI_SLEEP_MSG_DISABLE);
- }
- }
- }
-
- if (force_restore) {
- /*
- * The HW doesn't handle being told to restore the current
- * context very well. Quite often it likes goes to go off and
- * sulk, especially when it is meant to be reloading PP_DIR.
- * A very simple fix to force the reload is to simply switch
- * away from the current context and back again.
- *
- * Note that the kernel_context will contain random state
- * following the INHIBIT_RESTORE. We accept this since we
- * never use the kernel_context state; it is merely a
- * placeholder we use to flush other contexts.
- */
- *cs++ = MI_SET_CONTEXT;
- *cs++ = i915_ggtt_offset(engine->kernel_context->state) |
- MI_MM_SPACE_GTT |
- MI_RESTORE_INHIBIT;
- }
-
- *cs++ = MI_NOOP;
- *cs++ = MI_SET_CONTEXT;
- *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
- /*
- * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
- * WaMiSetContext_Hang:snb,ivb,vlv
- */
- *cs++ = MI_NOOP;
-
- if (IS_GEN(i915, 7)) {
- if (num_engines) {
- struct intel_engine_cs *signaller;
- i915_reg_t last_reg = {}; /* keep gcc quiet */
-
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
- for_each_engine(signaller, i915, id) {
- if (signaller == engine)
- continue;
-
- last_reg = RING_PSMI_CTL(signaller->mmio_base);
- *cs++ = i915_mmio_reg_offset(last_reg);
- *cs++ = _MASKED_BIT_DISABLE(
- GEN6_PSMI_SLEEP_MSG_DISABLE);
- }
-
- /* Insert a delay before the next switch! */
- *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
- *cs++ = i915_mmio_reg_offset(last_reg);
- *cs++ = i915_scratch_offset(rq->i915);
- *cs++ = MI_NOOP;
- }
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- }
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int remap_l3(struct i915_request *rq, int slice)
-{
- u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
- int i;
-
- if (!remap_info)
- return 0;
-
- cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /*
- * Note: We do not worry about the concurrent register cacheline hang
- * here because no other code should access these registers other than
- * at initialization time.
- */
- *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
- for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
- *cs++ = remap_info[i];
- }
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int switch_context(struct i915_request *rq)
-{
- struct intel_engine_cs *engine = rq->engine;
- struct i915_gem_context *ctx = rq->gem_context;
- struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
- unsigned int unwind_mm = 0;
- u32 hw_flags = 0;
- int ret, i;
-
- lockdep_assert_held(&rq->i915->drm.struct_mutex);
- GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
-
- if (ppgtt) {
- int loops;
-
- /*
- * Baytail takes a little more convincing that it really needs
- * to reload the PD between contexts. It is not just a little
- * longer, as adding more stalls after the load_pd_dir (i.e.
- * adding a long loop around flush_pd_dir) is not as effective
- * as reloading the PD umpteen times. 32 is derived from
- * experimentation (gem_exec_parallel/fds) and has no good
- * explanation.
- */
- loops = 1;
- if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
- loops = 32;
-
- do {
- ret = load_pd_dir(rq, ppgtt);
- if (ret)
- goto err;
- } while (--loops);
-
- if (ppgtt->pd_dirty_engines & engine->mask) {
- unwind_mm = engine->mask;
- ppgtt->pd_dirty_engines &= ~unwind_mm;
- hw_flags = MI_FORCE_RESTORE;
- }
- }
-
- if (rq->hw_context->state) {
- GEM_BUG_ON(engine->id != RCS0);
-
- /*
- * The kernel context(s) is treated as pure scratch and is not
- * expected to retain any state (as we sacrifice it during
- * suspend and on resume it may be corrupted). This is ok,
- * as nothing actually executes using the kernel context; it
- * is purely used for flushing user contexts.
- */
- if (i915_gem_context_is_kernel(ctx))
- hw_flags = MI_RESTORE_INHIBIT;
-
- ret = mi_set_context(rq, hw_flags);
- if (ret)
- goto err_mm;
- }
-
- if (ppgtt) {
- ret = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (ret)
- goto err_mm;
-
- ret = flush_pd_dir(rq);
- if (ret)
- goto err_mm;
-
- /*
- * Not only do we need a full barrier (post-sync write) after
- * invalidating the TLBs, but we need to wait a little bit
- * longer. Whether this is merely delaying us, or the
- * subsequent flush is a key part of serialising with the
- * post-sync op, this extra pass appears vital before a
- * mm switch!
- */
- ret = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (ret)
- goto err_mm;
-
- ret = engine->emit_flush(rq, EMIT_FLUSH);
- if (ret)
- goto err_mm;
- }
-
- if (ctx->remap_slice) {
- for (i = 0; i < MAX_L3_SLICES; i++) {
- if (!(ctx->remap_slice & BIT(i)))
- continue;
-
- ret = remap_l3(rq, i);
- if (ret)
- goto err_mm;
- }
-
- ctx->remap_slice = 0;
- }
-
- return 0;
-
-err_mm:
- if (unwind_mm)
- ppgtt->pd_dirty_engines |= unwind_mm;
-err:
- return ret;
-}
-
-static int ring_request_alloc(struct i915_request *request)
-{
- int ret;
-
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
- GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
-
- /*
- * Flush enough space to reduce the likelihood of waiting after
- * we start building the request - in which case we will just
- * have to repeat work.
- */
- request->reserved_space += LEGACY_REQUEST_SIZE;
-
- ret = switch_context(request);
- if (ret)
- return ret;
-
- /* Unconditionally invalidate GPU caches and TLBs. */
- ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
- if (ret)
- return ret;
-
- request->reserved_space -= LEGACY_REQUEST_SIZE;
- return 0;
-}
-
-static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
-{
- struct i915_request *target;
- long timeout;
-
- lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex);
-
- if (intel_ring_update_space(ring) >= bytes)
- return 0;
-
- GEM_BUG_ON(list_empty(&ring->request_list));
- list_for_each_entry(target, &ring->request_list, ring_link) {
- /* Would completion of this request free enough space? */
- if (bytes <= __intel_ring_space(target->postfix,
- ring->emit, ring->size))
- break;
- }
-
- if (WARN_ON(&target->ring_link == &ring->request_list))
- return -ENOSPC;
-
- timeout = i915_request_wait(target,
- I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT);
- if (timeout < 0)
- return timeout;
-
- i915_request_retire_upto(target);
-
- intel_ring_update_space(ring);
- GEM_BUG_ON(ring->space < bytes);
- return 0;
-}
-
-u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
-{
- struct intel_ring *ring = rq->ring;
- const unsigned int remain_usable = ring->effective_size - ring->emit;
- const unsigned int bytes = num_dwords * sizeof(u32);
- unsigned int need_wrap = 0;
- unsigned int total_bytes;
- u32 *cs;
-
- /* Packets must be qword aligned. */
- GEM_BUG_ON(num_dwords & 1);
-
- total_bytes = bytes + rq->reserved_space;
- GEM_BUG_ON(total_bytes > ring->effective_size);
-
- if (unlikely(total_bytes > remain_usable)) {
- const int remain_actual = ring->size - ring->emit;
-
- if (bytes > remain_usable) {
- /*
- * Not enough space for the basic request. So need to
- * flush out the remainder and then wait for
- * base + reserved.
- */
- total_bytes += remain_actual;
- need_wrap = remain_actual | 1;
- } else {
- /*
- * The base request will fit but the reserved space
- * falls off the end. So we don't need an immediate
- * wrap and only need to effectively wait for the
- * reserved size from the start of ringbuffer.
- */
- total_bytes = rq->reserved_space + remain_actual;
- }
- }
-
- if (unlikely(total_bytes > ring->space)) {
- int ret;
-
- /*
- * Space is reserved in the ringbuffer for finalising the
- * request, as that cannot be allowed to fail. During request
- * finalisation, reserved_space is set to 0 to stop the
- * overallocation and the assumption is that then we never need
- * to wait (which has the risk of failing with EINTR).
- *
- * See also i915_request_alloc() and i915_request_add().
- */
- GEM_BUG_ON(!rq->reserved_space);
-
- ret = wait_for_space(ring, total_bytes);
- if (unlikely(ret))
- return ERR_PTR(ret);
- }
-
- if (unlikely(need_wrap)) {
- need_wrap &= ~1;
- GEM_BUG_ON(need_wrap > ring->space);
- GEM_BUG_ON(ring->emit + need_wrap > ring->size);
- GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
-
- /* Fill the tail with MI_NOOP */
- memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
- ring->space -= need_wrap;
- ring->emit = 0;
- }
-
- GEM_BUG_ON(ring->emit > ring->size - bytes);
- GEM_BUG_ON(ring->space < bytes);
- cs = ring->vaddr + ring->emit;
- GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
- ring->emit += bytes;
- ring->space -= bytes;
-
- return cs;
-}
-
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct i915_request *rq)
-{
- int num_dwords;
- void *cs;
-
- num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
- if (num_dwords == 0)
- return 0;
-
- num_dwords = CACHELINE_DWORDS - num_dwords;
- GEM_BUG_ON(num_dwords & 1);
-
- cs = intel_ring_begin(rq, num_dwords);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
- intel_ring_advance(rq, cs);
-
- GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
- return 0;
-}
-
-static void gen6_bsd_submit_request(struct i915_request *request)
-{
- struct intel_uncore *uncore = request->engine->uncore;
-
- intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
- /* Every tail move must follow the sequence below */
-
- /* Disable notification that the ring is IDLE. The GT
- * will then assume that it is busy and bring it out of rc6.
- */
- intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
- _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
-
- /* Clear the context id. Here be magic! */
- intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
-
- /* Wait for the ring not to be idle, i.e. for it to wake up. */
- if (__intel_wait_for_register_fw(uncore,
- GEN6_BSD_SLEEP_PSMI_CONTROL,
- GEN6_BSD_SLEEP_INDICATOR,
- 0,
- 1000, 0, NULL))
- DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
-
- /* Now that the ring is fully powered up, update the tail */
- i9xx_submit_request(request);
-
- /* Let the ring send IDLE messages to the GT again,
- * and so let it sleep to conserve power when idle.
- */
- intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
- _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
-
- intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
-}
-
-static int mi_flush_dw(struct i915_request *rq, u32 flags)
-{
- u32 cmd, *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- cmd = MI_FLUSH_DW;
-
- /*
- * We always require a command barrier so that subsequent
- * commands, such as breadcrumb interrupts, are strictly ordered
- * wrt the contents of the write cache being flushed to memory
- * (and thus being coherent from the CPU).
- */
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
- /*
- * Bspec vol 1c.3 - blitter engine command streamer:
- * "If ENABLED, all TLBs will be invalidated once the flush
- * operation is complete. This bit is only valid when the
- * Post-Sync Operation field is a value of 1h or 3h."
- */
- cmd |= flags;
-
- *cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = 0;
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
-{
- return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
-}
-
-static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
-{
- return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
-}
-
-static int
-hsw_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- unsigned int dispatch_flags)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
- 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
- /* bit0-7 is the length on GEN6+ */
- *cs++ = offset;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int
-gen6_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- unsigned int dispatch_flags)
-{
- u32 *cs;
-
- cs = intel_ring_begin(rq, 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
- 0 : MI_BATCH_NON_SECURE_I965);
- /* bit0-7 is the length on GEN6+ */
- *cs++ = offset;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-/* Blitter support (SandyBridge+) */
-
-static int gen6_ring_flush(struct i915_request *rq, u32 mode)
-{
- return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
-}
-
-static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
- struct intel_engine_cs *engine)
-{
- if (INTEL_GEN(dev_priv) >= 6) {
- engine->irq_enable = gen6_irq_enable;
- engine->irq_disable = gen6_irq_disable;
- } else if (INTEL_GEN(dev_priv) >= 5) {
- engine->irq_enable = gen5_irq_enable;
- engine->irq_disable = gen5_irq_disable;
- } else if (INTEL_GEN(dev_priv) >= 3) {
- engine->irq_enable = i9xx_irq_enable;
- engine->irq_disable = i9xx_irq_disable;
- } else {
- engine->irq_enable = i8xx_irq_enable;
- engine->irq_disable = i8xx_irq_disable;
- }
-}
-
-static void i9xx_set_default_submission(struct intel_engine_cs *engine)
-{
- engine->submit_request = i9xx_submit_request;
- engine->cancel_requests = cancel_requests;
-
- engine->park = NULL;
- engine->unpark = NULL;
-}
-
-static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
-{
- i9xx_set_default_submission(engine);
- engine->submit_request = gen6_bsd_submit_request;
-}
-
-static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
- struct intel_engine_cs *engine)
-{
- /* gen8+ are only supported with execlists */
- GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
-
- intel_ring_init_irq(dev_priv, engine);
-
- engine->init_hw = init_ring_common;
- engine->reset.prepare = reset_prepare;
- engine->reset.reset = reset_ring;
- engine->reset.finish = reset_finish;
-
- engine->cops = &ring_context_ops;
- engine->request_alloc = ring_request_alloc;
-
- /*
- * Using a global execution timeline; the previous final breadcrumb is
- * equivalent to our next initial bread so we can elide
- * engine->emit_init_breadcrumb().
- */
- engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
- if (IS_GEN(dev_priv, 5))
- engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
-
- engine->set_default_submission = i9xx_set_default_submission;
-
- if (INTEL_GEN(dev_priv) >= 6)
- engine->emit_bb_start = gen6_emit_bb_start;
- else if (INTEL_GEN(dev_priv) >= 4)
- engine->emit_bb_start = i965_emit_bb_start;
- else if (IS_I830(dev_priv) || IS_I845G(dev_priv))
- engine->emit_bb_start = i830_emit_bb_start;
- else
- engine->emit_bb_start = i915_emit_bb_start;
-}
-
-int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- int ret;
-
- intel_ring_default_vfuncs(dev_priv, engine);
-
- if (HAS_L3_DPF(dev_priv))
- engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-
- engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-
- if (INTEL_GEN(dev_priv) >= 7) {
- engine->init_context = intel_rcs_ctx_init;
- engine->emit_flush = gen7_render_ring_flush;
- engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
- } else if (IS_GEN(dev_priv, 6)) {
- engine->init_context = intel_rcs_ctx_init;
- engine->emit_flush = gen6_render_ring_flush;
- engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
- } else if (IS_GEN(dev_priv, 5)) {
- engine->emit_flush = gen4_render_ring_flush;
- } else {
- if (INTEL_GEN(dev_priv) < 4)
- engine->emit_flush = gen2_render_ring_flush;
- else
- engine->emit_flush = gen4_render_ring_flush;
- engine->irq_enable_mask = I915_USER_INTERRUPT;
- }
-
- if (IS_HASWELL(dev_priv))
- engine->emit_bb_start = hsw_emit_bb_start;
-
- engine->init_hw = init_render_ring;
-
- ret = intel_init_ring_buffer(engine);
- if (ret)
- return ret;
-
- return 0;
-}
-
-int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- intel_ring_default_vfuncs(dev_priv, engine);
-
- if (INTEL_GEN(dev_priv) >= 6) {
- /* gen6 bsd needs a special wa for tail updates */
- if (IS_GEN(dev_priv, 6))
- engine->set_default_submission = gen6_bsd_set_default_submission;
- engine->emit_flush = gen6_bsd_ring_flush;
- engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
-
- if (IS_GEN(dev_priv, 6))
- engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
- else
- engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
- } else {
- engine->emit_flush = bsd_ring_flush;
- if (IS_GEN(dev_priv, 5))
- engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
- else
- engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
- }
-
- return intel_init_ring_buffer(engine);
-}
-
-int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
-
- intel_ring_default_vfuncs(dev_priv, engine);
-
- engine->emit_flush = gen6_ring_flush;
- engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
-
- if (IS_GEN(dev_priv, 6))
- engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
- else
- engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
-
- return intel_init_ring_buffer(engine);
-}
-
-int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
-
- intel_ring_default_vfuncs(dev_priv, engine);
-
- engine->emit_flush = gen6_ring_flush;
- engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
- engine->irq_enable = hsw_vebox_irq_enable;
- engine->irq_disable = hsw_vebox_irq_disable;
-
- engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
-
- return intel_init_ring_buffer(engine);
-}
+++ /dev/null
-/* SPDX-License-Identifier: MIT */
-#ifndef _INTEL_RINGBUFFER_H_
-#define _INTEL_RINGBUFFER_H_
-
-#include <drm/drm_util.h>
-
-#include <linux/hashtable.h>
-#include <linux/irq_work.h>
-#include <linux/random.h>
-#include <linux/seqlock.h>
-
-#include "i915_gem_batch_pool.h"
-#include "i915_pmu.h"
-#include "i915_reg.h"
-#include "i915_request.h"
-#include "i915_selftest.h"
-#include "i915_timeline.h"
-#include "intel_engine_types.h"
-#include "intel_gpu_commands.h"
-#include "intel_workarounds.h"
-
-struct drm_printer;
-
-/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
- * but keeps the logic simple. Indeed, the whole purpose of this macro is just
- * to give some inclination as to some of the magic values used in the various
- * workarounds!
- */
-#define CACHELINE_BYTES 64
-#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
-
-/*
- * The register defines to be used with the following macros need to accept a
- * base param, e.g:
- *
- * REG_FOO(base) _MMIO((base) + <relative offset>)
- * ENGINE_READ(engine, REG_FOO);
- *
- * register arrays are to be defined and accessed as follows:
- *
- * REG_BAR(base, i) _MMIO((base) + <relative offset> + (i) * <shift>)
- * ENGINE_READ_IDX(engine, REG_BAR, i)
- */
-
-#define __ENGINE_REG_OP(op__, engine__, ...) \
- intel_uncore_##op__((engine__)->uncore, __VA_ARGS__)
-
-#define __ENGINE_READ_OP(op__, engine__, reg__) \
- __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base))
-
-#define ENGINE_READ16(...) __ENGINE_READ_OP(read16, __VA_ARGS__)
-#define ENGINE_READ(...) __ENGINE_READ_OP(read, __VA_ARGS__)
-#define ENGINE_READ_FW(...) __ENGINE_READ_OP(read_fw, __VA_ARGS__)
-#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__)
-
-#define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \
- __ENGINE_REG_OP(read64_2x32, (engine__), \
- lower_reg__((engine__)->mmio_base), \
- upper_reg__((engine__)->mmio_base))
-
-#define ENGINE_READ_IDX(engine__, reg__, idx__) \
- __ENGINE_REG_OP(read, (engine__), reg__((engine__)->mmio_base, (idx__)))
-
-#define __ENGINE_WRITE_OP(op__, engine__, reg__, val__) \
- __ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base), (val__))
-
-#define ENGINE_WRITE16(...) __ENGINE_WRITE_OP(write16, __VA_ARGS__)
-#define ENGINE_WRITE(...) __ENGINE_WRITE_OP(write, __VA_ARGS__)
-#define ENGINE_WRITE_FW(...) __ENGINE_WRITE_OP(write_fw, __VA_ARGS__)
-
-/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
- * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
- */
-enum intel_engine_hangcheck_action {
- ENGINE_IDLE = 0,
- ENGINE_WAIT,
- ENGINE_ACTIVE_SEQNO,
- ENGINE_ACTIVE_HEAD,
- ENGINE_ACTIVE_SUBUNITS,
- ENGINE_WAIT_KICK,
- ENGINE_DEAD,
-};
-
-static inline const char *
-hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
-{
- switch (a) {
- case ENGINE_IDLE:
- return "idle";
- case ENGINE_WAIT:
- return "wait";
- case ENGINE_ACTIVE_SEQNO:
- return "active seqno";
- case ENGINE_ACTIVE_HEAD:
- return "active head";
- case ENGINE_ACTIVE_SUBUNITS:
- return "active subunits";
- case ENGINE_WAIT_KICK:
- return "wait kick";
- case ENGINE_DEAD:
- return "dead";
- }
-
- return "unknown";
-}
-
-void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
-
-static inline bool __execlists_need_preempt(int prio, int last)
-{
- /*
- * Allow preemption of low -> normal -> high, but we do
- * not allow low priority tasks to preempt other low priority
- * tasks under the impression that latency for low priority
- * tasks does not matter (as much as background throughput),
- * so kiss.
- *
- * More naturally we would write
- * prio >= max(0, last);
- * except that we wish to prevent triggering preemption at the same
- * priority level: the task that is running should remain running
- * to preserve FIFO ordering of dependencies.
- */
- return prio > max(I915_PRIORITY_NORMAL - 1, last);
-}
-
-static inline void
-execlists_set_active(struct intel_engine_execlists *execlists,
- unsigned int bit)
-{
- __set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline bool
-execlists_set_active_once(struct intel_engine_execlists *execlists,
- unsigned int bit)
-{
- return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_active(struct intel_engine_execlists *execlists,
- unsigned int bit)
-{
- __clear_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_all_active(struct intel_engine_execlists *execlists)
-{
- execlists->active = 0;
-}
-
-static inline bool
-execlists_is_active(const struct intel_engine_execlists *execlists,
- unsigned int bit)
-{
- return test_bit(bit, (unsigned long *)&execlists->active);
-}
-
-void execlists_user_begin(struct intel_engine_execlists *execlists,
- const struct execlist_port *port);
-void execlists_user_end(struct intel_engine_execlists *execlists);
-
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
-
-struct i915_request *
-execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
-
-static inline unsigned int
-execlists_num_ports(const struct intel_engine_execlists * const execlists)
-{
- return execlists->port_mask + 1;
-}
-
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
- struct execlist_port * const port)
-{
- const unsigned int m = execlists->port_mask;
-
- GEM_BUG_ON(port_index(port, execlists) != 0);
- GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-
- memmove(port, port + 1, m * sizeof(struct execlist_port));
- memset(port + m, 0, sizeof(struct execlist_port));
-
- return port;
-}
-
-static inline u32
-intel_read_status_page(const struct intel_engine_cs *engine, int reg)
-{
- /* Ensure that the compiler doesn't optimize away the load. */
- return READ_ONCE(engine->status_page.addr[reg]);
-}
-
-static inline void
-intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
-{
- /* Writing into the status page should be done sparingly. Since
- * we do when we are uncertain of the device state, we take a bit
- * of extra paranoia to try and ensure that the HWS takes the value
- * we give and that it doesn't end up trapped inside the CPU!
- */
- if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
- mb();
- clflush(&engine->status_page.addr[reg]);
- engine->status_page.addr[reg] = value;
- clflush(&engine->status_page.addr[reg]);
- mb();
- } else {
- WRITE_ONCE(engine->status_page.addr[reg], value);
- }
-}
-
-/*
- * Reads a dword out of the status page, which is written to from the command
- * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
- * MI_STORE_DATA_IMM.
- *
- * The following dwords have a reserved meaning:
- * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
- * 0x04: ring 0 head pointer
- * 0x05: ring 1 head pointer (915-class)
- * 0x06: ring 2 head pointer (915-class)
- * 0x10-0x1b: Context status DWords (GM45)
- * 0x1f: Last written status offset. (GM45)
- * 0x20-0x2f: Reserved (Gen6+)
- *
- * The area from dword 0x30 to 0x3ff is available for driver usage.
- */
-#define I915_GEM_HWS_PREEMPT 0x32
-#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32))
-#define I915_GEM_HWS_HANGCHECK 0x34
-#define I915_GEM_HWS_HANGCHECK_ADDR (I915_GEM_HWS_HANGCHECK * sizeof(u32))
-#define I915_GEM_HWS_SEQNO 0x40
-#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
-#define I915_GEM_HWS_SCRATCH 0x80
-#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32))
-
-#define I915_HWS_CSB_BUF0_INDEX 0x10
-#define I915_HWS_CSB_WRITE_INDEX 0x1f
-#define CNL_HWS_CSB_WRITE_INDEX 0x2f
-
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine,
- struct i915_timeline *timeline,
- int size);
-int intel_ring_pin(struct intel_ring *ring);
-void intel_ring_reset(struct intel_ring *ring, u32 tail);
-unsigned int intel_ring_update_space(struct intel_ring *ring);
-void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct kref *ref);
-
-static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
-{
- kref_get(&ring->ref);
- return ring;
-}
-
-static inline void intel_ring_put(struct intel_ring *ring)
-{
- kref_put(&ring->ref, intel_ring_free);
-}
-
-void intel_engine_stop(struct intel_engine_cs *engine);
-void intel_engine_cleanup(struct intel_engine_cs *engine);
-
-int __must_check intel_ring_cacheline_align(struct i915_request *rq);
-
-u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
-
-static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
-{
- /* Dummy function.
- *
- * This serves as a placeholder in the code so that the reader
- * can compare against the preceding intel_ring_begin() and
- * check that the number of dwords emitted matches the space
- * reserved for the command packet (i.e. the value passed to
- * intel_ring_begin()).
- */
- GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
-}
-
-static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
-{
- return pos & (ring->size - 1);
-}
-
-static inline bool
-intel_ring_offset_valid(const struct intel_ring *ring,
- unsigned int pos)
-{
- if (pos & -ring->size) /* must be strictly within the ring */
- return false;
-
- if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
- return false;
-
- return true;
-}
-
-static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
-{
- /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
- u32 offset = addr - rq->ring->vaddr;
- GEM_BUG_ON(offset > rq->ring->size);
- return intel_ring_wrap(rq->ring, offset);
-}
-
-static inline void
-assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
-{
- GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
- /*
- * "Ring Buffer Use"
- * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
- * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
- * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
- * same cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- *
- * We use ring->head as the last known location of the actual RING_HEAD,
- * it may have advanced but in the worst case it is equally the same
- * as ring->head and so we should never program RING_TAIL to advance
- * into the same cacheline as ring->head.
- */
-#define cacheline(a) round_down(a, CACHELINE_BYTES)
- GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
- tail < ring->head);
-#undef cacheline
-}
-
-static inline unsigned int
-intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
-{
- /* Whilst writes to the tail are strictly order, there is no
- * serialisation between readers and the writers. The tail may be
- * read by i915_request_retire() just as it is being updated
- * by execlists, as although the breadcrumb is complete, the context
- * switch hasn't been seen.
- */
- assert_ring_tail_valid(ring, tail);
- ring->tail = tail;
- return tail;
-}
-
-static inline unsigned int
-__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
-{
- /*
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
- * same cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- */
- GEM_BUG_ON(!is_power_of_2(size));
- return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
-int intel_engine_setup_common(struct intel_engine_cs *engine);
-int intel_engine_init_common(struct intel_engine_cs *engine);
-void intel_engine_cleanup_common(struct intel_engine_cs *engine);
-
-int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
-
-int intel_engine_stop_cs(struct intel_engine_cs *engine);
-void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
-
-void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
-
-u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
-u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
-
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
- struct intel_instdone *instdone);
-
-void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-
-void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
-void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
-
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
-
-static inline void
-intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
-{
- irq_work_queue(&engine->breadcrumbs.irq_work);
-}
-
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
-
-void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-
-void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
- struct drm_printer *p);
-
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
-{
- memset(batch, 0, 6 * sizeof(u32));
-
- batch[0] = GFX_OP_PIPE_CONTROL(6);
- batch[1] = flags;
- batch[2] = offset;
-
- return batch + 6;
-}
-
-static inline u32 *
-gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
-{
- /* We're using qword write, offset should be aligned to 8 bytes. */
- GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
-
- /* w/a for post sync ops following a GPGPU operation we
- * need a prior CS_STALL, which is emitted by the flush
- * following the batch.
- */
- *cs++ = GFX_OP_PIPE_CONTROL(6);
- *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
- *cs++ = gtt_offset;
- *cs++ = 0;
- *cs++ = value;
- /* We're thrashing one dword of HWS. */
- *cs++ = 0;
-
- return cs;
-}
-
-static inline u32 *
-gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
-{
- /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
- GEM_BUG_ON(gtt_offset & (1 << 5));
- /* Offset should be aligned to 8 bytes for both (QW/DW) write types */
- GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
-
- *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
- *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
- *cs++ = 0;
- *cs++ = value;
-
- return cs;
-}
-
-static inline void intel_engine_reset(struct intel_engine_cs *engine,
- bool stalled)
-{
- if (engine->reset.reset)
- engine->reset.reset(engine, stalled);
-}
-
-void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
-void intel_gt_resume(struct drm_i915_private *i915);
-
-bool intel_engine_is_idle(struct intel_engine_cs *engine);
-bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
-
-void intel_engine_lost_context(struct intel_engine_cs *engine);
-
-void intel_engines_park(struct drm_i915_private *i915);
-void intel_engines_unpark(struct drm_i915_private *i915);
-
-void intel_engines_reset_default_submission(struct drm_i915_private *i915);
-unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
-
-bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
-
-__printf(3, 4)
-void intel_engine_dump(struct intel_engine_cs *engine,
- struct drm_printer *m,
- const char *header, ...);
-
-struct intel_engine_cs *
-intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
-
-static inline void intel_engine_context_in(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- if (engine->stats.active++ == 0)
- engine->stats.start = ktime_get();
- GEM_BUG_ON(engine->stats.active == 0);
- }
-
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
-static inline void intel_engine_context_out(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- ktime_t last;
-
- if (engine->stats.active && --engine->stats.active == 0) {
- /*
- * Decrement the active context count and in case GPU
- * is now idle add up to the running total.
- */
- last = ktime_sub(ktime_get(), engine->stats.start);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- } else if (engine->stats.active == 0) {
- /*
- * After turning on engine stats, context out might be
- * the first event in which case we account from the
- * time stats gathering was turned on.
- */
- last = ktime_sub(ktime_get(), engine->stats.enabled_at);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- }
- }
-
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
-int intel_enable_engine_stats(struct intel_engine_cs *engine);
-void intel_disable_engine_stats(struct intel_engine_cs *engine);
-
-ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
-
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine);
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-
-static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
-{
- if (!execlists->preempt_hang.inject_hang)
- return false;
-
- complete(&execlists->preempt_hang.completion);
- return true;
-}
-
-#else
-
-static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
-{
- return false;
-}
-
-#endif
-
-static inline u32
-intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine)
-{
- return engine->hangcheck.next_seqno =
- next_pseudo_random32(engine->hangcheck.next_seqno);
-}
-
-static inline u32
-intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
-{
- return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
-}
-
-#endif /* _INTEL_RINGBUFFER_H_ */
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#include "i915_drv.h"
-#include "intel_lrc_reg.h"
-#include "intel_sseu.h"
-
-u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
- const struct intel_sseu *req_sseu)
-{
- const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
- bool subslice_pg = sseu->has_subslice_pg;
- struct intel_sseu ctx_sseu;
- u8 slices, subslices;
- u32 rpcs = 0;
-
- /*
- * No explicit RPCS request is needed to ensure full
- * slice/subslice/EU enablement prior to Gen9.
- */
- if (INTEL_GEN(i915) < 9)
- return 0;
-
- /*
- * If i915/perf is active, we want a stable powergating configuration
- * on the system.
- *
- * We could choose full enablement, but on ICL we know there are use
- * cases which disable slices for functional, apart for performance
- * reasons. So in this case we select a known stable subset.
- */
- if (!i915->perf.oa.exclusive_stream) {
- ctx_sseu = *req_sseu;
- } else {
- ctx_sseu = intel_sseu_from_device_info(sseu);
-
- if (IS_GEN(i915, 11)) {
- /*
- * We only need subslice count so it doesn't matter
- * which ones we select - just turn off low bits in the
- * amount of half of all available subslices per slice.
- */
- ctx_sseu.subslice_mask =
- ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
- ctx_sseu.slice_mask = 0x1;
- }
- }
-
- slices = hweight8(ctx_sseu.slice_mask);
- subslices = hweight8(ctx_sseu.subslice_mask);
-
- /*
- * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
- * wide and Icelake has up to eight subslices, specfial programming is
- * needed in order to correctly enable all subslices.
- *
- * According to documentation software must consider the configuration
- * as 2x4x8 and hardware will translate this to 1x8x8.
- *
- * Furthemore, even though SScount is three bits, maximum documented
- * value for it is four. From this some rules/restrictions follow:
- *
- * 1.
- * If enabled subslice count is greater than four, two whole slices must
- * be enabled instead.
- *
- * 2.
- * When more than one slice is enabled, hardware ignores the subslice
- * count altogether.
- *
- * From these restrictions it follows that it is not possible to enable
- * a count of subslices between the SScount maximum of four restriction,
- * and the maximum available number on a particular SKU. Either all
- * subslices are enabled, or a count between one and four on the first
- * slice.
- */
- if (IS_GEN(i915, 11) &&
- slices == 1 &&
- subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
- GEM_BUG_ON(subslices & 1);
-
- subslice_pg = false;
- slices *= 2;
- }
-
- /*
- * Starting in Gen9, render power gating can leave
- * slice/subslice/EU in a partially enabled state. We
- * must make an explicit request through RPCS for full
- * enablement.
- */
- if (sseu->has_slice_pg) {
- u32 mask, val = slices;
-
- if (INTEL_GEN(i915) >= 11) {
- mask = GEN11_RPCS_S_CNT_MASK;
- val <<= GEN11_RPCS_S_CNT_SHIFT;
- } else {
- mask = GEN8_RPCS_S_CNT_MASK;
- val <<= GEN8_RPCS_S_CNT_SHIFT;
- }
-
- GEM_BUG_ON(val & ~mask);
- val &= mask;
-
- rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
- }
-
- if (subslice_pg) {
- u32 val = subslices;
-
- val <<= GEN8_RPCS_SS_CNT_SHIFT;
-
- GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
- val &= GEN8_RPCS_SS_CNT_MASK;
-
- rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
- }
-
- if (sseu->has_eu_pg) {
- u32 val;
-
- val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
- GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
- val &= GEN8_RPCS_EU_MIN_MASK;
-
- rpcs |= val;
-
- val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
- GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
- val &= GEN8_RPCS_EU_MAX_MASK;
-
- rpcs |= val;
-
- rpcs |= GEN8_RPCS_ENABLE;
- }
-
- return rpcs;
-}
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __INTEL_SSEU_H__
-#define __INTEL_SSEU_H__
-
-#include <linux/types.h>
-
-struct drm_i915_private;
-
-#define GEN_MAX_SLICES (6) /* CNL upper bound */
-#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
-
-struct sseu_dev_info {
- u8 slice_mask;
- u8 subslice_mask[GEN_MAX_SLICES];
- u16 eu_total;
- u8 eu_per_subslice;
- u8 min_eu_in_pool;
- /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
- u8 subslice_7eu[3];
- u8 has_slice_pg:1;
- u8 has_subslice_pg:1;
- u8 has_eu_pg:1;
-
- /* Topology fields */
- u8 max_slices;
- u8 max_subslices;
- u8 max_eus_per_subslice;
-
- /* We don't have more than 8 eus per subslice at the moment and as we
- * store eus enabled using bits, no need to multiply by eus per
- * subslice.
- */
- u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
-};
-
-/*
- * Powergating configuration for a particular (context,engine).
- */
-struct intel_sseu {
- u8 slice_mask;
- u8 subslice_mask;
- u8 min_eus_per_subslice;
- u8 max_eus_per_subslice;
-};
-
-static inline struct intel_sseu
-intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
-{
- struct intel_sseu value = {
- .slice_mask = sseu->slice_mask,
- .subslice_mask = sseu->subslice_mask[0],
- .min_eus_per_subslice = sseu->max_eus_per_subslice,
- .max_eus_per_subslice = sseu->max_eus_per_subslice,
- };
-
- return value;
-}
-
-u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
- const struct intel_sseu *req_sseu);
-
-#endif /* __INTEL_SSEU_H__ */
*
*/
+#include "gt/intel_reset.h"
#include "intel_uc.h"
#include "intel_guc_submission.h"
#include "intel_guc.h"
#include "i915_drv.h"
-#include "i915_reset.h"
static void guc_free_load_err_log(struct intel_guc *guc);
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#include "i915_drv.h"
-#include "intel_workarounds.h"
-
-/**
- * DOC: Hardware workarounds
- *
- * This file is intended as a central place to implement most [1]_ of the
- * required workarounds for hardware to work as originally intended. They fall
- * in five basic categories depending on how/when they are applied:
- *
- * - Workarounds that touch registers that are saved/restored to/from the HW
- * context image. The list is emitted (via Load Register Immediate commands)
- * everytime a new context is created.
- * - GT workarounds. The list of these WAs is applied whenever these registers
- * revert to default values (on GPU reset, suspend/resume [2]_, etc..).
- * - Display workarounds. The list is applied during display clock-gating
- * initialization.
- * - Workarounds that whitelist a privileged register, so that UMDs can manage
- * them directly. This is just a special case of a MMMIO workaround (as we
- * write the list of these to/be-whitelisted registers to some special HW
- * registers).
- * - Workaround batchbuffers, that get executed automatically by the hardware
- * on every HW context restore.
- *
- * .. [1] Please notice that there are other WAs that, due to their nature,
- * cannot be applied from a central place. Those are peppered around the rest
- * of the code, as needed.
- *
- * .. [2] Technically, some registers are powercontext saved & restored, so they
- * survive a suspend/resume. In practice, writing them again is not too
- * costly and simplifies things. We can revisit this in the future.
- *
- * Layout
- * ''''''
- *
- * Keep things in this file ordered by WA type, as per the above (context, GT,
- * display, register whitelist, batchbuffer). Then, inside each type, keep the
- * following order:
- *
- * - Infrastructure functions and macros
- * - WAs per platform in standard gen/chrono order
- * - Public functions to init or apply the given workaround type.
- */
-
-static void wa_init_start(struct i915_wa_list *wal, const char *name)
-{
- wal->name = name;
-}
-
-#define WA_LIST_CHUNK (1 << 4)
-
-static void wa_init_finish(struct i915_wa_list *wal)
-{
- /* Trim unused entries. */
- if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
- struct i915_wa *list = kmemdup(wal->list,
- wal->count * sizeof(*list),
- GFP_KERNEL);
-
- if (list) {
- kfree(wal->list);
- wal->list = list;
- }
- }
-
- if (!wal->count)
- return;
-
- DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
- wal->wa_count, wal->name);
-}
-
-static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
-{
- unsigned int addr = i915_mmio_reg_offset(wa->reg);
- unsigned int start = 0, end = wal->count;
- const unsigned int grow = WA_LIST_CHUNK;
- struct i915_wa *wa_;
-
- GEM_BUG_ON(!is_power_of_2(grow));
-
- if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
- struct i915_wa *list;
-
- list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
- GFP_KERNEL);
- if (!list) {
- DRM_ERROR("No space for workaround init!\n");
- return;
- }
-
- if (wal->list)
- memcpy(list, wal->list, sizeof(*wa) * wal->count);
-
- wal->list = list;
- }
-
- while (start < end) {
- unsigned int mid = start + (end - start) / 2;
-
- if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
- start = mid + 1;
- } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
- end = mid;
- } else {
- wa_ = &wal->list[mid];
-
- if ((wa->mask & ~wa_->mask) == 0) {
- DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
- i915_mmio_reg_offset(wa_->reg),
- wa_->mask, wa_->val);
-
- wa_->val &= ~wa->mask;
- }
-
- wal->wa_count++;
- wa_->val |= wa->val;
- wa_->mask |= wa->mask;
- wa_->read |= wa->read;
- return;
- }
- }
-
- wal->wa_count++;
- wa_ = &wal->list[wal->count++];
- *wa_ = *wa;
-
- while (wa_-- > wal->list) {
- GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
- i915_mmio_reg_offset(wa_[1].reg));
- if (i915_mmio_reg_offset(wa_[1].reg) >
- i915_mmio_reg_offset(wa_[0].reg))
- break;
-
- swap(wa_[1], wa_[0]);
- }
-}
-
-static void
-wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
- u32 val)
-{
- struct i915_wa wa = {
- .reg = reg,
- .mask = mask,
- .val = val,
- .read = mask,
- };
-
- _wa_add(wal, &wa);
-}
-
-static void
-wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
-{
- wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
-}
-
-static void
-wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
-{
- wa_write_masked_or(wal, reg, ~0, val);
-}
-
-static void
-wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
-{
- wa_write_masked_or(wal, reg, val, val);
-}
-
-static void
-ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
-{
- struct i915_wa wa = {
- .reg = reg,
- .mask = mask,
- .val = val,
- /* Bonkers HW, skip verifying */
- };
-
- _wa_add(wal, &wa);
-}
-
-#define WA_SET_BIT_MASKED(addr, mask) \
- wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
-
-#define WA_CLR_BIT_MASKED(addr, mask) \
- wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
-
-#define WA_SET_FIELD_MASKED(addr, mask, value) \
- wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
-
-static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
-
- /* WaDisableAsyncFlipPerfMode:bdw,chv */
- WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
-
- /* WaDisablePartialInstShootdown:bdw,chv */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
- PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
- /* Use Force Non-Coherent whenever executing a 3D context. This is a
- * workaround for for a possible hang in the unlikely event a TLB
- * invalidation occurs during a PSD flush.
- */
- /* WaForceEnableNonCoherent:bdw,chv */
- /* WaHdcDisableFetchWhenMasked:bdw,chv */
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_DONOT_FETCH_MEM_WHEN_MASKED |
- HDC_FORCE_NON_COHERENT);
-
- /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
- * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
- * polygons in the same 8x4 pixel/sample area to be processed without
- * stalling waiting for the earlier ones to write to Hierarchical Z
- * buffer."
- *
- * This optimization is off by default for BDW and CHV; turn it on.
- */
- WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
-
- /* Wa4x4STCOptimizationDisable:bdw,chv */
- WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
-
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- WA_SET_FIELD_MASKED(GEN7_GT_MODE,
- GEN6_WIZ_HASHING_MASK,
- GEN6_WIZ_HASHING_16x4);
-}
-
-static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- gen8_ctx_workarounds_init(engine);
-
- /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
- /* WaDisableDopClockGating:bdw
- *
- * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
- * to disable EUTC clock gating.
- */
- WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
- DOP_CLOCK_GATING_DISABLE);
-
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
- GEN8_SAMPLER_POWER_BYPASS_DIS);
-
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- /* WaForceContextSaveRestoreNonCoherent:bdw */
- HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
- /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
- (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
-}
-
-static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- gen8_ctx_workarounds_init(engine);
-
- /* WaDisableThreadStallDopClockGating:chv */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
- /* Improve HiZ throughput on CHV. */
- WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
-}
-
-static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- if (HAS_LLC(i915)) {
- /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
- *
- * Must match Display Engine. See
- * WaCompressedResourceDisplayNewHashMode.
- */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN9_PBE_COMPRESSED_HASH_SELECTION);
- WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
- GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
- }
-
- /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
- /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
- FLOW_CONTROL_ENABLE |
- PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
- /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
- if (!IS_COFFEELAKE(i915))
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
- GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
-
- /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
- /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
- WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
- GEN9_ENABLE_YV12_BUGFIX |
- GEN9_ENABLE_GPGPU_PREEMPTION);
-
- /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
- /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
- WA_SET_BIT_MASKED(CACHE_MODE_1,
- GEN8_4x4_STC_OPTIMIZATION_DISABLE |
- GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
-
- /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
- WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
- GEN9_CCS_TLB_PREFETCH_ENABLE);
-
- /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
- HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
-
- /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
- * both tied to WaForceContextSaveRestoreNonCoherent
- * in some hsds for skl. We keep the tie for all gen9. The
- * documentation is a bit hazy and so we want to get common behaviour,
- * even though there is no clear evidence we would need both on kbl/bxt.
- * This area has been source of system hangs so we play it safe
- * and mimic the skl regardless of what bspec says.
- *
- * Use Force Non-Coherent whenever executing a 3D context. This
- * is a workaround for a possible hang in the unlikely event
- * a TLB invalidation occurs during a PSD flush.
- */
-
- /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FORCE_NON_COHERENT);
-
- /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
- if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
- GEN8_SAMPLER_POWER_BYPASS_DIS);
-
- /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
-
- /*
- * Supporting preemption with fine-granularity requires changes in the
- * batch buffer programming. Since we can't break old userspace, we
- * need to set our default preemption level to safe value. Userspace is
- * still able to use more fine-grained preemption levels, since in
- * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
- * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
- * not real HW workarounds, but merely a way to start using preemption
- * while maintaining old contract with userspace.
- */
-
- /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
- WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
- /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
- WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
- GEN9_PREEMPT_GPGPU_LEVEL_MASK,
- GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
- /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
- if (IS_GEN9_LP(i915))
- WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
-}
-
-static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_wa_list *wal = &engine->ctx_wa_list;
- u8 vals[3] = { 0, 0, 0 };
- unsigned int i;
-
- for (i = 0; i < 3; i++) {
- u8 ss;
-
- /*
- * Only consider slices where one, and only one, subslice has 7
- * EUs
- */
- if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
- continue;
-
- /*
- * subslice_7eu[i] != 0 (because of the check above) and
- * ss_max == 4 (maximum number of subslices possible per slice)
- *
- * -> 0 <= ss <= 3;
- */
- ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
- vals[i] = 3 - ss;
- }
-
- if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
- return;
-
- /* Tune IZ hashing. See intel_device_info_runtime_init() */
- WA_SET_FIELD_MASKED(GEN7_GT_MODE,
- GEN9_IZ_HASHING_MASK(2) |
- GEN9_IZ_HASHING_MASK(1) |
- GEN9_IZ_HASHING_MASK(0),
- GEN9_IZ_HASHING(2, vals[2]) |
- GEN9_IZ_HASHING(1, vals[1]) |
- GEN9_IZ_HASHING(0, vals[0]));
-}
-
-static void skl_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- gen9_ctx_workarounds_init(engine);
- skl_tune_iz_hashing(engine);
-}
-
-static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- gen9_ctx_workarounds_init(engine);
-
- /* WaDisableThreadStallDopClockGating:bxt */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
- STALL_DOP_GATING_DISABLE);
-
- /* WaToEnableHwFixForPushConstHWBug:bxt */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-}
-
-static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- gen9_ctx_workarounds_init(engine);
-
- /* WaToEnableHwFixForPushConstHWBug:kbl */
- if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- /* WaDisableSbeCacheDispatchPortSharing:kbl */
- WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
- GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-}
-
-static void glk_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- gen9_ctx_workarounds_init(engine);
-
- /* WaToEnableHwFixForPushConstHWBug:glk */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-}
-
-static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- gen9_ctx_workarounds_init(engine);
-
- /* WaToEnableHwFixForPushConstHWBug:cfl */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- /* WaDisableSbeCacheDispatchPortSharing:cfl */
- WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
- GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-}
-
-static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- /* WaForceContextSaveRestoreNonCoherent:cnl */
- WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
- HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
-
- /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
- /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
- /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
- WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
- GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
- /* WaPushConstantDereferenceHoldDisable:cnl */
- WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
-
- /* FtrEnableFastAnisoL1BankingFix:cnl */
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
-
- /* WaDisable3DMidCmdPreemption:cnl */
- WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
- /* WaDisableGPGPUMidCmdPreemption:cnl */
- WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
- GEN9_PREEMPT_GPGPU_LEVEL_MASK,
- GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
- /* WaDisableEarlyEOT:cnl */
- WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
-}
-
-static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- /* Wa_1604370585:icl (pre-prod)
- * Formerly known as WaPushConstantDereferenceHoldDisable
- */
- if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
- WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
- PUSH_CONSTANT_DEREF_DISABLE);
-
- /* WaForceEnableNonCoherent:icl
- * This is not the same workaround as in early Gen9 platforms, where
- * lacking this could cause system hangs, but coherency performance
- * overhead is high and only a few compute workloads really need it
- * (the register is whitelisted in hardware now, so UMDs can opt in
- * for coherency if they have a good reason).
- */
- WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
-
- /* Wa_2006611047:icl (pre-prod)
- * Formerly known as WaDisableImprovedTdlClkGating
- */
- if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
- WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
- GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
-
- /* WaEnableStateCacheRedirectToCS:icl */
- WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
- GEN11_STATE_CACHE_REDIRECT_TO_CS);
-
- /* Wa_2006665173:icl (pre-prod) */
- if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
- WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
- GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
-
- /* WaEnableFloatBlendOptimization:icl */
- wa_write_masked_or(wal,
- GEN10_CACHE_MODE_SS,
- 0, /* write-only, so skip validation */
- _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
-
- /* WaDisableGPGPUMidThreadPreemption:icl */
- WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
- GEN9_PREEMPT_GPGPU_LEVEL_MASK,
- GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
-}
-
-void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_wa_list *wal = &engine->ctx_wa_list;
-
- wa_init_start(wal, "context");
-
- if (IS_GEN(i915, 11))
- icl_ctx_workarounds_init(engine);
- else if (IS_CANNONLAKE(i915))
- cnl_ctx_workarounds_init(engine);
- else if (IS_COFFEELAKE(i915))
- cfl_ctx_workarounds_init(engine);
- else if (IS_GEMINILAKE(i915))
- glk_ctx_workarounds_init(engine);
- else if (IS_KABYLAKE(i915))
- kbl_ctx_workarounds_init(engine);
- else if (IS_BROXTON(i915))
- bxt_ctx_workarounds_init(engine);
- else if (IS_SKYLAKE(i915))
- skl_ctx_workarounds_init(engine);
- else if (IS_CHERRYVIEW(i915))
- chv_ctx_workarounds_init(engine);
- else if (IS_BROADWELL(i915))
- bdw_ctx_workarounds_init(engine);
- else if (INTEL_GEN(i915) < 8)
- return;
- else
- MISSING_CASE(INTEL_GEN(i915));
-
- wa_init_finish(wal);
-}
-
-int intel_engine_emit_ctx_wa(struct i915_request *rq)
-{
- struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
- struct i915_wa *wa;
- unsigned int i;
- u32 *cs;
- int ret;
-
- if (wal->count == 0)
- return 0;
-
- ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
- if (ret)
- return ret;
-
- cs = intel_ring_begin(rq, (wal->count * 2 + 2));
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- *cs++ = i915_mmio_reg_offset(wa->reg);
- *cs++ = wa->val;
- }
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static void
-gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- /* WaDisableKillLogic:bxt,skl,kbl */
- if (!IS_COFFEELAKE(i915))
- wa_write_or(wal,
- GAM_ECOCHK,
- ECOCHK_DIS_TLB);
-
- if (HAS_LLC(i915)) {
- /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
- *
- * Must match Display Engine. See
- * WaCompressedResourceDisplayNewHashMode.
- */
- wa_write_or(wal,
- MMCD_MISC_CTRL,
- MMCD_PCLA | MMCD_HOTSPOT_EN);
- }
-
- /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
- wa_write_or(wal,
- GAM_ECOCHK,
- BDW_DISABLE_HDC_INVALIDATION);
-}
-
-static void
-skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- gen9_gt_workarounds_init(i915, wal);
-
- /* WaDisableGafsUnitClkGating:skl */
- wa_write_or(wal,
- GEN7_UCGCTL4,
- GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
-
- /* WaInPlaceDecompressionHang:skl */
- if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
- wa_write_or(wal,
- GEN9_GAMT_ECO_REG_RW_IA,
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-}
-
-static void
-bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- gen9_gt_workarounds_init(i915, wal);
-
- /* WaInPlaceDecompressionHang:bxt */
- wa_write_or(wal,
- GEN9_GAMT_ECO_REG_RW_IA,
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-}
-
-static void
-kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- gen9_gt_workarounds_init(i915, wal);
-
- /* WaDisableDynamicCreditSharing:kbl */
- if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
- wa_write_or(wal,
- GAMT_CHKN_BIT_REG,
- GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
-
- /* WaDisableGafsUnitClkGating:kbl */
- wa_write_or(wal,
- GEN7_UCGCTL4,
- GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
-
- /* WaInPlaceDecompressionHang:kbl */
- wa_write_or(wal,
- GEN9_GAMT_ECO_REG_RW_IA,
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-}
-
-static void
-glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- gen9_gt_workarounds_init(i915, wal);
-}
-
-static void
-cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- gen9_gt_workarounds_init(i915, wal);
-
- /* WaDisableGafsUnitClkGating:cfl */
- wa_write_or(wal,
- GEN7_UCGCTL4,
- GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
-
- /* WaInPlaceDecompressionHang:cfl */
- wa_write_or(wal,
- GEN9_GAMT_ECO_REG_RW_IA,
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-}
-
-static void
-wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
- u32 mcr_slice_subslice_mask;
-
- /*
- * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
- * L3Banks could be fused off in single slice scenario. If that is
- * the case, we might need to program MCR select to a valid L3Bank
- * by default, to make sure we correctly read certain registers
- * later on (in the range 0xB100 - 0xB3FF).
- * This might be incompatible with
- * WaProgramMgsrForCorrectSliceSpecificMmioReads.
- * Fortunately, this should not happen in production hardware, so
- * we only assert that this is the case (instead of implementing
- * something more complex that requires checking the range of every
- * MMIO read).
- */
- if (INTEL_GEN(i915) >= 10 &&
- is_power_of_2(sseu->slice_mask)) {
- /*
- * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
- * enabled subslice, no need to redirect MCR packet
- */
- u32 slice = fls(sseu->slice_mask);
- u32 fuse3 =
- intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
- u8 ss_mask = sseu->subslice_mask[slice];
-
- u8 enabled_mask = (ss_mask | ss_mask >>
- GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
- u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
-
- /*
- * Production silicon should have matched L3Bank and
- * subslice enabled
- */
- WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
- }
-
- if (INTEL_GEN(i915) >= 11)
- mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
- GEN11_MCR_SUBSLICE_MASK;
- else
- mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
- GEN8_MCR_SUBSLICE_MASK;
- /*
- * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
- * Before any MMIO read into slice/subslice specific registers, MCR
- * packet control register needs to be programmed to point to any
- * enabled s/ss pair. Otherwise, incorrect values will be returned.
- * This means each subsequent MMIO read will be forwarded to an
- * specific s/ss combination, but this is OK since these registers
- * are consistent across s/ss in almost all cases. In the rare
- * occasions, such as INSTDONE, where this value is dependent
- * on s/ss combo, the read should be done with read_subslice_reg.
- */
- wa_write_masked_or(wal,
- GEN8_MCR_SELECTOR,
- mcr_slice_subslice_mask,
- intel_calculate_mcr_s_ss_select(i915));
-}
-
-static void
-cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- wa_init_mcr(i915, wal);
-
- /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
- if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
- wa_write_or(wal,
- GAMT_CHKN_BIT_REG,
- GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
-
- /* WaInPlaceDecompressionHang:cnl */
- wa_write_or(wal,
- GEN9_GAMT_ECO_REG_RW_IA,
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-}
-
-static void
-icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- wa_init_mcr(i915, wal);
-
- /* WaInPlaceDecompressionHang:icl */
- wa_write_or(wal,
- GEN9_GAMT_ECO_REG_RW_IA,
- GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-
- /* WaModifyGamTlbPartitioning:icl */
- wa_write_masked_or(wal,
- GEN11_GACB_PERF_CTRL,
- GEN11_HASH_CTRL_MASK,
- GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
-
- /* Wa_1405766107:icl
- * Formerly known as WaCL2SFHalfMaxAlloc
- */
- wa_write_or(wal,
- GEN11_LSN_UNSLCVC,
- GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
- GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
-
- /* Wa_220166154:icl
- * Formerly known as WaDisCtxReload
- */
- wa_write_or(wal,
- GEN8_GAMW_ECO_DEV_RW_IA,
- GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
-
- /* Wa_1405779004:icl (pre-prod) */
- if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
- wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
- MSCUNIT_CLKGATE_DIS);
-
- /* Wa_1406680159:icl */
- wa_write_or(wal,
- SUBSLICE_UNIT_LEVEL_CLKGATE,
- GWUNIT_CLKGATE_DIS);
-
- /* Wa_1406838659:icl (pre-prod) */
- if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
- wa_write_or(wal,
- INF_UNIT_LEVEL_CLKGATE,
- CGPSF_CLKGATE_DIS);
-
- /* Wa_1406463099:icl
- * Formerly known as WaGamTlbPendError
- */
- wa_write_or(wal,
- GAMT_CHKN_BIT_REG,
- GAMT_CHKN_DISABLE_L3_COH_PIPE);
-}
-
-static void
-gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
-{
- if (IS_GEN(i915, 11))
- icl_gt_workarounds_init(i915, wal);
- else if (IS_CANNONLAKE(i915))
- cnl_gt_workarounds_init(i915, wal);
- else if (IS_COFFEELAKE(i915))
- cfl_gt_workarounds_init(i915, wal);
- else if (IS_GEMINILAKE(i915))
- glk_gt_workarounds_init(i915, wal);
- else if (IS_KABYLAKE(i915))
- kbl_gt_workarounds_init(i915, wal);
- else if (IS_BROXTON(i915))
- bxt_gt_workarounds_init(i915, wal);
- else if (IS_SKYLAKE(i915))
- skl_gt_workarounds_init(i915, wal);
- else if (INTEL_GEN(i915) <= 8)
- return;
- else
- MISSING_CASE(INTEL_GEN(i915));
-}
-
-void intel_gt_init_workarounds(struct drm_i915_private *i915)
-{
- struct i915_wa_list *wal = &i915->gt_wa_list;
-
- wa_init_start(wal, "GT");
- gt_init_workarounds(i915, wal);
- wa_init_finish(wal);
-}
-
-static enum forcewake_domains
-wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
-{
- enum forcewake_domains fw = 0;
- struct i915_wa *wa;
- unsigned int i;
-
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- fw |= intel_uncore_forcewake_for_reg(uncore,
- wa->reg,
- FW_REG_READ |
- FW_REG_WRITE);
-
- return fw;
-}
-
-static bool
-wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
-{
- if ((cur ^ wa->val) & wa->read) {
- DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
- name, from, i915_mmio_reg_offset(wa->reg),
- cur, cur & wa->read,
- wa->val, wa->mask);
-
- return false;
- }
-
- return true;
-}
-
-static void
-wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
-{
- enum forcewake_domains fw;
- unsigned long flags;
- struct i915_wa *wa;
- unsigned int i;
-
- if (!wal->count)
- return;
-
- fw = wal_get_fw_for_rmw(uncore, wal);
-
- spin_lock_irqsave(&uncore->lock, flags);
- intel_uncore_forcewake_get__locked(uncore, fw);
-
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
- if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- wa_verify(wa,
- intel_uncore_read_fw(uncore, wa->reg),
- wal->name, "application");
- }
-
- intel_uncore_forcewake_put__locked(uncore, fw);
- spin_unlock_irqrestore(&uncore->lock, flags);
-}
-
-void intel_gt_apply_workarounds(struct drm_i915_private *i915)
-{
- wa_list_apply(&i915->uncore, &i915->gt_wa_list);
-}
-
-static bool wa_list_verify(struct intel_uncore *uncore,
- const struct i915_wa_list *wal,
- const char *from)
-{
- struct i915_wa *wa;
- unsigned int i;
- bool ok = true;
-
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- ok &= wa_verify(wa,
- intel_uncore_read(uncore, wa->reg),
- wal->name, from);
-
- return ok;
-}
-
-bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
- const char *from)
-{
- return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
-}
-
-static void
-whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
-{
- struct i915_wa wa = {
- .reg = reg
- };
-
- if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
- return;
-
- _wa_add(wal, &wa);
-}
-
-static void gen9_whitelist_build(struct i915_wa_list *w)
-{
- /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
- whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
-
- /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
- whitelist_reg(w, GEN8_CS_CHICKEN1);
-
- /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
- whitelist_reg(w, GEN8_HDC_CHICKEN1);
-}
-
-static void skl_whitelist_build(struct i915_wa_list *w)
-{
- gen9_whitelist_build(w);
-
- /* WaDisableLSQCROPERFforOCL:skl */
- whitelist_reg(w, GEN8_L3SQCREG4);
-}
-
-static void bxt_whitelist_build(struct i915_wa_list *w)
-{
- gen9_whitelist_build(w);
-}
-
-static void kbl_whitelist_build(struct i915_wa_list *w)
-{
- gen9_whitelist_build(w);
-
- /* WaDisableLSQCROPERFforOCL:kbl */
- whitelist_reg(w, GEN8_L3SQCREG4);
-}
-
-static void glk_whitelist_build(struct i915_wa_list *w)
-{
- gen9_whitelist_build(w);
-
- /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
- whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
-}
-
-static void cfl_whitelist_build(struct i915_wa_list *w)
-{
- gen9_whitelist_build(w);
-}
-
-static void cnl_whitelist_build(struct i915_wa_list *w)
-{
- /* WaEnablePreemptionGranularityControlByUMD:cnl */
- whitelist_reg(w, GEN8_CS_CHICKEN1);
-}
-
-static void icl_whitelist_build(struct i915_wa_list *w)
-{
- /* WaAllowUMDToModifyHalfSliceChicken7:icl */
- whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
-
- /* WaAllowUMDToModifySamplerMode:icl */
- whitelist_reg(w, GEN10_SAMPLER_MODE);
-}
-
-void intel_engine_init_whitelist(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_wa_list *w = &engine->whitelist;
-
- GEM_BUG_ON(engine->id != RCS0);
-
- wa_init_start(w, "whitelist");
-
- if (IS_GEN(i915, 11))
- icl_whitelist_build(w);
- else if (IS_CANNONLAKE(i915))
- cnl_whitelist_build(w);
- else if (IS_COFFEELAKE(i915))
- cfl_whitelist_build(w);
- else if (IS_GEMINILAKE(i915))
- glk_whitelist_build(w);
- else if (IS_KABYLAKE(i915))
- kbl_whitelist_build(w);
- else if (IS_BROXTON(i915))
- bxt_whitelist_build(w);
- else if (IS_SKYLAKE(i915))
- skl_whitelist_build(w);
- else if (INTEL_GEN(i915) <= 8)
- return;
- else
- MISSING_CASE(INTEL_GEN(i915));
-
- wa_init_finish(w);
-}
-
-void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
-{
- const struct i915_wa_list *wal = &engine->whitelist;
- struct intel_uncore *uncore = engine->uncore;
- const u32 base = engine->mmio_base;
- struct i915_wa *wa;
- unsigned int i;
-
- if (!wal->count)
- return;
-
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- intel_uncore_write(uncore,
- RING_FORCE_TO_NONPRIV(base, i),
- i915_mmio_reg_offset(wa->reg));
-
- /* And clear the rest just in case of garbage */
- for (; i < RING_MAX_NONPRIV_SLOTS; i++)
- intel_uncore_write(uncore,
- RING_FORCE_TO_NONPRIV(base, i),
- i915_mmio_reg_offset(RING_NOPID(base)));
-}
-
-static void
-rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
-{
- struct drm_i915_private *i915 = engine->i915;
-
- if (IS_GEN(i915, 11)) {
- /* This is not an Wa. Enable for better image quality */
- wa_masked_en(wal,
- _3D_CHICKEN3,
- _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
-
- /* WaPipelineFlushCoherentLines:icl */
- ignore_wa_write_or(wal,
- GEN8_L3SQCREG4,
- GEN8_LQSC_FLUSH_COHERENT_LINES,
- GEN8_LQSC_FLUSH_COHERENT_LINES);
-
- /*
- * Wa_1405543622:icl
- * Formerly known as WaGAPZPriorityScheme
- */
- wa_write_or(wal,
- GEN8_GARBCNTL,
- GEN11_ARBITRATION_PRIO_ORDER_MASK);
-
- /*
- * Wa_1604223664:icl
- * Formerly known as WaL3BankAddressHashing
- */
- wa_write_masked_or(wal,
- GEN8_GARBCNTL,
- GEN11_HASH_CTRL_EXCL_MASK,
- GEN11_HASH_CTRL_EXCL_BIT0);
- wa_write_masked_or(wal,
- GEN11_GLBLINVL,
- GEN11_BANK_HASH_ADDR_EXCL_MASK,
- GEN11_BANK_HASH_ADDR_EXCL_BIT0);
-
- /*
- * Wa_1405733216:icl
- * Formerly known as WaDisableCleanEvicts
- */
- ignore_wa_write_or(wal,
- GEN8_L3SQCREG4,
- GEN11_LQSC_CLEAN_EVICT_DISABLE,
- GEN11_LQSC_CLEAN_EVICT_DISABLE);
-
- /* WaForwardProgressSoftReset:icl */
- wa_write_or(wal,
- GEN10_SCRATCH_LNCF2,
- PMFLUSHDONE_LNICRSDROP |
- PMFLUSH_GAPL3UNBLOCK |
- PMFLUSHDONE_LNEBLK);
-
- /* Wa_1406609255:icl (pre-prod) */
- if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
- wa_write_or(wal,
- GEN7_SARCHKMD,
- GEN7_DISABLE_DEMAND_PREFETCH |
- GEN7_DISABLE_SAMPLER_PREFETCH);
- }
-
- if (IS_GEN_RANGE(i915, 9, 11)) {
- /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
- wa_masked_en(wal,
- GEN7_FF_SLICE_CS_CHICKEN1,
- GEN9_FFSC_PERCTX_PREEMPT_CTRL);
- }
-
- if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
- /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
- wa_write_or(wal,
- GEN8_GARBCNTL,
- GEN9_GAPS_TSV_CREDIT_DISABLE);
- }
-
- if (IS_BROXTON(i915)) {
- /* WaDisablePooledEuLoadBalancingFix:bxt */
- wa_masked_en(wal,
- FF_SLICE_CS_CHICKEN2,
- GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
- }
-
- if (IS_GEN(i915, 9)) {
- /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
- wa_masked_en(wal,
- GEN9_CSFE_CHICKEN1_RCS,
- GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
-
- /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
- wa_write_or(wal,
- BDW_SCRATCH1,
- GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
-
- /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
- if (IS_GEN9_LP(i915))
- wa_write_masked_or(wal,
- GEN8_L3SQCREG1,
- L3_PRIO_CREDITS_MASK,
- L3_GENERAL_PRIO_CREDITS(62) |
- L3_HIGH_PRIO_CREDITS(2));
-
- /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
- wa_write_or(wal,
- GEN8_L3SQCREG4,
- GEN8_LQSC_FLUSH_COHERENT_LINES);
- }
-}
-
-static void
-xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
-{
- struct drm_i915_private *i915 = engine->i915;
-
- /* WaKBLVECSSemaphoreWaitPoll:kbl */
- if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
- wa_write(wal,
- RING_SEMA_WAIT_POLL(engine->mmio_base),
- 1);
- }
-}
-
-static void
-engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
-{
- if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
- return;
-
- if (engine->id == RCS0)
- rcs_engine_wa_init(engine, wal);
- else
- xcs_engine_wa_init(engine, wal);
-}
-
-void intel_engine_init_workarounds(struct intel_engine_cs *engine)
-{
- struct i915_wa_list *wal = &engine->wa_list;
-
- if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
- return;
-
- wa_init_start(wal, engine->name);
- engine_init_workarounds(engine, wal);
- wa_init_finish(wal);
-}
-
-void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
-{
- wa_list_apply(engine->uncore, &engine->wa_list);
-}
-
-static struct i915_vma *
-create_scratch(struct i915_address_space *vm, int count)
-{
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- unsigned int size;
- int err;
-
- size = round_up(count * sizeof(u32), PAGE_SIZE);
- obj = i915_gem_object_create_internal(vm->i915, size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
-
- vma = i915_vma_instance(obj, vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_obj;
- }
-
- err = i915_vma_pin(vma, 0, 0,
- i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
- if (err)
- goto err_obj;
-
- return vma;
-
-err_obj:
- i915_gem_object_put(obj);
- return ERR_PTR(err);
-}
-
-static int
-wa_list_srm(struct i915_request *rq,
- const struct i915_wa_list *wal,
- struct i915_vma *vma)
-{
- const struct i915_wa *wa;
- unsigned int i;
- u32 srm, *cs;
-
- srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
- if (INTEL_GEN(rq->i915) >= 8)
- srm++;
-
- cs = intel_ring_begin(rq, 4 * wal->count);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- *cs++ = srm;
- *cs++ = i915_mmio_reg_offset(wa->reg);
- *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
- *cs++ = 0;
- }
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int engine_wa_list_verify(struct intel_engine_cs *engine,
- const struct i915_wa_list * const wal,
- const char *from)
-{
- const struct i915_wa *wa;
- struct i915_request *rq;
- struct i915_vma *vma;
- unsigned int i;
- u32 *results;
- int err;
-
- if (!wal->count)
- return 0;
-
- vma = create_scratch(&engine->i915->ggtt.vm, wal->count);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- rq = i915_request_alloc(engine, engine->kernel_context->gem_context);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_vma;
- }
-
- err = wa_list_srm(rq, wal, vma);
- if (err)
- goto err_vma;
-
- i915_request_add(rq);
- if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
- err = -ETIME;
- goto err_vma;
- }
-
- results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(results)) {
- err = PTR_ERR(results);
- goto err_vma;
- }
-
- err = 0;
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- if (!wa_verify(wa, results[i], wal->name, from))
- err = -ENXIO;
-
- i915_gem_object_unpin_map(vma->obj);
-
-err_vma:
- i915_vma_unpin(vma);
- i915_vma_put(vma);
- return err;
-}
-
-int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
- const char *from)
-{
- return engine_wa_list_verify(engine, &engine->wa_list, from);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_workarounds.c"
-#endif
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#ifndef _I915_WORKAROUNDS_H_
-#define _I915_WORKAROUNDS_H_
-
-#include <linux/slab.h>
-
-#include "intel_workarounds_types.h"
-
-static inline void intel_wa_list_free(struct i915_wa_list *wal)
-{
- kfree(wal->list);
- memset(wal, 0, sizeof(*wal));
-}
-
-void intel_engine_init_ctx_wa(struct intel_engine_cs *engine);
-int intel_engine_emit_ctx_wa(struct i915_request *rq);
-
-void intel_gt_init_workarounds(struct drm_i915_private *i915);
-void intel_gt_apply_workarounds(struct drm_i915_private *i915);
-bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
- const char *from);
-
-void intel_engine_init_whitelist(struct intel_engine_cs *engine);
-void intel_engine_apply_whitelist(struct intel_engine_cs *engine);
-
-void intel_engine_init_workarounds(struct intel_engine_cs *engine);
-void intel_engine_apply_workarounds(struct intel_engine_cs *engine);
-int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
- const char *from);
-
-#endif
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2014-2018 Intel Corporation
- */
-
-#ifndef __INTEL_WORKAROUNDS_TYPES_H__
-#define __INTEL_WORKAROUNDS_TYPES_H__
-
-#include <linux/types.h>
-
-#include "i915_reg.h"
-
-struct i915_wa {
- i915_reg_t reg;
- u32 mask;
- u32 val;
- u32 read;
-};
-
-struct i915_wa_list {
- const char *name;
- struct i915_wa *list;
- unsigned int count;
- unsigned int wa_count;
-};
-
-#endif /* __INTEL_WORKAROUNDS_TYPES_H__ */
#include <linux/prime_numbers.h>
-#include "../i915_reset.h"
-#include "../i915_selftest.h"
+#include "gt/intel_reset.h"
+#include "i915_selftest.h"
+
#include "i915_random.h"
#include "igt_flush_test.h"
#include "igt_live_test.h"
#include "igt_reset.h"
+#include "gt/intel_engine.h"
+
#include "../i915_drv.h"
-#include "../intel_ringbuffer.h"
void igt_global_reset_lock(struct drm_i915_private *i915)
{
#include "../i915_selftest.h"
+#include "gt/intel_engine.h"
+
#include "../i915_drv.h"
#include "../i915_request.h"
-#include "../intel_ringbuffer.h"
#include "../i915_gem_context.h"
struct igt_spinner {
+++ /dev/null
-/*
- * SPDX-License-Identifier: GPL-2.0
- *
- * Copyright © 2018 Intel Corporation
- */
-
-#include "../i915_selftest.h"
-
-static int intel_mmio_bases_check(void *arg)
-{
- int i, j;
-
- for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
- const struct engine_info *info = &intel_engines[i];
- char name[INTEL_ENGINE_CS_MAX_NAME];
- u8 prev = U8_MAX;
-
- __sprint_engine_name(name, info);
-
- for (j = 0; j < MAX_MMIO_BASES; j++) {
- u8 gen = info->mmio_bases[j].gen;
- u32 base = info->mmio_bases[j].base;
-
- if (gen >= prev) {
- pr_err("%s: %s: mmio base for gen %x "
- "is before the one for gen %x\n",
- __func__, name, prev, gen);
- return -EINVAL;
- }
-
- if (gen == 0)
- break;
-
- if (!base) {
- pr_err("%s: %s: invalid mmio base (%x) "
- "for gen %x at entry %u\n",
- __func__, name, base, gen, j);
- return -EINVAL;
- }
-
- prev = gen;
- }
-
- pr_info("%s: min gen supported for %s = %d\n",
- __func__, name, prev);
- }
-
- return 0;
-}
-
-int intel_engine_cs_mock_selftests(void)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(intel_mmio_bases_check),
- };
-
- return i915_subtests(tests, NULL);
-}
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include <linux/kthread.h>
-
-#include "../i915_selftest.h"
-#include "i915_random.h"
-#include "igt_flush_test.h"
-#include "igt_reset.h"
-#include "igt_wedge_me.h"
-
-#include "mock_context.h"
-#include "mock_drm.h"
-
-#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
-
-struct hang {
- struct drm_i915_private *i915;
- struct drm_i915_gem_object *hws;
- struct drm_i915_gem_object *obj;
- struct i915_gem_context *ctx;
- u32 *seqno;
- u32 *batch;
-};
-
-static int hang_init(struct hang *h, struct drm_i915_private *i915)
-{
- void *vaddr;
- int err;
-
- memset(h, 0, sizeof(*h));
- h->i915 = i915;
-
- h->ctx = kernel_context(i915);
- if (IS_ERR(h->ctx))
- return PTR_ERR(h->ctx);
-
- GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx));
-
- h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(h->hws)) {
- err = PTR_ERR(h->hws);
- goto err_ctx;
- }
-
- h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(h->obj)) {
- err = PTR_ERR(h->obj);
- goto err_hws;
- }
-
- i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC);
- vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
- goto err_obj;
- }
- h->seqno = memset(vaddr, 0xff, PAGE_SIZE);
-
- vaddr = i915_gem_object_pin_map(h->obj,
- i915_coherent_map_type(i915));
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
- goto err_unpin_hws;
- }
- h->batch = vaddr;
-
- return 0;
-
-err_unpin_hws:
- i915_gem_object_unpin_map(h->hws);
-err_obj:
- i915_gem_object_put(h->obj);
-err_hws:
- i915_gem_object_put(h->hws);
-err_ctx:
- kernel_context_close(h->ctx);
- return err;
-}
-
-static u64 hws_address(const struct i915_vma *hws,
- const struct i915_request *rq)
-{
- return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
-}
-
-static int move_to_active(struct i915_vma *vma,
- struct i915_request *rq,
- unsigned int flags)
-{
- int err;
-
- err = i915_vma_move_to_active(vma, rq, flags);
- if (err)
- return err;
-
- if (!i915_gem_object_has_active_reference(vma->obj)) {
- i915_gem_object_get(vma->obj);
- i915_gem_object_set_active_reference(vma->obj);
- }
-
- return 0;
-}
-
-static struct i915_request *
-hang_create_request(struct hang *h, struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = h->i915;
- struct i915_address_space *vm =
- h->ctx->ppgtt ? &h->ctx->ppgtt->vm : &i915->ggtt.vm;
- struct i915_request *rq = NULL;
- struct i915_vma *hws, *vma;
- unsigned int flags;
- u32 *batch;
- int err;
-
- if (i915_gem_object_is_active(h->obj)) {
- struct drm_i915_gem_object *obj;
- void *vaddr;
-
- obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- vaddr = i915_gem_object_pin_map(obj,
- i915_coherent_map_type(h->i915));
- if (IS_ERR(vaddr)) {
- i915_gem_object_put(obj);
- return ERR_CAST(vaddr);
- }
-
- i915_gem_object_unpin_map(h->obj);
- i915_gem_object_put(h->obj);
-
- h->obj = obj;
- h->batch = vaddr;
- }
-
- vma = i915_vma_instance(h->obj, vm, NULL);
- if (IS_ERR(vma))
- return ERR_CAST(vma);
-
- hws = i915_vma_instance(h->hws, vm, NULL);
- if (IS_ERR(hws))
- return ERR_CAST(hws);
-
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- return ERR_PTR(err);
-
- err = i915_vma_pin(hws, 0, 0, PIN_USER);
- if (err)
- goto unpin_vma;
-
- rq = i915_request_alloc(engine, h->ctx);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto unpin_hws;
- }
-
- err = move_to_active(vma, rq, 0);
- if (err)
- goto cancel_rq;
-
- err = move_to_active(hws, rq, 0);
- if (err)
- goto cancel_rq;
-
- batch = h->batch;
- if (INTEL_GEN(i915) >= 8) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(hws_address(hws, rq));
- *batch++ = upper_32_bits(hws_address(hws, rq));
- *batch++ = rq->fence.seqno;
- *batch++ = MI_ARB_CHECK;
-
- memset(batch, 0, 1024);
- batch += 1024 / sizeof(*batch);
-
- *batch++ = MI_ARB_CHECK;
- *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
- *batch++ = lower_32_bits(vma->node.start);
- *batch++ = upper_32_bits(vma->node.start);
- } else if (INTEL_GEN(i915) >= 6) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = 0;
- *batch++ = lower_32_bits(hws_address(hws, rq));
- *batch++ = rq->fence.seqno;
- *batch++ = MI_ARB_CHECK;
-
- memset(batch, 0, 1024);
- batch += 1024 / sizeof(*batch);
-
- *batch++ = MI_ARB_CHECK;
- *batch++ = MI_BATCH_BUFFER_START | 1 << 8;
- *batch++ = lower_32_bits(vma->node.start);
- } else if (INTEL_GEN(i915) >= 4) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *batch++ = 0;
- *batch++ = lower_32_bits(hws_address(hws, rq));
- *batch++ = rq->fence.seqno;
- *batch++ = MI_ARB_CHECK;
-
- memset(batch, 0, 1024);
- batch += 1024 / sizeof(*batch);
-
- *batch++ = MI_ARB_CHECK;
- *batch++ = MI_BATCH_BUFFER_START | 2 << 6;
- *batch++ = lower_32_bits(vma->node.start);
- } else {
- *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *batch++ = lower_32_bits(hws_address(hws, rq));
- *batch++ = rq->fence.seqno;
- *batch++ = MI_ARB_CHECK;
-
- memset(batch, 0, 1024);
- batch += 1024 / sizeof(*batch);
-
- *batch++ = MI_ARB_CHECK;
- *batch++ = MI_BATCH_BUFFER_START | 2 << 6;
- *batch++ = lower_32_bits(vma->node.start);
- }
- *batch++ = MI_BATCH_BUFFER_END; /* not reached */
- i915_gem_chipset_flush(h->i915);
-
- if (rq->engine->emit_init_breadcrumb) {
- err = rq->engine->emit_init_breadcrumb(rq);
- if (err)
- goto cancel_rq;
- }
-
- flags = 0;
- if (INTEL_GEN(vm->i915) <= 5)
- flags |= I915_DISPATCH_SECURE;
-
- err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
-
-cancel_rq:
- if (err) {
- i915_request_skip(rq, err);
- i915_request_add(rq);
- }
-unpin_hws:
- i915_vma_unpin(hws);
-unpin_vma:
- i915_vma_unpin(vma);
- return err ? ERR_PTR(err) : rq;
-}
-
-static u32 hws_seqno(const struct hang *h, const struct i915_request *rq)
-{
- return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
-}
-
-static void hang_fini(struct hang *h)
-{
- *h->batch = MI_BATCH_BUFFER_END;
- i915_gem_chipset_flush(h->i915);
-
- i915_gem_object_unpin_map(h->obj);
- i915_gem_object_put(h->obj);
-
- i915_gem_object_unpin_map(h->hws);
- i915_gem_object_put(h->hws);
-
- kernel_context_close(h->ctx);
-
- igt_flush_test(h->i915, I915_WAIT_LOCKED);
-}
-
-static bool wait_until_running(struct hang *h, struct i915_request *rq)
-{
- return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
- rq->fence.seqno),
- 10) &&
- wait_for(i915_seqno_passed(hws_seqno(h, rq),
- rq->fence.seqno),
- 1000));
-}
-
-static int igt_hang_sanitycheck(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct i915_request *rq;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- struct hang h;
- int err;
-
- /* Basic check that we can execute our hanging batch */
-
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
- if (err)
- goto unlock;
-
- for_each_engine(engine, i915, id) {
- struct igt_wedge_me w;
- long timeout;
-
- if (!intel_engine_can_store_dword(engine))
- continue;
-
- rq = hang_create_request(&h, engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- pr_err("Failed to create request for %s, err=%d\n",
- engine->name, err);
- goto fini;
- }
-
- i915_request_get(rq);
-
- *h.batch = MI_BATCH_BUFFER_END;
- i915_gem_chipset_flush(i915);
-
- i915_request_add(rq);
-
- timeout = 0;
- igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/)
- timeout = i915_request_wait(rq,
- I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT);
- if (i915_reset_failed(i915))
- timeout = -EIO;
-
- i915_request_put(rq);
-
- if (timeout < 0) {
- err = timeout;
- pr_err("Wait for request failed on %s, err=%d\n",
- engine->name, err);
- goto fini;
- }
- }
-
-fini:
- hang_fini(&h);
-unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-}
-
-static int igt_global_reset(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- unsigned int reset_count;
- int err = 0;
-
- /* Check that we can issue a global GPU reset */
-
- igt_global_reset_lock(i915);
-
- reset_count = i915_reset_count(&i915->gpu_error);
-
- i915_reset(i915, ALL_ENGINES, NULL);
-
- if (i915_reset_count(&i915->gpu_error) == reset_count) {
- pr_err("No GPU reset recorded!\n");
- err = -EINVAL;
- }
-
- igt_global_reset_unlock(i915);
-
- if (i915_reset_failed(i915))
- err = -EIO;
-
- return err;
-}
-
-static int igt_wedged_reset(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- intel_wakeref_t wakeref;
-
- /* Check that we can recover a wedged device with a GPU reset */
-
- igt_global_reset_lock(i915);
- wakeref = intel_runtime_pm_get(i915);
-
- i915_gem_set_wedged(i915);
-
- GEM_BUG_ON(!i915_reset_failed(i915));
- i915_reset(i915, ALL_ENGINES, NULL);
-
- intel_runtime_pm_put(i915, wakeref);
- igt_global_reset_unlock(i915);
-
- return i915_reset_failed(i915) ? -EIO : 0;
-}
-
-static bool wait_for_idle(struct intel_engine_cs *engine)
-{
- return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0;
-}
-
-static int igt_reset_nop(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
- unsigned int reset_count, count;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- struct drm_file *file;
- IGT_TIMEOUT(end_time);
- int err = 0;
-
- /* Check that we can reset during non-user portions of requests */
-
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- mutex_lock(&i915->drm.struct_mutex);
- ctx = live_context(i915, file);
- mutex_unlock(&i915->drm.struct_mutex);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
-
- i915_gem_context_clear_bannable(ctx);
- wakeref = intel_runtime_pm_get(i915);
- reset_count = i915_reset_count(&i915->gpu_error);
- count = 0;
- do {
- mutex_lock(&i915->drm.struct_mutex);
- for_each_engine(engine, i915, id) {
- int i;
-
- for (i = 0; i < 16; i++) {
- struct i915_request *rq;
-
- rq = i915_request_alloc(engine, ctx);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- break;
- }
-
- i915_request_add(rq);
- }
- }
- mutex_unlock(&i915->drm.struct_mutex);
-
- igt_global_reset_lock(i915);
- i915_reset(i915, ALL_ENGINES, NULL);
- igt_global_reset_unlock(i915);
- if (i915_reset_failed(i915)) {
- err = -EIO;
- break;
- }
-
- if (i915_reset_count(&i915->gpu_error) !=
- reset_count + ++count) {
- pr_err("Full GPU reset not recorded!\n");
- err = -EINVAL;
- break;
- }
-
- if (!i915_reset_flush(i915)) {
- struct drm_printer p =
- drm_info_printer(i915->drm.dev);
-
- pr_err("%s failed to idle after reset\n",
- engine->name);
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
-
- err = -EIO;
- break;
- }
-
- err = igt_flush_test(i915, 0);
- if (err)
- break;
- } while (time_before(jiffies, end_time));
- pr_info("%s: %d resets\n", __func__, count);
-
- mutex_lock(&i915->drm.struct_mutex);
- err = igt_flush_test(i915, I915_WAIT_LOCKED);
- mutex_unlock(&i915->drm.struct_mutex);
-
- intel_runtime_pm_put(i915, wakeref);
-
-out:
- mock_file_free(i915, file);
- if (i915_reset_failed(i915))
- err = -EIO;
- return err;
-}
-
-static int igt_reset_nop_engine(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- struct drm_file *file;
- int err = 0;
-
- /* Check that we can engine-reset during non-user portions */
-
- if (!intel_has_reset_engine(i915))
- return 0;
-
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- mutex_lock(&i915->drm.struct_mutex);
- ctx = live_context(i915, file);
- mutex_unlock(&i915->drm.struct_mutex);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
-
- i915_gem_context_clear_bannable(ctx);
- wakeref = intel_runtime_pm_get(i915);
- for_each_engine(engine, i915, id) {
- unsigned int reset_count, reset_engine_count;
- unsigned int count;
- IGT_TIMEOUT(end_time);
-
- reset_count = i915_reset_count(&i915->gpu_error);
- reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
- engine);
- count = 0;
-
- set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- do {
- int i;
-
- if (!wait_for_idle(engine)) {
- pr_err("%s failed to idle before reset\n",
- engine->name);
- err = -EIO;
- break;
- }
-
- mutex_lock(&i915->drm.struct_mutex);
- for (i = 0; i < 16; i++) {
- struct i915_request *rq;
-
- rq = i915_request_alloc(engine, ctx);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- break;
- }
-
- i915_request_add(rq);
- }
- mutex_unlock(&i915->drm.struct_mutex);
-
- err = i915_reset_engine(engine, NULL);
- if (err) {
- pr_err("i915_reset_engine failed\n");
- break;
- }
-
- if (i915_reset_count(&i915->gpu_error) != reset_count) {
- pr_err("Full GPU reset recorded! (engine reset expected)\n");
- err = -EINVAL;
- break;
- }
-
- if (i915_reset_engine_count(&i915->gpu_error, engine) !=
- reset_engine_count + ++count) {
- pr_err("%s engine reset not recorded!\n",
- engine->name);
- err = -EINVAL;
- break;
- }
-
- if (!i915_reset_flush(i915)) {
- struct drm_printer p =
- drm_info_printer(i915->drm.dev);
-
- pr_err("%s failed to idle after reset\n",
- engine->name);
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
-
- err = -EIO;
- break;
- }
- } while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
-
- if (err)
- break;
-
- err = igt_flush_test(i915, 0);
- if (err)
- break;
- }
-
- mutex_lock(&i915->drm.struct_mutex);
- err = igt_flush_test(i915, I915_WAIT_LOCKED);
- mutex_unlock(&i915->drm.struct_mutex);
-
- intel_runtime_pm_put(i915, wakeref);
-out:
- mock_file_free(i915, file);
- if (i915_reset_failed(i915))
- err = -EIO;
- return err;
-}
-
-static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- struct hang h;
- int err = 0;
-
- /* Check that we can issue an engine reset on an idle engine (no-op) */
-
- if (!intel_has_reset_engine(i915))
- return 0;
-
- if (active) {
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
- mutex_unlock(&i915->drm.struct_mutex);
- if (err)
- return err;
- }
-
- for_each_engine(engine, i915, id) {
- unsigned int reset_count, reset_engine_count;
- IGT_TIMEOUT(end_time);
-
- if (active && !intel_engine_can_store_dword(engine))
- continue;
-
- if (!wait_for_idle(engine)) {
- pr_err("%s failed to idle before reset\n",
- engine->name);
- err = -EIO;
- break;
- }
-
- reset_count = i915_reset_count(&i915->gpu_error);
- reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
- engine);
-
- set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- do {
- if (active) {
- struct i915_request *rq;
-
- mutex_lock(&i915->drm.struct_mutex);
- rq = hang_create_request(&h, engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- mutex_unlock(&i915->drm.struct_mutex);
- break;
- }
-
- i915_request_get(rq);
- i915_request_add(rq);
- mutex_unlock(&i915->drm.struct_mutex);
-
- if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
-
- pr_err("%s: Failed to start request %llx, at %x\n",
- __func__, rq->fence.seqno, hws_seqno(&h, rq));
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
-
- i915_request_put(rq);
- err = -EIO;
- break;
- }
-
- i915_request_put(rq);
- }
-
- err = i915_reset_engine(engine, NULL);
- if (err) {
- pr_err("i915_reset_engine failed\n");
- break;
- }
-
- if (i915_reset_count(&i915->gpu_error) != reset_count) {
- pr_err("Full GPU reset recorded! (engine reset expected)\n");
- err = -EINVAL;
- break;
- }
-
- if (i915_reset_engine_count(&i915->gpu_error, engine) !=
- ++reset_engine_count) {
- pr_err("%s engine reset not recorded!\n",
- engine->name);
- err = -EINVAL;
- break;
- }
-
- if (!i915_reset_flush(i915)) {
- struct drm_printer p =
- drm_info_printer(i915->drm.dev);
-
- pr_err("%s failed to idle after reset\n",
- engine->name);
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
-
- err = -EIO;
- break;
- }
- } while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
-
- if (err)
- break;
-
- err = igt_flush_test(i915, 0);
- if (err)
- break;
- }
-
- if (i915_reset_failed(i915))
- err = -EIO;
-
- if (active) {
- mutex_lock(&i915->drm.struct_mutex);
- hang_fini(&h);
- mutex_unlock(&i915->drm.struct_mutex);
- }
-
- return err;
-}
-
-static int igt_reset_idle_engine(void *arg)
-{
- return __igt_reset_engine(arg, false);
-}
-
-static int igt_reset_active_engine(void *arg)
-{
- return __igt_reset_engine(arg, true);
-}
-
-struct active_engine {
- struct task_struct *task;
- struct intel_engine_cs *engine;
- unsigned long resets;
- unsigned int flags;
-};
-
-#define TEST_ACTIVE BIT(0)
-#define TEST_OTHERS BIT(1)
-#define TEST_SELF BIT(2)
-#define TEST_PRIORITY BIT(3)
-
-static int active_request_put(struct i915_request *rq)
-{
- int err = 0;
-
- if (!rq)
- return 0;
-
- if (i915_request_wait(rq, 0, 5 * HZ) < 0) {
- GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n",
- rq->engine->name,
- rq->fence.context,
- rq->fence.seqno);
- GEM_TRACE_DUMP();
-
- i915_gem_set_wedged(rq->i915);
- err = -EIO;
- }
-
- i915_request_put(rq);
-
- return err;
-}
-
-static int active_engine(void *data)
-{
- I915_RND_STATE(prng);
- struct active_engine *arg = data;
- struct intel_engine_cs *engine = arg->engine;
- struct i915_request *rq[8] = {};
- struct i915_gem_context *ctx[ARRAY_SIZE(rq)];
- struct drm_file *file;
- unsigned long count = 0;
- int err = 0;
-
- file = mock_file(engine->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- for (count = 0; count < ARRAY_SIZE(ctx); count++) {
- mutex_lock(&engine->i915->drm.struct_mutex);
- ctx[count] = live_context(engine->i915, file);
- mutex_unlock(&engine->i915->drm.struct_mutex);
- if (IS_ERR(ctx[count])) {
- err = PTR_ERR(ctx[count]);
- while (--count)
- i915_gem_context_put(ctx[count]);
- goto err_file;
- }
- }
-
- while (!kthread_should_stop()) {
- unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
- struct i915_request *old = rq[idx];
- struct i915_request *new;
-
- mutex_lock(&engine->i915->drm.struct_mutex);
- new = i915_request_alloc(engine, ctx[idx]);
- if (IS_ERR(new)) {
- mutex_unlock(&engine->i915->drm.struct_mutex);
- err = PTR_ERR(new);
- break;
- }
-
- if (arg->flags & TEST_PRIORITY)
- ctx[idx]->sched.priority =
- i915_prandom_u32_max_state(512, &prng);
-
- rq[idx] = i915_request_get(new);
- i915_request_add(new);
- mutex_unlock(&engine->i915->drm.struct_mutex);
-
- err = active_request_put(old);
- if (err)
- break;
-
- cond_resched();
- }
-
- for (count = 0; count < ARRAY_SIZE(rq); count++) {
- int err__ = active_request_put(rq[count]);
-
- /* Keep the first error */
- if (!err)
- err = err__;
- }
-
-err_file:
- mock_file_free(engine->i915, file);
- return err;
-}
-
-static int __igt_reset_engines(struct drm_i915_private *i915,
- const char *test_name,
- unsigned int flags)
-{
- struct intel_engine_cs *engine, *other;
- enum intel_engine_id id, tmp;
- struct hang h;
- int err = 0;
-
- /* Check that issuing a reset on one engine does not interfere
- * with any other engine.
- */
-
- if (!intel_has_reset_engine(i915))
- return 0;
-
- if (flags & TEST_ACTIVE) {
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
- mutex_unlock(&i915->drm.struct_mutex);
- if (err)
- return err;
-
- if (flags & TEST_PRIORITY)
- h.ctx->sched.priority = 1024;
- }
-
- for_each_engine(engine, i915, id) {
- struct active_engine threads[I915_NUM_ENGINES] = {};
- unsigned long global = i915_reset_count(&i915->gpu_error);
- unsigned long count = 0, reported;
- IGT_TIMEOUT(end_time);
-
- if (flags & TEST_ACTIVE &&
- !intel_engine_can_store_dword(engine))
- continue;
-
- if (!wait_for_idle(engine)) {
- pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n",
- engine->name, test_name);
- err = -EIO;
- break;
- }
-
- memset(threads, 0, sizeof(threads));
- for_each_engine(other, i915, tmp) {
- struct task_struct *tsk;
-
- threads[tmp].resets =
- i915_reset_engine_count(&i915->gpu_error,
- other);
-
- if (!(flags & TEST_OTHERS))
- continue;
-
- if (other == engine && !(flags & TEST_SELF))
- continue;
-
- threads[tmp].engine = other;
- threads[tmp].flags = flags;
-
- tsk = kthread_run(active_engine, &threads[tmp],
- "igt/%s", other->name);
- if (IS_ERR(tsk)) {
- err = PTR_ERR(tsk);
- goto unwind;
- }
-
- threads[tmp].task = tsk;
- get_task_struct(tsk);
- }
-
- set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- do {
- struct i915_request *rq = NULL;
-
- if (flags & TEST_ACTIVE) {
- mutex_lock(&i915->drm.struct_mutex);
- rq = hang_create_request(&h, engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- mutex_unlock(&i915->drm.struct_mutex);
- break;
- }
-
- i915_request_get(rq);
- i915_request_add(rq);
- mutex_unlock(&i915->drm.struct_mutex);
-
- if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
-
- pr_err("%s: Failed to start request %llx, at %x\n",
- __func__, rq->fence.seqno, hws_seqno(&h, rq));
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
-
- i915_request_put(rq);
- err = -EIO;
- break;
- }
- }
-
- err = i915_reset_engine(engine, NULL);
- if (err) {
- pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
- engine->name, test_name, err);
- break;
- }
-
- count++;
-
- if (rq) {
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- struct drm_printer p =
- drm_info_printer(i915->drm.dev);
-
- pr_err("i915_reset_engine(%s:%s):"
- " failed to complete request after reset\n",
- engine->name, test_name);
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
- i915_request_put(rq);
-
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
- err = -EIO;
- break;
- }
-
- i915_request_put(rq);
- }
-
- if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
- struct drm_printer p =
- drm_info_printer(i915->drm.dev);
-
- pr_err("i915_reset_engine(%s:%s):"
- " failed to idle after reset\n",
- engine->name, test_name);
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
-
- err = -EIO;
- break;
- }
- } while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- pr_info("i915_reset_engine(%s:%s): %lu resets\n",
- engine->name, test_name, count);
-
- reported = i915_reset_engine_count(&i915->gpu_error, engine);
- reported -= threads[engine->id].resets;
- if (reported != count) {
- pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
- engine->name, test_name, count, reported);
- if (!err)
- err = -EINVAL;
- }
-
-unwind:
- for_each_engine(other, i915, tmp) {
- int ret;
-
- if (!threads[tmp].task)
- continue;
-
- ret = kthread_stop(threads[tmp].task);
- if (ret) {
- pr_err("kthread for other engine %s failed, err=%d\n",
- other->name, ret);
- if (!err)
- err = ret;
- }
- put_task_struct(threads[tmp].task);
-
- if (other != engine &&
- threads[tmp].resets !=
- i915_reset_engine_count(&i915->gpu_error, other)) {
- pr_err("Innocent engine %s was reset (count=%ld)\n",
- other->name,
- i915_reset_engine_count(&i915->gpu_error,
- other) -
- threads[tmp].resets);
- if (!err)
- err = -EINVAL;
- }
- }
-
- if (global != i915_reset_count(&i915->gpu_error)) {
- pr_err("Global reset (count=%ld)!\n",
- i915_reset_count(&i915->gpu_error) - global);
- if (!err)
- err = -EINVAL;
- }
-
- if (err)
- break;
-
- err = igt_flush_test(i915, 0);
- if (err)
- break;
- }
-
- if (i915_reset_failed(i915))
- err = -EIO;
-
- if (flags & TEST_ACTIVE) {
- mutex_lock(&i915->drm.struct_mutex);
- hang_fini(&h);
- mutex_unlock(&i915->drm.struct_mutex);
- }
-
- return err;
-}
-
-static int igt_reset_engines(void *arg)
-{
- static const struct {
- const char *name;
- unsigned int flags;
- } phases[] = {
- { "idle", 0 },
- { "active", TEST_ACTIVE },
- { "others-idle", TEST_OTHERS },
- { "others-active", TEST_OTHERS | TEST_ACTIVE },
- {
- "others-priority",
- TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY
- },
- {
- "self-priority",
- TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
- },
- { }
- };
- struct drm_i915_private *i915 = arg;
- typeof(*phases) *p;
- int err;
-
- for (p = phases; p->name; p++) {
- if (p->flags & TEST_PRIORITY) {
- if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
- continue;
- }
-
- err = __igt_reset_engines(arg, p->name, p->flags);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static u32 fake_hangcheck(struct drm_i915_private *i915,
- intel_engine_mask_t mask)
-{
- u32 count = i915_reset_count(&i915->gpu_error);
-
- i915_reset(i915, mask, NULL);
-
- return count;
-}
-
-static int igt_reset_wait(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct i915_request *rq;
- unsigned int reset_count;
- struct hang h;
- long timeout;
- int err;
-
- if (!intel_engine_can_store_dword(i915->engine[RCS0]))
- return 0;
-
- /* Check that we detect a stuck waiter and issue a reset */
-
- igt_global_reset_lock(i915);
-
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
- if (err)
- goto unlock;
-
- rq = hang_create_request(&h, i915->engine[RCS0]);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto fini;
- }
-
- i915_request_get(rq);
- i915_request_add(rq);
-
- if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
-
- pr_err("%s: Failed to start request %llx, at %x\n",
- __func__, rq->fence.seqno, hws_seqno(&h, rq));
- intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
-
- i915_gem_set_wedged(i915);
-
- err = -EIO;
- goto out_rq;
- }
-
- reset_count = fake_hangcheck(i915, ALL_ENGINES);
-
- timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
- if (timeout < 0) {
- pr_err("i915_request_wait failed on a stuck request: err=%ld\n",
- timeout);
- err = timeout;
- goto out_rq;
- }
-
- if (i915_reset_count(&i915->gpu_error) == reset_count) {
- pr_err("No GPU reset recorded!\n");
- err = -EINVAL;
- goto out_rq;
- }
-
-out_rq:
- i915_request_put(rq);
-fini:
- hang_fini(&h);
-unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- igt_global_reset_unlock(i915);
-
- if (i915_reset_failed(i915))
- return -EIO;
-
- return err;
-}
-
-struct evict_vma {
- struct completion completion;
- struct i915_vma *vma;
-};
-
-static int evict_vma(void *data)
-{
- struct evict_vma *arg = data;
- struct i915_address_space *vm = arg->vma->vm;
- struct drm_i915_private *i915 = vm->i915;
- struct drm_mm_node evict = arg->vma->node;
- int err;
-
- complete(&arg->completion);
-
- mutex_lock(&i915->drm.struct_mutex);
- err = i915_gem_evict_for_node(vm, &evict, 0);
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
-}
-
-static int evict_fence(void *data)
-{
- struct evict_vma *arg = data;
- struct drm_i915_private *i915 = arg->vma->vm->i915;
- int err;
-
- complete(&arg->completion);
-
- mutex_lock(&i915->drm.struct_mutex);
-
- /* Mark the fence register as dirty to force the mmio update. */
- err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
- if (err) {
- pr_err("Invalid Y-tiling settings; err:%d\n", err);
- goto out_unlock;
- }
-
- err = i915_vma_pin_fence(arg->vma);
- if (err) {
- pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
- goto out_unlock;
- }
-
- i915_vma_unpin_fence(arg->vma);
-
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
-}
-
-static int __igt_reset_evict_vma(struct drm_i915_private *i915,
- struct i915_address_space *vm,
- int (*fn)(void *),
- unsigned int flags)
-{
- struct drm_i915_gem_object *obj;
- struct task_struct *tsk = NULL;
- struct i915_request *rq;
- struct evict_vma arg;
- struct hang h;
- int err;
-
- if (!intel_engine_can_store_dword(i915->engine[RCS0]))
- return 0;
-
- /* Check that we can recover an unbind stuck on a hanging request */
-
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
- if (err)
- goto unlock;
-
- obj = i915_gem_object_create_internal(i915, SZ_1M);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto fini;
- }
-
- if (flags & EXEC_OBJECT_NEEDS_FENCE) {
- err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512);
- if (err) {
- pr_err("Invalid X-tiling settings; err:%d\n", err);
- goto out_obj;
- }
- }
-
- arg.vma = i915_vma_instance(obj, vm, NULL);
- if (IS_ERR(arg.vma)) {
- err = PTR_ERR(arg.vma);
- goto out_obj;
- }
-
- rq = hang_create_request(&h, i915->engine[RCS0]);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out_obj;
- }
-
- err = i915_vma_pin(arg.vma, 0, 0,
- i915_vma_is_ggtt(arg.vma) ?
- PIN_GLOBAL | PIN_MAPPABLE :
- PIN_USER);
- if (err) {
- i915_request_add(rq);
- goto out_obj;
- }
-
- if (flags & EXEC_OBJECT_NEEDS_FENCE) {
- err = i915_vma_pin_fence(arg.vma);
- if (err) {
- pr_err("Unable to pin X-tiled fence; err:%d\n", err);
- i915_vma_unpin(arg.vma);
- i915_request_add(rq);
- goto out_obj;
- }
- }
-
- err = i915_vma_move_to_active(arg.vma, rq, flags);
-
- if (flags & EXEC_OBJECT_NEEDS_FENCE)
- i915_vma_unpin_fence(arg.vma);
- i915_vma_unpin(arg.vma);
-
- i915_request_get(rq);
- i915_request_add(rq);
- if (err)
- goto out_rq;
-
- mutex_unlock(&i915->drm.struct_mutex);
-
- if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
-
- pr_err("%s: Failed to start request %llx, at %x\n",
- __func__, rq->fence.seqno, hws_seqno(&h, rq));
- intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
-
- i915_gem_set_wedged(i915);
- goto out_reset;
- }
-
- init_completion(&arg.completion);
-
- tsk = kthread_run(fn, &arg, "igt/evict_vma");
- if (IS_ERR(tsk)) {
- err = PTR_ERR(tsk);
- tsk = NULL;
- goto out_reset;
- }
- get_task_struct(tsk);
-
- wait_for_completion(&arg.completion);
-
- if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
-
- pr_err("igt/evict_vma kthread did not wait\n");
- intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
-
- i915_gem_set_wedged(i915);
- goto out_reset;
- }
-
-out_reset:
- igt_global_reset_lock(i915);
- fake_hangcheck(rq->i915, rq->engine->mask);
- igt_global_reset_unlock(i915);
-
- if (tsk) {
- struct igt_wedge_me w;
-
- /* The reset, even indirectly, should take less than 10ms. */
- igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/)
- err = kthread_stop(tsk);
-
- put_task_struct(tsk);
- }
-
- mutex_lock(&i915->drm.struct_mutex);
-out_rq:
- i915_request_put(rq);
-out_obj:
- i915_gem_object_put(obj);
-fini:
- hang_fini(&h);
-unlock:
- mutex_unlock(&i915->drm.struct_mutex);
-
- if (i915_reset_failed(i915))
- return -EIO;
-
- return err;
-}
-
-static int igt_reset_evict_ggtt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
-
- return __igt_reset_evict_vma(i915, &i915->ggtt.vm,
- evict_vma, EXEC_OBJECT_WRITE);
-}
-
-static int igt_reset_evict_ppgtt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct i915_gem_context *ctx;
- struct drm_file *file;
- int err;
-
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- mutex_lock(&i915->drm.struct_mutex);
- ctx = live_context(i915, file);
- mutex_unlock(&i915->drm.struct_mutex);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
-
- err = 0;
- if (ctx->ppgtt) /* aliasing == global gtt locking, covered above */
- err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm,
- evict_vma, EXEC_OBJECT_WRITE);
-
-out:
- mock_file_free(i915, file);
- return err;
-}
-
-static int igt_reset_evict_fence(void *arg)
-{
- struct drm_i915_private *i915 = arg;
-
- return __igt_reset_evict_vma(i915, &i915->ggtt.vm,
- evict_fence, EXEC_OBJECT_NEEDS_FENCE);
-}
-
-static int wait_for_others(struct drm_i915_private *i915,
- struct intel_engine_cs *exclude)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id) {
- if (engine == exclude)
- continue;
-
- if (!wait_for_idle(engine))
- return -EIO;
- }
-
- return 0;
-}
-
-static int igt_reset_queue(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- struct hang h;
- int err;
-
- /* Check that we replay pending requests following a hang */
-
- igt_global_reset_lock(i915);
-
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
- if (err)
- goto unlock;
-
- for_each_engine(engine, i915, id) {
- struct i915_request *prev;
- IGT_TIMEOUT(end_time);
- unsigned int count;
-
- if (!intel_engine_can_store_dword(engine))
- continue;
-
- prev = hang_create_request(&h, engine);
- if (IS_ERR(prev)) {
- err = PTR_ERR(prev);
- goto fini;
- }
-
- i915_request_get(prev);
- i915_request_add(prev);
-
- count = 0;
- do {
- struct i915_request *rq;
- unsigned int reset_count;
-
- rq = hang_create_request(&h, engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto fini;
- }
-
- i915_request_get(rq);
- i915_request_add(rq);
-
- /*
- * XXX We don't handle resetting the kernel context
- * very well. If we trigger a device reset twice in
- * quick succession while the kernel context is
- * executing, we may end up skipping the breadcrumb.
- * This is really only a problem for the selftest as
- * normally there is a large interlude between resets
- * (hangcheck), or we focus on resetting just one
- * engine and so avoid repeatedly resetting innocents.
- */
- err = wait_for_others(i915, engine);
- if (err) {
- pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
- __func__, engine->name);
- i915_request_put(rq);
- i915_request_put(prev);
-
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
- goto fini;
- }
-
- if (!wait_until_running(&h, prev)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
-
- pr_err("%s(%s): Failed to start request %llx, at %x\n",
- __func__, engine->name,
- prev->fence.seqno, hws_seqno(&h, prev));
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
-
- i915_request_put(rq);
- i915_request_put(prev);
-
- i915_gem_set_wedged(i915);
-
- err = -EIO;
- goto fini;
- }
-
- reset_count = fake_hangcheck(i915, BIT(id));
-
- if (prev->fence.error != -EIO) {
- pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
- prev->fence.error);
- i915_request_put(rq);
- i915_request_put(prev);
- err = -EINVAL;
- goto fini;
- }
-
- if (rq->fence.error) {
- pr_err("Fence error status not zero [%d] after unrelated reset\n",
- rq->fence.error);
- i915_request_put(rq);
- i915_request_put(prev);
- err = -EINVAL;
- goto fini;
- }
-
- if (i915_reset_count(&i915->gpu_error) == reset_count) {
- pr_err("No GPU reset recorded!\n");
- i915_request_put(rq);
- i915_request_put(prev);
- err = -EINVAL;
- goto fini;
- }
-
- i915_request_put(prev);
- prev = rq;
- count++;
- } while (time_before(jiffies, end_time));
- pr_info("%s: Completed %d resets\n", engine->name, count);
-
- *h.batch = MI_BATCH_BUFFER_END;
- i915_gem_chipset_flush(i915);
-
- i915_request_put(prev);
-
- err = igt_flush_test(i915, I915_WAIT_LOCKED);
- if (err)
- break;
- }
-
-fini:
- hang_fini(&h);
-unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- igt_global_reset_unlock(i915);
-
- if (i915_reset_failed(i915))
- return -EIO;
-
- return err;
-}
-
-static int igt_handle_error(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine = i915->engine[RCS0];
- struct hang h;
- struct i915_request *rq;
- struct i915_gpu_state *error;
- int err;
-
- /* Check that we can issue a global GPU and engine reset */
-
- if (!intel_has_reset_engine(i915))
- return 0;
-
- if (!engine || !intel_engine_can_store_dword(engine))
- return 0;
-
- mutex_lock(&i915->drm.struct_mutex);
-
- err = hang_init(&h, i915);
- if (err)
- goto err_unlock;
-
- rq = hang_create_request(&h, engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_fini;
- }
-
- i915_request_get(rq);
- i915_request_add(rq);
-
- if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
-
- pr_err("%s: Failed to start request %llx, at %x\n",
- __func__, rq->fence.seqno, hws_seqno(&h, rq));
- intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
-
- i915_gem_set_wedged(i915);
-
- err = -EIO;
- goto err_request;
- }
-
- mutex_unlock(&i915->drm.struct_mutex);
-
- /* Temporarily disable error capture */
- error = xchg(&i915->gpu_error.first_error, (void *)-1);
-
- i915_handle_error(i915, engine->mask, 0, NULL);
-
- xchg(&i915->gpu_error.first_error, error);
-
- mutex_lock(&i915->drm.struct_mutex);
-
- if (rq->fence.error != -EIO) {
- pr_err("Guilty request not identified!\n");
- err = -EINVAL;
- goto err_request;
- }
-
-err_request:
- i915_request_put(rq);
-err_fini:
- hang_fini(&h);
-err_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-}
-
-static void __preempt_begin(void)
-{
- preempt_disable();
-}
-
-static void __preempt_end(void)
-{
- preempt_enable();
-}
-
-static void __softirq_begin(void)
-{
- local_bh_disable();
-}
-
-static void __softirq_end(void)
-{
- local_bh_enable();
-}
-
-static void __hardirq_begin(void)
-{
- local_irq_disable();
-}
-
-static void __hardirq_end(void)
-{
- local_irq_enable();
-}
-
-struct atomic_section {
- const char *name;
- void (*critical_section_begin)(void);
- void (*critical_section_end)(void);
-};
-
-static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
- const struct atomic_section *p,
- const char *mode)
-{
- struct tasklet_struct * const t = &engine->execlists.tasklet;
- int err;
-
- GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
- engine->name, mode, p->name);
-
- tasklet_disable_nosync(t);
- p->critical_section_begin();
-
- err = i915_reset_engine(engine, NULL);
-
- p->critical_section_end();
- tasklet_enable(t);
-
- if (err)
- pr_err("i915_reset_engine(%s:%s) failed under %s\n",
- engine->name, mode, p->name);
-
- return err;
-}
-
-static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
- const struct atomic_section *p)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_request *rq;
- struct hang h;
- int err;
-
- err = __igt_atomic_reset_engine(engine, p, "idle");
- if (err)
- return err;
-
- err = hang_init(&h, i915);
- if (err)
- return err;
-
- rq = hang_create_request(&h, engine);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out;
- }
-
- i915_request_get(rq);
- i915_request_add(rq);
-
- if (wait_until_running(&h, rq)) {
- err = __igt_atomic_reset_engine(engine, p, "active");
- } else {
- pr_err("%s(%s): Failed to start request %llx, at %x\n",
- __func__, engine->name,
- rq->fence.seqno, hws_seqno(&h, rq));
- i915_gem_set_wedged(i915);
- err = -EIO;
- }
-
- if (err == 0) {
- struct igt_wedge_me w;
-
- igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/)
- i915_request_wait(rq,
- I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT);
- if (i915_reset_failed(i915))
- err = -EIO;
- }
-
- i915_request_put(rq);
-out:
- hang_fini(&h);
- return err;
-}
-
-static void force_reset(struct drm_i915_private *i915)
-{
- i915_gem_set_wedged(i915);
- i915_reset(i915, 0, NULL);
-}
-
-static int igt_atomic_reset(void *arg)
-{
- static const struct atomic_section phases[] = {
- { "preempt", __preempt_begin, __preempt_end },
- { "softirq", __softirq_begin, __softirq_end },
- { "hardirq", __hardirq_begin, __hardirq_end },
- { }
- };
- struct drm_i915_private *i915 = arg;
- intel_wakeref_t wakeref;
- int err = 0;
-
- /* Check that the resets are usable from atomic context */
-
- igt_global_reset_lock(i915);
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- /* Flush any requests before we get started and check basics */
- force_reset(i915);
- if (i915_reset_failed(i915))
- goto unlock;
-
- if (intel_has_gpu_reset(i915)) {
- const typeof(*phases) *p;
-
- for (p = phases; p->name; p++) {
- GEM_TRACE("intel_gpu_reset under %s\n", p->name);
-
- p->critical_section_begin();
- err = intel_gpu_reset(i915, ALL_ENGINES);
- p->critical_section_end();
-
- if (err) {
- pr_err("intel_gpu_reset failed under %s\n",
- p->name);
- goto out;
- }
- }
-
- force_reset(i915);
- }
-
- if (USES_GUC_SUBMISSION(i915))
- goto unlock;
-
- if (intel_has_reset_engine(i915)) {
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id) {
- const typeof(*phases) *p;
-
- for (p = phases; p->name; p++) {
- err = igt_atomic_reset_engine(engine, p);
- if (err)
- goto out;
- }
- }
- }
-
-out:
- /* As we poke around the guts, do a full reset before continuing. */
- force_reset(i915);
-
-unlock:
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- igt_global_reset_unlock(i915);
-
- return err;
-}
-
-int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(igt_global_reset), /* attempt to recover GPU first */
- SUBTEST(igt_wedged_reset),
- SUBTEST(igt_hang_sanitycheck),
- SUBTEST(igt_reset_nop),
- SUBTEST(igt_reset_nop_engine),
- SUBTEST(igt_reset_idle_engine),
- SUBTEST(igt_reset_active_engine),
- SUBTEST(igt_reset_engines),
- SUBTEST(igt_reset_queue),
- SUBTEST(igt_reset_wait),
- SUBTEST(igt_reset_evict_ggtt),
- SUBTEST(igt_reset_evict_ppgtt),
- SUBTEST(igt_reset_evict_fence),
- SUBTEST(igt_handle_error),
- SUBTEST(igt_atomic_reset),
- };
- intel_wakeref_t wakeref;
- bool saved_hangcheck;
- int err;
-
- if (!intel_has_gpu_reset(i915))
- return 0;
-
- if (i915_terminally_wedged(i915))
- return -EIO; /* we're long past hope of a successful reset */
-
- wakeref = intel_runtime_pm_get(i915);
- saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
- drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */
-
- err = i915_subtests(tests, i915);
-
- mutex_lock(&i915->drm.struct_mutex);
- igt_flush_test(i915, I915_WAIT_LOCKED);
- mutex_unlock(&i915->drm.struct_mutex);
-
- i915_modparams.enable_hangcheck = saved_hangcheck;
- intel_runtime_pm_put(i915, wakeref);
-
- return err;
-}
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- */
-
-#include <linux/prime_numbers.h>
-
-#include "../i915_reset.h"
-
-#include "../i915_selftest.h"
-#include "igt_flush_test.h"
-#include "igt_live_test.h"
-#include "igt_spinner.h"
-#include "i915_random.h"
-
-#include "mock_context.h"
-
-static int live_sanitycheck(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
- enum intel_engine_id id;
- struct igt_spinner spin;
- intel_wakeref_t wakeref;
- int err = -ENOMEM;
-
- if (!HAS_LOGICAL_RING_CONTEXTS(i915))
- return 0;
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- if (igt_spinner_init(&spin, i915))
- goto err_unlock;
-
- ctx = kernel_context(i915);
- if (!ctx)
- goto err_spin;
-
- for_each_engine(engine, i915, id) {
- struct i915_request *rq;
-
- rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_ctx;
- }
-
- i915_request_add(rq);
- if (!igt_wait_for_spinner(&spin, rq)) {
- GEM_TRACE("spinner failed to start\n");
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_ctx;
- }
-
- igt_spinner_end(&spin);
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
- err = -EIO;
- goto err_ctx;
- }
- }
-
- err = 0;
-err_ctx:
- kernel_context_close(ctx);
-err_spin:
- igt_spinner_fini(&spin);
-err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-}
-
-static int live_busywait_preempt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct i915_gem_context *ctx_hi, *ctx_lo;
- struct intel_engine_cs *engine;
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- int err = -ENOMEM;
- u32 *map;
-
- /*
- * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
- * preempt the busywaits used to synchronise between rings.
- */
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- ctx_hi = kernel_context(i915);
- if (!ctx_hi)
- goto err_unlock;
- ctx_hi->sched.priority = INT_MAX;
-
- ctx_lo = kernel_context(i915);
- if (!ctx_lo)
- goto err_ctx_hi;
- ctx_lo->sched.priority = INT_MIN;
-
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto err_ctx_lo;
- }
-
- map = i915_gem_object_pin_map(obj, I915_MAP_WC);
- if (IS_ERR(map)) {
- err = PTR_ERR(map);
- goto err_obj;
- }
-
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_map;
- }
-
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
- if (err)
- goto err_map;
-
- for_each_engine(engine, i915, id) {
- struct i915_request *lo, *hi;
- struct igt_live_test t;
- u32 *cs;
-
- if (!intel_engine_can_store_dword(engine))
- continue;
-
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
- err = -EIO;
- goto err_vma;
- }
-
- /*
- * We create two requests. The low priority request
- * busywaits on a semaphore (inside the ringbuffer where
- * is should be preemptible) and the high priority requests
- * uses a MI_STORE_DWORD_IMM to update the semaphore value
- * allowing the first request to complete. If preemption
- * fails, we hang instead.
- */
-
- lo = i915_request_alloc(engine, ctx_lo);
- if (IS_ERR(lo)) {
- err = PTR_ERR(lo);
- goto err_vma;
- }
-
- cs = intel_ring_begin(lo, 8);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- i915_request_add(lo);
- goto err_vma;
- }
-
- *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = i915_ggtt_offset(vma);
- *cs++ = 0;
- *cs++ = 1;
-
- /* XXX Do we need a flush + invalidate here? */
-
- *cs++ = MI_SEMAPHORE_WAIT |
- MI_SEMAPHORE_GLOBAL_GTT |
- MI_SEMAPHORE_POLL |
- MI_SEMAPHORE_SAD_EQ_SDD;
- *cs++ = 0;
- *cs++ = i915_ggtt_offset(vma);
- *cs++ = 0;
-
- intel_ring_advance(lo, cs);
- i915_request_add(lo);
-
- if (wait_for(READ_ONCE(*map), 10)) {
- err = -ETIMEDOUT;
- goto err_vma;
- }
-
- /* Low priority request should be busywaiting now */
- if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) {
- pr_err("%s: Busywaiting request did not!\n",
- engine->name);
- err = -EIO;
- goto err_vma;
- }
-
- hi = i915_request_alloc(engine, ctx_hi);
- if (IS_ERR(hi)) {
- err = PTR_ERR(hi);
- goto err_vma;
- }
-
- cs = intel_ring_begin(hi, 4);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- i915_request_add(hi);
- goto err_vma;
- }
-
- *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = i915_ggtt_offset(vma);
- *cs++ = 0;
- *cs++ = 0;
-
- intel_ring_advance(hi, cs);
- i915_request_add(hi);
-
- if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
-
- pr_err("%s: Failed to preempt semaphore busywait!\n",
- engine->name);
-
- intel_engine_dump(engine, &p, "%s\n", engine->name);
- GEM_TRACE_DUMP();
-
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_vma;
- }
- GEM_BUG_ON(READ_ONCE(*map));
-
- if (igt_live_test_end(&t)) {
- err = -EIO;
- goto err_vma;
- }
- }
-
- err = 0;
-err_vma:
- i915_vma_unpin(vma);
-err_map:
- i915_gem_object_unpin_map(obj);
-err_obj:
- i915_gem_object_put(obj);
-err_ctx_lo:
- kernel_context_close(ctx_lo);
-err_ctx_hi:
- kernel_context_close(ctx_hi);
-err_unlock:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-}
-
-static int live_preempt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct i915_gem_context *ctx_hi, *ctx_lo;
- struct igt_spinner spin_hi, spin_lo;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- int err = -ENOMEM;
-
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
- return 0;
-
- if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
- pr_err("Logical preemption supported, but not exposed\n");
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- if (igt_spinner_init(&spin_hi, i915))
- goto err_unlock;
-
- if (igt_spinner_init(&spin_lo, i915))
- goto err_spin_hi;
-
- ctx_hi = kernel_context(i915);
- if (!ctx_hi)
- goto err_spin_lo;
- ctx_hi->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
-
- ctx_lo = kernel_context(i915);
- if (!ctx_lo)
- goto err_ctx_hi;
- ctx_lo->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
-
- for_each_engine(engine, i915, id) {
- struct igt_live_test t;
- struct i915_request *rq;
-
- if (!intel_engine_has_preemption(engine))
- continue;
-
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
- err = -EIO;
- goto err_ctx_lo;
- }
-
- rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_ctx_lo;
- }
-
- i915_request_add(rq);
- if (!igt_wait_for_spinner(&spin_lo, rq)) {
- GEM_TRACE("lo spinner failed to start\n");
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_ctx_lo;
- }
-
- rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq)) {
- igt_spinner_end(&spin_lo);
- err = PTR_ERR(rq);
- goto err_ctx_lo;
- }
-
- i915_request_add(rq);
- if (!igt_wait_for_spinner(&spin_hi, rq)) {
- GEM_TRACE("hi spinner failed to start\n");
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_ctx_lo;
- }
-
- igt_spinner_end(&spin_hi);
- igt_spinner_end(&spin_lo);
-
- if (igt_live_test_end(&t)) {
- err = -EIO;
- goto err_ctx_lo;
- }
- }
-
- err = 0;
-err_ctx_lo:
- kernel_context_close(ctx_lo);
-err_ctx_hi:
- kernel_context_close(ctx_hi);
-err_spin_lo:
- igt_spinner_fini(&spin_lo);
-err_spin_hi:
- igt_spinner_fini(&spin_hi);
-err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-}
-
-static int live_late_preempt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct i915_gem_context *ctx_hi, *ctx_lo;
- struct igt_spinner spin_hi, spin_lo;
- struct intel_engine_cs *engine;
- struct i915_sched_attr attr = {};
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- int err = -ENOMEM;
-
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
- return 0;
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- if (igt_spinner_init(&spin_hi, i915))
- goto err_unlock;
-
- if (igt_spinner_init(&spin_lo, i915))
- goto err_spin_hi;
-
- ctx_hi = kernel_context(i915);
- if (!ctx_hi)
- goto err_spin_lo;
-
- ctx_lo = kernel_context(i915);
- if (!ctx_lo)
- goto err_ctx_hi;
-
- for_each_engine(engine, i915, id) {
- struct igt_live_test t;
- struct i915_request *rq;
-
- if (!intel_engine_has_preemption(engine))
- continue;
-
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
- err = -EIO;
- goto err_ctx_lo;
- }
-
- rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_ctx_lo;
- }
-
- i915_request_add(rq);
- if (!igt_wait_for_spinner(&spin_lo, rq)) {
- pr_err("First context failed to start\n");
- goto err_wedged;
- }
-
- rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
- MI_NOOP);
- if (IS_ERR(rq)) {
- igt_spinner_end(&spin_lo);
- err = PTR_ERR(rq);
- goto err_ctx_lo;
- }
-
- i915_request_add(rq);
- if (igt_wait_for_spinner(&spin_hi, rq)) {
- pr_err("Second context overtook first?\n");
- goto err_wedged;
- }
-
- attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
- engine->schedule(rq, &attr);
-
- if (!igt_wait_for_spinner(&spin_hi, rq)) {
- pr_err("High priority context failed to preempt the low priority context\n");
- GEM_TRACE_DUMP();
- goto err_wedged;
- }
-
- igt_spinner_end(&spin_hi);
- igt_spinner_end(&spin_lo);
-
- if (igt_live_test_end(&t)) {
- err = -EIO;
- goto err_ctx_lo;
- }
- }
-
- err = 0;
-err_ctx_lo:
- kernel_context_close(ctx_lo);
-err_ctx_hi:
- kernel_context_close(ctx_hi);
-err_spin_lo:
- igt_spinner_fini(&spin_lo);
-err_spin_hi:
- igt_spinner_fini(&spin_hi);
-err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-
-err_wedged:
- igt_spinner_end(&spin_hi);
- igt_spinner_end(&spin_lo);
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_ctx_lo;
-}
-
-struct preempt_client {
- struct igt_spinner spin;
- struct i915_gem_context *ctx;
-};
-
-static int preempt_client_init(struct drm_i915_private *i915,
- struct preempt_client *c)
-{
- c->ctx = kernel_context(i915);
- if (!c->ctx)
- return -ENOMEM;
-
- if (igt_spinner_init(&c->spin, i915))
- goto err_ctx;
-
- return 0;
-
-err_ctx:
- kernel_context_close(c->ctx);
- return -ENOMEM;
-}
-
-static void preempt_client_fini(struct preempt_client *c)
-{
- igt_spinner_fini(&c->spin);
- kernel_context_close(c->ctx);
-}
-
-static int live_suppress_self_preempt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
- };
- struct preempt_client a, b;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- int err = -ENOMEM;
-
- /*
- * Verify that if a preemption request does not cause a change in
- * the current execution order, the preempt-to-idle injection is
- * skipped and that we do not accidentally apply it after the CS
- * completion event.
- */
-
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
- return 0;
-
- if (USES_GUC_SUBMISSION(i915))
- return 0; /* presume black blox */
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- if (preempt_client_init(i915, &a))
- goto err_unlock;
- if (preempt_client_init(i915, &b))
- goto err_client_a;
-
- for_each_engine(engine, i915, id) {
- struct i915_request *rq_a, *rq_b;
- int depth;
-
- if (!intel_engine_has_preemption(engine))
- continue;
-
- engine->execlists.preempt_hang.count = 0;
-
- rq_a = igt_spinner_create_request(&a.spin,
- a.ctx, engine,
- MI_NOOP);
- if (IS_ERR(rq_a)) {
- err = PTR_ERR(rq_a);
- goto err_client_b;
- }
-
- i915_request_add(rq_a);
- if (!igt_wait_for_spinner(&a.spin, rq_a)) {
- pr_err("First client failed to start\n");
- goto err_wedged;
- }
-
- for (depth = 0; depth < 8; depth++) {
- rq_b = igt_spinner_create_request(&b.spin,
- b.ctx, engine,
- MI_NOOP);
- if (IS_ERR(rq_b)) {
- err = PTR_ERR(rq_b);
- goto err_client_b;
- }
- i915_request_add(rq_b);
-
- GEM_BUG_ON(i915_request_completed(rq_a));
- engine->schedule(rq_a, &attr);
- igt_spinner_end(&a.spin);
-
- if (!igt_wait_for_spinner(&b.spin, rq_b)) {
- pr_err("Second client failed to start\n");
- goto err_wedged;
- }
-
- swap(a, b);
- rq_a = rq_b;
- }
- igt_spinner_end(&a.spin);
-
- if (engine->execlists.preempt_hang.count) {
- pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n",
- engine->execlists.preempt_hang.count,
- depth);
- err = -EINVAL;
- goto err_client_b;
- }
-
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- goto err_wedged;
- }
-
- err = 0;
-err_client_b:
- preempt_client_fini(&b);
-err_client_a:
- preempt_client_fini(&a);
-err_unlock:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-
-err_wedged:
- igt_spinner_end(&b.spin);
- igt_spinner_end(&a.spin);
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_client_b;
-}
-
-static int __i915_sw_fence_call
-dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
-{
- return NOTIFY_DONE;
-}
-
-static struct i915_request *dummy_request(struct intel_engine_cs *engine)
-{
- struct i915_request *rq;
-
- rq = kzalloc(sizeof(*rq), GFP_KERNEL);
- if (!rq)
- return NULL;
-
- INIT_LIST_HEAD(&rq->active_list);
- rq->engine = engine;
-
- i915_sched_node_init(&rq->sched);
-
- /* mark this request as permanently incomplete */
- rq->fence.seqno = 1;
- BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
- rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
- GEM_BUG_ON(i915_request_completed(rq));
-
- i915_sw_fence_init(&rq->submit, dummy_notify);
- i915_sw_fence_commit(&rq->submit);
-
- return rq;
-}
-
-static void dummy_request_free(struct i915_request *dummy)
-{
- i915_request_mark_complete(dummy);
- i915_sched_node_fini(&dummy->sched);
- i915_sw_fence_fini(&dummy->submit);
-
- dma_fence_free(&dummy->fence);
-}
-
-static int live_suppress_wait_preempt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct preempt_client client[4];
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- int err = -ENOMEM;
- int i;
-
- /*
- * Waiters are given a little priority nudge, but not enough
- * to actually cause any preemption. Double check that we do
- * not needlessly generate preempt-to-idle cycles.
- */
-
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
- return 0;
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
- goto err_unlock;
- if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
- goto err_client_0;
- if (preempt_client_init(i915, &client[2])) /* head of queue */
- goto err_client_1;
- if (preempt_client_init(i915, &client[3])) /* bystander */
- goto err_client_2;
-
- for_each_engine(engine, i915, id) {
- int depth;
-
- if (!intel_engine_has_preemption(engine))
- continue;
-
- if (!engine->emit_init_breadcrumb)
- continue;
-
- for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
- struct i915_request *rq[ARRAY_SIZE(client)];
- struct i915_request *dummy;
-
- engine->execlists.preempt_hang.count = 0;
-
- dummy = dummy_request(engine);
- if (!dummy)
- goto err_client_3;
-
- for (i = 0; i < ARRAY_SIZE(client); i++) {
- rq[i] = igt_spinner_create_request(&client[i].spin,
- client[i].ctx, engine,
- MI_NOOP);
- if (IS_ERR(rq[i])) {
- err = PTR_ERR(rq[i]);
- goto err_wedged;
- }
-
- /* Disable NEWCLIENT promotion */
- __i915_active_request_set(&rq[i]->timeline->last_request,
- dummy);
- i915_request_add(rq[i]);
- }
-
- dummy_request_free(dummy);
-
- GEM_BUG_ON(i915_request_completed(rq[0]));
- if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
- pr_err("%s: First client failed to start\n",
- engine->name);
- goto err_wedged;
- }
- GEM_BUG_ON(!i915_request_started(rq[0]));
-
- if (i915_request_wait(rq[depth],
- I915_WAIT_LOCKED |
- I915_WAIT_PRIORITY,
- 1) != -ETIME) {
- pr_err("%s: Waiter depth:%d completed!\n",
- engine->name, depth);
- goto err_wedged;
- }
-
- for (i = 0; i < ARRAY_SIZE(client); i++)
- igt_spinner_end(&client[i].spin);
-
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- goto err_wedged;
-
- if (engine->execlists.preempt_hang.count) {
- pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
- engine->name,
- engine->execlists.preempt_hang.count,
- depth);
- err = -EINVAL;
- goto err_client_3;
- }
- }
- }
-
- err = 0;
-err_client_3:
- preempt_client_fini(&client[3]);
-err_client_2:
- preempt_client_fini(&client[2]);
-err_client_1:
- preempt_client_fini(&client[1]);
-err_client_0:
- preempt_client_fini(&client[0]);
-err_unlock:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-
-err_wedged:
- for (i = 0; i < ARRAY_SIZE(client); i++)
- igt_spinner_end(&client[i].spin);
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_client_3;
-}
-
-static int live_chain_preempt(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- struct preempt_client hi, lo;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- int err = -ENOMEM;
-
- /*
- * Build a chain AB...BA between two contexts (A, B) and request
- * preemption of the last request. It should then complete before
- * the previously submitted spinner in B.
- */
-
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
- return 0;
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- if (preempt_client_init(i915, &hi))
- goto err_unlock;
-
- if (preempt_client_init(i915, &lo))
- goto err_client_hi;
-
- for_each_engine(engine, i915, id) {
- struct i915_sched_attr attr = {
- .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
- };
- struct igt_live_test t;
- struct i915_request *rq;
- int ring_size, count, i;
-
- if (!intel_engine_has_preemption(engine))
- continue;
-
- rq = igt_spinner_create_request(&lo.spin,
- lo.ctx, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq))
- goto err_wedged;
- i915_request_add(rq);
-
- ring_size = rq->wa_tail - rq->head;
- if (ring_size < 0)
- ring_size += rq->ring->size;
- ring_size = rq->ring->size / ring_size;
- pr_debug("%s(%s): Using maximum of %d requests\n",
- __func__, engine->name, ring_size);
-
- igt_spinner_end(&lo.spin);
- if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 2) < 0) {
- pr_err("Timed out waiting to flush %s\n", engine->name);
- goto err_wedged;
- }
-
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
- err = -EIO;
- goto err_wedged;
- }
-
- for_each_prime_number_from(count, 1, ring_size) {
- rq = igt_spinner_create_request(&hi.spin,
- hi.ctx, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq))
- goto err_wedged;
- i915_request_add(rq);
- if (!igt_wait_for_spinner(&hi.spin, rq))
- goto err_wedged;
-
- rq = igt_spinner_create_request(&lo.spin,
- lo.ctx, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq))
- goto err_wedged;
- i915_request_add(rq);
-
- for (i = 0; i < count; i++) {
- rq = i915_request_alloc(engine, lo.ctx);
- if (IS_ERR(rq))
- goto err_wedged;
- i915_request_add(rq);
- }
-
- rq = i915_request_alloc(engine, hi.ctx);
- if (IS_ERR(rq))
- goto err_wedged;
- i915_request_add(rq);
- engine->schedule(rq, &attr);
-
- igt_spinner_end(&hi.spin);
- if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
- struct drm_printer p =
- drm_info_printer(i915->drm.dev);
-
- pr_err("Failed to preempt over chain of %d\n",
- count);
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
- goto err_wedged;
- }
- igt_spinner_end(&lo.spin);
-
- rq = i915_request_alloc(engine, lo.ctx);
- if (IS_ERR(rq))
- goto err_wedged;
- i915_request_add(rq);
- if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
- struct drm_printer p =
- drm_info_printer(i915->drm.dev);
-
- pr_err("Failed to flush low priority chain of %d requests\n",
- count);
- intel_engine_dump(engine, &p,
- "%s\n", engine->name);
- goto err_wedged;
- }
- }
-
- if (igt_live_test_end(&t)) {
- err = -EIO;
- goto err_wedged;
- }
- }
-
- err = 0;
-err_client_lo:
- preempt_client_fini(&lo);
-err_client_hi:
- preempt_client_fini(&hi);
-err_unlock:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-
-err_wedged:
- igt_spinner_end(&hi.spin);
- igt_spinner_end(&lo.spin);
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_client_lo;
-}
-
-static int live_preempt_hang(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct i915_gem_context *ctx_hi, *ctx_lo;
- struct igt_spinner spin_hi, spin_lo;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- int err = -ENOMEM;
-
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
- return 0;
-
- if (!intel_has_reset_engine(i915))
- return 0;
-
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(i915);
-
- if (igt_spinner_init(&spin_hi, i915))
- goto err_unlock;
-
- if (igt_spinner_init(&spin_lo, i915))
- goto err_spin_hi;
-
- ctx_hi = kernel_context(i915);
- if (!ctx_hi)
- goto err_spin_lo;
- ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
-
- ctx_lo = kernel_context(i915);
- if (!ctx_lo)
- goto err_ctx_hi;
- ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
-
- for_each_engine(engine, i915, id) {
- struct i915_request *rq;
-
- if (!intel_engine_has_preemption(engine))
- continue;
-
- rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_ctx_lo;
- }
-
- i915_request_add(rq);
- if (!igt_wait_for_spinner(&spin_lo, rq)) {
- GEM_TRACE("lo spinner failed to start\n");
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_ctx_lo;
- }
-
- rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq)) {
- igt_spinner_end(&spin_lo);
- err = PTR_ERR(rq);
- goto err_ctx_lo;
- }
-
- init_completion(&engine->execlists.preempt_hang.completion);
- engine->execlists.preempt_hang.inject_hang = true;
-
- i915_request_add(rq);
-
- if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
- HZ / 10)) {
- pr_err("Preemption did not occur within timeout!");
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_ctx_lo;
- }
-
- set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- i915_reset_engine(engine, NULL);
- clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
-
- engine->execlists.preempt_hang.inject_hang = false;
-
- if (!igt_wait_for_spinner(&spin_hi, rq)) {
- GEM_TRACE("hi spinner failed to start\n");
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
- err = -EIO;
- goto err_ctx_lo;
- }
-
- igt_spinner_end(&spin_hi);
- igt_spinner_end(&spin_lo);
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
- err = -EIO;
- goto err_ctx_lo;
- }
- }
-
- err = 0;
-err_ctx_lo:
- kernel_context_close(ctx_lo);
-err_ctx_hi:
- kernel_context_close(ctx_hi);
-err_spin_lo:
- igt_spinner_fini(&spin_lo);
-err_spin_hi:
- igt_spinner_fini(&spin_hi);
-err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
- intel_runtime_pm_put(i915, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-}
-
-static int random_range(struct rnd_state *rnd, int min, int max)
-{
- return i915_prandom_u32_max_state(max - min, rnd) + min;
-}
-
-static int random_priority(struct rnd_state *rnd)
-{
- return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
-}
-
-struct preempt_smoke {
- struct drm_i915_private *i915;
- struct i915_gem_context **contexts;
- struct intel_engine_cs *engine;
- struct drm_i915_gem_object *batch;
- unsigned int ncontext;
- struct rnd_state prng;
- unsigned long count;
-};
-
-static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
-{
- return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
- &smoke->prng)];
-}
-
-static int smoke_submit(struct preempt_smoke *smoke,
- struct i915_gem_context *ctx, int prio,
- struct drm_i915_gem_object *batch)
-{
- struct i915_request *rq;
- struct i915_vma *vma = NULL;
- int err = 0;
-
- if (batch) {
- vma = i915_vma_instance(batch, &ctx->ppgtt->vm, NULL);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- return err;
- }
-
- ctx->sched.priority = prio;
-
- rq = i915_request_alloc(smoke->engine, ctx);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto unpin;
- }
-
- if (vma) {
- err = rq->engine->emit_bb_start(rq,
- vma->node.start,
- PAGE_SIZE, 0);
- if (!err)
- err = i915_vma_move_to_active(vma, rq, 0);
- }
-
- i915_request_add(rq);
-
-unpin:
- if (vma)
- i915_vma_unpin(vma);
-
- return err;
-}
-
-static int smoke_crescendo_thread(void *arg)
-{
- struct preempt_smoke *smoke = arg;
- IGT_TIMEOUT(end_time);
- unsigned long count;
-
- count = 0;
- do {
- struct i915_gem_context *ctx = smoke_context(smoke);
- int err;
-
- mutex_lock(&smoke->i915->drm.struct_mutex);
- err = smoke_submit(smoke,
- ctx, count % I915_PRIORITY_MAX,
- smoke->batch);
- mutex_unlock(&smoke->i915->drm.struct_mutex);
- if (err)
- return err;
-
- count++;
- } while (!__igt_timeout(end_time, NULL));
-
- smoke->count = count;
- return 0;
-}
-
-static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
-#define BATCH BIT(0)
-{
- struct task_struct *tsk[I915_NUM_ENGINES] = {};
- struct preempt_smoke arg[I915_NUM_ENGINES];
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- unsigned long count;
- int err = 0;
-
- mutex_unlock(&smoke->i915->drm.struct_mutex);
-
- for_each_engine(engine, smoke->i915, id) {
- arg[id] = *smoke;
- arg[id].engine = engine;
- if (!(flags & BATCH))
- arg[id].batch = NULL;
- arg[id].count = 0;
-
- tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
- "igt/smoke:%d", id);
- if (IS_ERR(tsk[id])) {
- err = PTR_ERR(tsk[id]);
- break;
- }
- get_task_struct(tsk[id]);
- }
-
- count = 0;
- for_each_engine(engine, smoke->i915, id) {
- int status;
-
- if (IS_ERR_OR_NULL(tsk[id]))
- continue;
-
- status = kthread_stop(tsk[id]);
- if (status && !err)
- err = status;
-
- count += arg[id].count;
-
- put_task_struct(tsk[id]);
- }
-
- mutex_lock(&smoke->i915->drm.struct_mutex);
-
- pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
- count, flags,
- RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
- return 0;
-}
-
-static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
-{
- enum intel_engine_id id;
- IGT_TIMEOUT(end_time);
- unsigned long count;
-
- count = 0;
- do {
- for_each_engine(smoke->engine, smoke->i915, id) {
- struct i915_gem_context *ctx = smoke_context(smoke);
- int err;
-
- err = smoke_submit(smoke,
- ctx, random_priority(&smoke->prng),
- flags & BATCH ? smoke->batch : NULL);
- if (err)
- return err;
-
- count++;
- }
- } while (!__igt_timeout(end_time, NULL));
-
- pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
- count, flags,
- RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
- return 0;
-}
-
-static int live_preempt_smoke(void *arg)
-{
- struct preempt_smoke smoke = {
- .i915 = arg,
- .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
- .ncontext = 1024,
- };
- const unsigned int phase[] = { 0, BATCH };
- intel_wakeref_t wakeref;
- struct igt_live_test t;
- int err = -ENOMEM;
- u32 *cs;
- int n;
-
- if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
- return 0;
-
- smoke.contexts = kmalloc_array(smoke.ncontext,
- sizeof(*smoke.contexts),
- GFP_KERNEL);
- if (!smoke.contexts)
- return -ENOMEM;
-
- mutex_lock(&smoke.i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(smoke.i915);
-
- smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
- if (IS_ERR(smoke.batch)) {
- err = PTR_ERR(smoke.batch);
- goto err_unlock;
- }
-
- cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err_batch;
- }
- for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
- cs[n] = MI_ARB_CHECK;
- cs[n] = MI_BATCH_BUFFER_END;
- i915_gem_object_flush_map(smoke.batch);
- i915_gem_object_unpin_map(smoke.batch);
-
- if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) {
- err = -EIO;
- goto err_batch;
- }
-
- for (n = 0; n < smoke.ncontext; n++) {
- smoke.contexts[n] = kernel_context(smoke.i915);
- if (!smoke.contexts[n])
- goto err_ctx;
- }
-
- for (n = 0; n < ARRAY_SIZE(phase); n++) {
- err = smoke_crescendo(&smoke, phase[n]);
- if (err)
- goto err_ctx;
-
- err = smoke_random(&smoke, phase[n]);
- if (err)
- goto err_ctx;
- }
-
-err_ctx:
- if (igt_live_test_end(&t))
- err = -EIO;
-
- for (n = 0; n < smoke.ncontext; n++) {
- if (!smoke.contexts[n])
- break;
- kernel_context_close(smoke.contexts[n]);
- }
-
-err_batch:
- i915_gem_object_put(smoke.batch);
-err_unlock:
- intel_runtime_pm_put(smoke.i915, wakeref);
- mutex_unlock(&smoke.i915->drm.struct_mutex);
- kfree(smoke.contexts);
-
- return err;
-}
-
-int intel_execlists_live_selftests(struct drm_i915_private *i915)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(live_sanitycheck),
- SUBTEST(live_busywait_preempt),
- SUBTEST(live_preempt),
- SUBTEST(live_late_preempt),
- SUBTEST(live_suppress_self_preempt),
- SUBTEST(live_suppress_wait_preempt),
- SUBTEST(live_chain_preempt),
- SUBTEST(live_preempt_hang),
- SUBTEST(live_preempt_smoke),
- };
-
- if (!HAS_EXECLISTS(i915))
- return 0;
-
- if (i915_terminally_wedged(i915))
- return 0;
-
- return i915_subtests(tests, i915);
-}
+++ /dev/null
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2018 Intel Corporation
- */
-
-#include "../i915_selftest.h"
-#include "../i915_reset.h"
-
-#include "igt_flush_test.h"
-#include "igt_reset.h"
-#include "igt_spinner.h"
-#include "igt_wedge_me.h"
-#include "mock_context.h"
-#include "mock_drm.h"
-
-static const struct wo_register {
- enum intel_platform platform;
- u32 reg;
-} wo_registers[] = {
- { INTEL_GEMINILAKE, 0x731c }
-};
-
-#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 4)
-struct wa_lists {
- struct i915_wa_list gt_wa_list;
- struct {
- char name[REF_NAME_MAX];
- struct i915_wa_list wa_list;
- } engine[I915_NUM_ENGINES];
-};
-
-static void
-reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- memset(lists, 0, sizeof(*lists));
-
- wa_init_start(&lists->gt_wa_list, "GT_REF");
- gt_init_workarounds(i915, &lists->gt_wa_list);
- wa_init_finish(&lists->gt_wa_list);
-
- for_each_engine(engine, i915, id) {
- struct i915_wa_list *wal = &lists->engine[id].wa_list;
- char *name = lists->engine[id].name;
-
- snprintf(name, REF_NAME_MAX, "%s_REF", engine->name);
-
- wa_init_start(wal, name);
- engine_init_workarounds(engine, wal);
- wa_init_finish(wal);
- }
-}
-
-static void
-reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id)
- intel_wa_list_free(&lists->engine[id].wa_list);
-
- intel_wa_list_free(&lists->gt_wa_list);
-}
-
-static struct drm_i915_gem_object *
-read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
-{
- const u32 base = engine->mmio_base;
- struct drm_i915_gem_object *result;
- intel_wakeref_t wakeref;
- struct i915_request *rq;
- struct i915_vma *vma;
- u32 srm, *cs;
- int err;
- int i;
-
- result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
- if (IS_ERR(result))
- return result;
-
- i915_gem_object_set_cache_coherency(result, I915_CACHE_LLC);
-
- cs = i915_gem_object_pin_map(result, I915_MAP_WB);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err_obj;
- }
- memset(cs, 0xc5, PAGE_SIZE);
- i915_gem_object_flush_map(result);
- i915_gem_object_unpin_map(result);
-
- vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_obj;
- }
-
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
- if (err)
- goto err_obj;
-
- rq = ERR_PTR(-ENODEV);
- with_intel_runtime_pm(engine->i915, wakeref)
- rq = i915_request_alloc(engine, ctx);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_pin;
- }
-
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- if (err)
- goto err_req;
-
- srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
- if (INTEL_GEN(ctx->i915) >= 8)
- srm++;
-
- cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err_req;
- }
-
- for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
- *cs++ = srm;
- *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i));
- *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
- *cs++ = 0;
- }
- intel_ring_advance(rq, cs);
-
- i915_gem_object_get(result);
- i915_gem_object_set_active_reference(result);
-
- i915_request_add(rq);
- i915_vma_unpin(vma);
-
- return result;
-
-err_req:
- i915_request_add(rq);
-err_pin:
- i915_vma_unpin(vma);
-err_obj:
- i915_gem_object_put(result);
- return ERR_PTR(err);
-}
-
-static u32
-get_whitelist_reg(const struct intel_engine_cs *engine, unsigned int i)
-{
- i915_reg_t reg = i < engine->whitelist.count ?
- engine->whitelist.list[i].reg :
- RING_NOPID(engine->mmio_base);
-
- return i915_mmio_reg_offset(reg);
-}
-
-static void
-print_results(const struct intel_engine_cs *engine, const u32 *results)
-{
- unsigned int i;
-
- for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
- u32 expected = get_whitelist_reg(engine, i);
- u32 actual = results[i];
-
- pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n",
- i, expected, actual);
- }
-}
-
-static int check_whitelist(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
-{
- struct drm_i915_gem_object *results;
- struct igt_wedge_me wedge;
- u32 *vaddr;
- int err;
- int i;
-
- results = read_nonprivs(ctx, engine);
- if (IS_ERR(results))
- return PTR_ERR(results);
-
- err = 0;
- igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
- err = i915_gem_object_set_to_cpu_domain(results, false);
- if (i915_terminally_wedged(ctx->i915))
- err = -EIO;
- if (err)
- goto out_put;
-
- vaddr = i915_gem_object_pin_map(results, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- err = PTR_ERR(vaddr);
- goto out_put;
- }
-
- for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
- u32 expected = get_whitelist_reg(engine, i);
- u32 actual = vaddr[i];
-
- if (expected != actual) {
- print_results(engine, vaddr);
- pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n",
- i, expected, actual);
-
- err = -EINVAL;
- break;
- }
- }
-
- i915_gem_object_unpin_map(results);
-out_put:
- i915_gem_object_put(results);
- return err;
-}
-
-static int do_device_reset(struct intel_engine_cs *engine)
-{
- i915_reset(engine->i915, engine->mask, "live_workarounds");
- return 0;
-}
-
-static int do_engine_reset(struct intel_engine_cs *engine)
-{
- return i915_reset_engine(engine, "live_workarounds");
-}
-
-static int
-switch_to_scratch_context(struct intel_engine_cs *engine,
- struct igt_spinner *spin)
-{
- struct i915_gem_context *ctx;
- struct i915_request *rq;
- intel_wakeref_t wakeref;
- int err = 0;
-
- ctx = kernel_context(engine->i915);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- GEM_BUG_ON(i915_gem_context_is_bannable(ctx));
-
- rq = ERR_PTR(-ENODEV);
- with_intel_runtime_pm(engine->i915, wakeref)
- rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
-
- kernel_context_close(ctx);
-
- if (IS_ERR(rq)) {
- spin = NULL;
- err = PTR_ERR(rq);
- goto err;
- }
-
- i915_request_add(rq);
-
- if (spin && !igt_wait_for_spinner(spin, rq)) {
- pr_err("Spinner failed to start\n");
- err = -ETIMEDOUT;
- }
-
-err:
- if (err && spin)
- igt_spinner_end(spin);
-
- return err;
-}
-
-static int check_whitelist_across_reset(struct intel_engine_cs *engine,
- int (*reset)(struct intel_engine_cs *),
- const char *name)
-{
- struct drm_i915_private *i915 = engine->i915;
- struct i915_gem_context *ctx;
- struct igt_spinner spin;
- intel_wakeref_t wakeref;
- int err;
-
- pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n",
- engine->whitelist.count, name);
-
- err = igt_spinner_init(&spin, i915);
- if (err)
- return err;
-
- ctx = kernel_context(i915);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- err = check_whitelist(ctx, engine);
- if (err) {
- pr_err("Invalid whitelist *before* %s reset!\n", name);
- goto out;
- }
-
- err = switch_to_scratch_context(engine, &spin);
- if (err)
- goto out;
-
- with_intel_runtime_pm(i915, wakeref)
- err = reset(engine);
-
- igt_spinner_end(&spin);
- igt_spinner_fini(&spin);
-
- if (err) {
- pr_err("%s reset failed\n", name);
- goto out;
- }
-
- err = check_whitelist(ctx, engine);
- if (err) {
- pr_err("Whitelist not preserved in context across %s reset!\n",
- name);
- goto out;
- }
-
- kernel_context_close(ctx);
-
- ctx = kernel_context(i915);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- err = check_whitelist(ctx, engine);
- if (err) {
- pr_err("Invalid whitelist *after* %s reset in fresh context!\n",
- name);
- goto out;
- }
-
-out:
- kernel_context_close(ctx);
- return err;
-}
-
-static struct i915_vma *create_batch(struct i915_gem_context *ctx)
-{
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- int err;
-
- obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_obj;
- }
-
- err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
- goto err_obj;
-
- err = i915_gem_object_set_to_wc_domain(obj, true);
- if (err)
- goto err_obj;
-
- return vma;
-
-err_obj:
- i915_gem_object_put(obj);
- return ERR_PTR(err);
-}
-
-static u32 reg_write(u32 old, u32 new, u32 rsvd)
-{
- if (rsvd == 0x0000ffff) {
- old &= ~(new >> 16);
- old |= new & (new >> 16);
- } else {
- old &= ~rsvd;
- old |= new & rsvd;
- }
-
- return old;
-}
-
-static bool wo_register(struct intel_engine_cs *engine, u32 reg)
-{
- enum intel_platform platform = INTEL_INFO(engine->i915)->platform;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(wo_registers); i++) {
- if (wo_registers[i].platform == platform &&
- wo_registers[i].reg == reg)
- return true;
- }
-
- return false;
-}
-
-static int check_dirty_whitelist(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
-{
- const u32 values[] = {
- 0x00000000,
- 0x01010101,
- 0x10100101,
- 0x03030303,
- 0x30300303,
- 0x05050505,
- 0x50500505,
- 0x0f0f0f0f,
- 0xf00ff00f,
- 0x10101010,
- 0xf0f01010,
- 0x30303030,
- 0xa0a03030,
- 0x50505050,
- 0xc0c05050,
- 0xf0f0f0f0,
- 0x11111111,
- 0x33333333,
- 0x55555555,
- 0x0000ffff,
- 0x00ff00ff,
- 0xff0000ff,
- 0xffff00ff,
- 0xffffffff,
- };
- struct i915_vma *scratch;
- struct i915_vma *batch;
- int err = 0, i, v;
- u32 *cs, *results;
-
- scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1);
- if (IS_ERR(scratch))
- return PTR_ERR(scratch);
-
- batch = create_batch(ctx);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto out_scratch;
- }
-
- for (i = 0; i < engine->whitelist.count; i++) {
- u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
- u64 addr = scratch->node.start;
- struct i915_request *rq;
- u32 srm, lrm, rsvd;
- u32 expect;
- int idx;
-
- if (wo_register(engine, reg))
- continue;
-
- srm = MI_STORE_REGISTER_MEM;
- lrm = MI_LOAD_REGISTER_MEM;
- if (INTEL_GEN(ctx->i915) >= 8)
- lrm++, srm++;
-
- pr_debug("%s: Writing garbage to %x\n",
- engine->name, reg);
-
- cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto out_batch;
- }
-
- /* SRM original */
- *cs++ = srm;
- *cs++ = reg;
- *cs++ = lower_32_bits(addr);
- *cs++ = upper_32_bits(addr);
-
- idx = 1;
- for (v = 0; v < ARRAY_SIZE(values); v++) {
- /* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
- *cs++ = values[v];
-
- /* SRM result */
- *cs++ = srm;
- *cs++ = reg;
- *cs++ = lower_32_bits(addr + sizeof(u32) * idx);
- *cs++ = upper_32_bits(addr + sizeof(u32) * idx);
- idx++;
- }
- for (v = 0; v < ARRAY_SIZE(values); v++) {
- /* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
- *cs++ = ~values[v];
-
- /* SRM result */
- *cs++ = srm;
- *cs++ = reg;
- *cs++ = lower_32_bits(addr + sizeof(u32) * idx);
- *cs++ = upper_32_bits(addr + sizeof(u32) * idx);
- idx++;
- }
- GEM_BUG_ON(idx * sizeof(u32) > scratch->size);
-
- /* LRM original -- don't leave garbage in the context! */
- *cs++ = lrm;
- *cs++ = reg;
- *cs++ = lower_32_bits(addr);
- *cs++ = upper_32_bits(addr);
-
- *cs++ = MI_BATCH_BUFFER_END;
-
- i915_gem_object_flush_map(batch->obj);
- i915_gem_object_unpin_map(batch->obj);
- i915_gem_chipset_flush(ctx->i915);
-
- rq = i915_request_alloc(engine, ctx);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto out_batch;
- }
-
- if (engine->emit_init_breadcrumb) { /* Be nice if we hang */
- err = engine->emit_init_breadcrumb(rq);
- if (err)
- goto err_request;
- }
-
- err = engine->emit_bb_start(rq,
- batch->node.start, PAGE_SIZE,
- 0);
- if (err)
- goto err_request;
-
-err_request:
- i915_request_add(rq);
- if (err)
- goto out_batch;
-
- if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
- pr_err("%s: Futzing %x timedout; cancelling test\n",
- engine->name, reg);
- i915_gem_set_wedged(ctx->i915);
- err = -EIO;
- goto out_batch;
- }
-
- results = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
- if (IS_ERR(results)) {
- err = PTR_ERR(results);
- goto out_batch;
- }
-
- GEM_BUG_ON(values[ARRAY_SIZE(values) - 1] != 0xffffffff);
- rsvd = results[ARRAY_SIZE(values)]; /* detect write masking */
- if (!rsvd) {
- pr_err("%s: Unable to write to whitelisted register %x\n",
- engine->name, reg);
- err = -EINVAL;
- goto out_unpin;
- }
-
- expect = results[0];
- idx = 1;
- for (v = 0; v < ARRAY_SIZE(values); v++) {
- expect = reg_write(expect, values[v], rsvd);
- if (results[idx] != expect)
- err++;
- idx++;
- }
- for (v = 0; v < ARRAY_SIZE(values); v++) {
- expect = reg_write(expect, ~values[v], rsvd);
- if (results[idx] != expect)
- err++;
- idx++;
- }
- if (err) {
- pr_err("%s: %d mismatch between values written to whitelisted register [%x], and values read back!\n",
- engine->name, err, reg);
-
- pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n",
- engine->name, reg, results[0], rsvd);
-
- expect = results[0];
- idx = 1;
- for (v = 0; v < ARRAY_SIZE(values); v++) {
- u32 w = values[v];
-
- expect = reg_write(expect, w, rsvd);
- pr_info("Wrote %08x, read %08x, expect %08x\n",
- w, results[idx], expect);
- idx++;
- }
- for (v = 0; v < ARRAY_SIZE(values); v++) {
- u32 w = ~values[v];
-
- expect = reg_write(expect, w, rsvd);
- pr_info("Wrote %08x, read %08x, expect %08x\n",
- w, results[idx], expect);
- idx++;
- }
-
- err = -EINVAL;
- }
-out_unpin:
- i915_gem_object_unpin_map(scratch->obj);
- if (err)
- break;
- }
-
- if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
- err = -EIO;
-out_batch:
- i915_vma_unpin_and_release(&batch, 0);
-out_scratch:
- i915_vma_unpin_and_release(&scratch, 0);
- return err;
-}
-
-static int live_dirty_whitelist(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
- struct drm_file *file;
- int err = 0;
-
- /* Can the user write to the whitelisted registers? */
-
- if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */
- return 0;
-
- wakeref = intel_runtime_pm_get(i915);
-
- mutex_unlock(&i915->drm.struct_mutex);
- file = mock_file(i915);
- mutex_lock(&i915->drm.struct_mutex);
- if (IS_ERR(file)) {
- err = PTR_ERR(file);
- goto out_rpm;
- }
-
- ctx = live_context(i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out_file;
- }
-
- for_each_engine(engine, i915, id) {
- if (engine->whitelist.count == 0)
- continue;
-
- err = check_dirty_whitelist(ctx, engine);
- if (err)
- goto out_file;
- }
-
-out_file:
- mutex_unlock(&i915->drm.struct_mutex);
- mock_file_free(i915, file);
- mutex_lock(&i915->drm.struct_mutex);
-out_rpm:
- intel_runtime_pm_put(i915, wakeref);
- return err;
-}
-
-static int live_reset_whitelist(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine = i915->engine[RCS0];
- int err = 0;
-
- /* If we reset the gpu, we should not lose the RING_NONPRIV */
-
- if (!engine || engine->whitelist.count == 0)
- return 0;
-
- igt_global_reset_lock(i915);
-
- if (intel_has_reset_engine(i915)) {
- err = check_whitelist_across_reset(engine,
- do_engine_reset,
- "engine");
- if (err)
- goto out;
- }
-
- if (intel_has_gpu_reset(i915)) {
- err = check_whitelist_across_reset(engine,
- do_device_reset,
- "device");
- if (err)
- goto out;
- }
-
-out:
- igt_global_reset_unlock(i915);
- return err;
-}
-
-static int read_whitelisted_registers(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- struct i915_vma *results)
-{
- intel_wakeref_t wakeref;
- struct i915_request *rq;
- int i, err = 0;
- u32 srm, *cs;
-
- rq = ERR_PTR(-ENODEV);
- with_intel_runtime_pm(engine->i915, wakeref)
- rq = i915_request_alloc(engine, ctx);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- srm = MI_STORE_REGISTER_MEM;
- if (INTEL_GEN(ctx->i915) >= 8)
- srm++;
-
- cs = intel_ring_begin(rq, 4 * engine->whitelist.count);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err_req;
- }
-
- for (i = 0; i < engine->whitelist.count; i++) {
- u64 offset = results->node.start + sizeof(u32) * i;
-
- *cs++ = srm;
- *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
- *cs++ = lower_32_bits(offset);
- *cs++ = upper_32_bits(offset);
- }
- intel_ring_advance(rq, cs);
-
-err_req:
- i915_request_add(rq);
-
- if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0)
- err = -EIO;
-
- return err;
-}
-
-static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
-{
- intel_wakeref_t wakeref;
- struct i915_request *rq;
- struct i915_vma *batch;
- int i, err = 0;
- u32 *cs;
-
- batch = create_batch(ctx);
- if (IS_ERR(batch))
- return PTR_ERR(batch);
-
- cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto err_batch;
- }
-
- *cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count);
- for (i = 0; i < engine->whitelist.count; i++) {
- *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
- *cs++ = 0xffffffff;
- }
- *cs++ = MI_BATCH_BUFFER_END;
-
- i915_gem_object_flush_map(batch->obj);
- i915_gem_chipset_flush(ctx->i915);
-
- rq = ERR_PTR(-ENODEV);
- with_intel_runtime_pm(engine->i915, wakeref)
- rq = i915_request_alloc(engine, ctx);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_unpin;
- }
-
- if (engine->emit_init_breadcrumb) { /* Be nice if we hang */
- err = engine->emit_init_breadcrumb(rq);
- if (err)
- goto err_request;
- }
-
- /* Perform the writes from an unprivileged "user" batch */
- err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
-
-err_request:
- i915_request_add(rq);
- if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0)
- err = -EIO;
-
-err_unpin:
- i915_gem_object_unpin_map(batch->obj);
-err_batch:
- i915_vma_unpin_and_release(&batch, 0);
- return err;
-}
-
-struct regmask {
- i915_reg_t reg;
- unsigned long gen_mask;
-};
-
-static bool find_reg(struct drm_i915_private *i915,
- i915_reg_t reg,
- const struct regmask *tbl,
- unsigned long count)
-{
- u32 offset = i915_mmio_reg_offset(reg);
-
- while (count--) {
- if (INTEL_INFO(i915)->gen_mask & tbl->gen_mask &&
- i915_mmio_reg_offset(tbl->reg) == offset)
- return true;
- tbl++;
- }
-
- return false;
-}
-
-static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg)
-{
- /* Alas, we must pardon some whitelists. Mistakes already made */
- static const struct regmask pardon[] = {
- { GEN9_CTX_PREEMPT_REG, INTEL_GEN_MASK(9, 9) },
- { GEN8_L3SQCREG4, INTEL_GEN_MASK(9, 9) },
- };
-
- return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon));
-}
-
-static bool result_eq(struct intel_engine_cs *engine,
- u32 a, u32 b, i915_reg_t reg)
-{
- if (a != b && !pardon_reg(engine->i915, reg)) {
- pr_err("Whitelisted register 0x%4x not context saved: A=%08x, B=%08x\n",
- i915_mmio_reg_offset(reg), a, b);
- return false;
- }
-
- return true;
-}
-
-static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg)
-{
- /* Some registers do not seem to behave and our writes unreadable */
- static const struct regmask wo[] = {
- { GEN9_SLICE_COMMON_ECO_CHICKEN1, INTEL_GEN_MASK(9, 9) },
- };
-
- return find_reg(i915, reg, wo, ARRAY_SIZE(wo));
-}
-
-static bool result_neq(struct intel_engine_cs *engine,
- u32 a, u32 b, i915_reg_t reg)
-{
- if (a == b && !writeonly_reg(engine->i915, reg)) {
- pr_err("Whitelist register 0x%4x:%08x was unwritable\n",
- i915_mmio_reg_offset(reg), a);
- return false;
- }
-
- return true;
-}
-
-static int
-check_whitelisted_registers(struct intel_engine_cs *engine,
- struct i915_vma *A,
- struct i915_vma *B,
- bool (*fn)(struct intel_engine_cs *engine,
- u32 a, u32 b,
- i915_reg_t reg))
-{
- u32 *a, *b;
- int i, err;
-
- a = i915_gem_object_pin_map(A->obj, I915_MAP_WB);
- if (IS_ERR(a))
- return PTR_ERR(a);
-
- b = i915_gem_object_pin_map(B->obj, I915_MAP_WB);
- if (IS_ERR(b)) {
- err = PTR_ERR(b);
- goto err_a;
- }
-
- err = 0;
- for (i = 0; i < engine->whitelist.count; i++) {
- if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg))
- err = -EINVAL;
- }
-
- i915_gem_object_unpin_map(B->obj);
-err_a:
- i915_gem_object_unpin_map(A->obj);
- return err;
-}
-
-static int live_isolated_whitelist(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct {
- struct i915_gem_context *ctx;
- struct i915_vma *scratch[2];
- } client[2] = {};
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int i, err = 0;
-
- /*
- * Check that a write into a whitelist register works, but
- * invisible to a second context.
- */
-
- if (!intel_engines_has_context_isolation(i915))
- return 0;
-
- if (!i915->kernel_context->ppgtt)
- return 0;
-
- for (i = 0; i < ARRAY_SIZE(client); i++) {
- struct i915_gem_context *c;
-
- c = kernel_context(i915);
- if (IS_ERR(c)) {
- err = PTR_ERR(c);
- goto err;
- }
-
- client[i].scratch[0] = create_scratch(&c->ppgtt->vm, 1024);
- if (IS_ERR(client[i].scratch[0])) {
- err = PTR_ERR(client[i].scratch[0]);
- kernel_context_close(c);
- goto err;
- }
-
- client[i].scratch[1] = create_scratch(&c->ppgtt->vm, 1024);
- if (IS_ERR(client[i].scratch[1])) {
- err = PTR_ERR(client[i].scratch[1]);
- i915_vma_unpin_and_release(&client[i].scratch[0], 0);
- kernel_context_close(c);
- goto err;
- }
-
- client[i].ctx = c;
- }
-
- for_each_engine(engine, i915, id) {
- if (!engine->whitelist.count)
- continue;
-
- /* Read default values */
- err = read_whitelisted_registers(client[0].ctx, engine,
- client[0].scratch[0]);
- if (err)
- goto err;
-
- /* Try to overwrite registers (should only affect ctx0) */
- err = scrub_whitelisted_registers(client[0].ctx, engine);
- if (err)
- goto err;
-
- /* Read values from ctx1, we expect these to be defaults */
- err = read_whitelisted_registers(client[1].ctx, engine,
- client[1].scratch[0]);
- if (err)
- goto err;
-
- /* Verify that both reads return the same default values */
- err = check_whitelisted_registers(engine,
- client[0].scratch[0],
- client[1].scratch[0],
- result_eq);
- if (err)
- goto err;
-
- /* Read back the updated values in ctx0 */
- err = read_whitelisted_registers(client[0].ctx, engine,
- client[0].scratch[1]);
- if (err)
- goto err;
-
- /* User should be granted privilege to overwhite regs */
- err = check_whitelisted_registers(engine,
- client[0].scratch[0],
- client[0].scratch[1],
- result_neq);
- if (err)
- goto err;
- }
-
-err:
- for (i = 0; i < ARRAY_SIZE(client); i++) {
- if (!client[i].ctx)
- break;
-
- i915_vma_unpin_and_release(&client[i].scratch[1], 0);
- i915_vma_unpin_and_release(&client[i].scratch[0], 0);
- kernel_context_close(client[i].ctx);
- }
-
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
-
- return err;
-}
-
-static bool verify_gt_engine_wa(struct drm_i915_private *i915,
- struct wa_lists *lists, const char *str)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- bool ok = true;
-
- ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str);
-
- for_each_engine(engine, i915, id) {
- ok &= engine_wa_list_verify(engine,
- &lists->engine[id].wa_list,
- str) == 0;
- }
-
- return ok;
-}
-
-static int
-live_gpu_reset_gt_engine_workarounds(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- intel_wakeref_t wakeref;
- struct wa_lists lists;
- bool ok;
-
- if (!intel_has_gpu_reset(i915))
- return 0;
-
- pr_info("Verifying after GPU reset...\n");
-
- igt_global_reset_lock(i915);
- wakeref = intel_runtime_pm_get(i915);
-
- reference_lists_init(i915, &lists);
-
- ok = verify_gt_engine_wa(i915, &lists, "before reset");
- if (!ok)
- goto out;
-
- i915_reset(i915, ALL_ENGINES, "live_workarounds");
-
- ok = verify_gt_engine_wa(i915, &lists, "after reset");
-
-out:
- reference_lists_fini(i915, &lists);
- intel_runtime_pm_put(i915, wakeref);
- igt_global_reset_unlock(i915);
-
- return ok ? 0 : -ESRCH;
-}
-
-static int
-live_engine_reset_gt_engine_workarounds(void *arg)
-{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
- struct igt_spinner spin;
- enum intel_engine_id id;
- struct i915_request *rq;
- intel_wakeref_t wakeref;
- struct wa_lists lists;
- int ret = 0;
-
- if (!intel_has_reset_engine(i915))
- return 0;
-
- ctx = kernel_context(i915);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- igt_global_reset_lock(i915);
- wakeref = intel_runtime_pm_get(i915);
-
- reference_lists_init(i915, &lists);
-
- for_each_engine(engine, i915, id) {
- bool ok;
-
- pr_info("Verifying after %s reset...\n", engine->name);
-
- ok = verify_gt_engine_wa(i915, &lists, "before reset");
- if (!ok) {
- ret = -ESRCH;
- goto err;
- }
-
- i915_reset_engine(engine, "live_workarounds");
-
- ok = verify_gt_engine_wa(i915, &lists, "after idle reset");
- if (!ok) {
- ret = -ESRCH;
- goto err;
- }
-
- ret = igt_spinner_init(&spin, i915);
- if (ret)
- goto err;
-
- rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP);
- if (IS_ERR(rq)) {
- ret = PTR_ERR(rq);
- igt_spinner_fini(&spin);
- goto err;
- }
-
- i915_request_add(rq);
-
- if (!igt_wait_for_spinner(&spin, rq)) {
- pr_err("Spinner failed to start\n");
- igt_spinner_fini(&spin);
- ret = -ETIMEDOUT;
- goto err;
- }
-
- i915_reset_engine(engine, "live_workarounds");
-
- igt_spinner_end(&spin);
- igt_spinner_fini(&spin);
-
- ok = verify_gt_engine_wa(i915, &lists, "after busy reset");
- if (!ok) {
- ret = -ESRCH;
- goto err;
- }
- }
-
-err:
- reference_lists_fini(i915, &lists);
- intel_runtime_pm_put(i915, wakeref);
- igt_global_reset_unlock(i915);
- kernel_context_close(ctx);
-
- igt_flush_test(i915, I915_WAIT_LOCKED);
-
- return ret;
-}
-
-int intel_workarounds_live_selftests(struct drm_i915_private *i915)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(live_dirty_whitelist),
- SUBTEST(live_reset_whitelist),
- SUBTEST(live_isolated_whitelist),
- SUBTEST(live_gpu_reset_gt_engine_workarounds),
- SUBTEST(live_engine_reset_gt_engine_workarounds),
- };
- int err;
-
- if (i915_terminally_wedged(i915))
- return 0;
-
- mutex_lock(&i915->drm.struct_mutex);
- err = i915_subtests(tests, i915);
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
-}
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "mock_engine.h"
-#include "mock_request.h"
-
-struct mock_ring {
- struct intel_ring base;
- struct i915_timeline timeline;
-};
-
-static void mock_timeline_pin(struct i915_timeline *tl)
-{
- tl->pin_count++;
-}
-
-static void mock_timeline_unpin(struct i915_timeline *tl)
-{
- GEM_BUG_ON(!tl->pin_count);
- tl->pin_count--;
-}
-
-static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
-{
- const unsigned long sz = PAGE_SIZE / 2;
- struct mock_ring *ring;
-
- ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
- if (!ring)
- return NULL;
-
- if (i915_timeline_init(engine->i915, &ring->timeline, NULL)) {
- kfree(ring);
- return NULL;
- }
-
- kref_init(&ring->base.ref);
- ring->base.size = sz;
- ring->base.effective_size = sz;
- ring->base.vaddr = (void *)(ring + 1);
- ring->base.timeline = &ring->timeline;
-
- INIT_LIST_HEAD(&ring->base.request_list);
- intel_ring_update_space(&ring->base);
-
- return &ring->base;
-}
-
-static void mock_ring_free(struct intel_ring *base)
-{
- struct mock_ring *ring = container_of(base, typeof(*ring), base);
-
- i915_timeline_fini(&ring->timeline);
- kfree(ring);
-}
-
-static struct i915_request *first_request(struct mock_engine *engine)
-{
- return list_first_entry_or_null(&engine->hw_queue,
- struct i915_request,
- mock.link);
-}
-
-static void advance(struct i915_request *request)
-{
- list_del_init(&request->mock.link);
- i915_request_mark_complete(request);
- GEM_BUG_ON(!i915_request_completed(request));
-
- intel_engine_queue_breadcrumbs(request->engine);
-}
-
-static void hw_delay_complete(struct timer_list *t)
-{
- struct mock_engine *engine = from_timer(engine, t, hw_delay);
- struct i915_request *request;
- unsigned long flags;
-
- spin_lock_irqsave(&engine->hw_lock, flags);
-
- /* Timer fired, first request is complete */
- request = first_request(engine);
- if (request)
- advance(request);
-
- /*
- * Also immediately signal any subsequent 0-delay requests, but
- * requeue the timer for the next delayed request.
- */
- while ((request = first_request(engine))) {
- if (request->mock.delay) {
- mod_timer(&engine->hw_delay,
- jiffies + request->mock.delay);
- break;
- }
-
- advance(request);
- }
-
- spin_unlock_irqrestore(&engine->hw_lock, flags);
-}
-
-static void mock_context_unpin(struct intel_context *ce)
-{
- mock_timeline_unpin(ce->ring->timeline);
-}
-
-static void mock_context_destroy(struct kref *ref)
-{
- struct intel_context *ce = container_of(ref, typeof(*ce), ref);
-
- GEM_BUG_ON(intel_context_is_pinned(ce));
-
- if (ce->ring)
- mock_ring_free(ce->ring);
-
- intel_context_free(ce);
-}
-
-static int mock_context_pin(struct intel_context *ce)
-{
- if (!ce->ring) {
- ce->ring = mock_ring(ce->engine);
- if (!ce->ring)
- return -ENOMEM;
- }
-
- mock_timeline_pin(ce->ring->timeline);
- return 0;
-}
-
-static const struct intel_context_ops mock_context_ops = {
- .pin = mock_context_pin,
- .unpin = mock_context_unpin,
-
- .destroy = mock_context_destroy,
-};
-
-static int mock_request_alloc(struct i915_request *request)
-{
- INIT_LIST_HEAD(&request->mock.link);
- request->mock.delay = 0;
-
- return 0;
-}
-
-static int mock_emit_flush(struct i915_request *request,
- unsigned int flags)
-{
- return 0;
-}
-
-static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)
-{
- return cs;
-}
-
-static void mock_submit_request(struct i915_request *request)
-{
- struct mock_engine *engine =
- container_of(request->engine, typeof(*engine), base);
- unsigned long flags;
-
- i915_request_submit(request);
-
- spin_lock_irqsave(&engine->hw_lock, flags);
- list_add_tail(&request->mock.link, &engine->hw_queue);
- if (list_is_first(&request->mock.link, &engine->hw_queue)) {
- if (request->mock.delay)
- mod_timer(&engine->hw_delay,
- jiffies + request->mock.delay);
- else
- advance(request);
- }
- spin_unlock_irqrestore(&engine->hw_lock, flags);
-}
-
-static void mock_reset_prepare(struct intel_engine_cs *engine)
-{
-}
-
-static void mock_reset(struct intel_engine_cs *engine, bool stalled)
-{
- GEM_BUG_ON(stalled);
-}
-
-static void mock_reset_finish(struct intel_engine_cs *engine)
-{
-}
-
-static void mock_cancel_requests(struct intel_engine_cs *engine)
-{
- struct i915_request *request;
- unsigned long flags;
-
- spin_lock_irqsave(&engine->timeline.lock, flags);
-
- /* Mark all submitted requests as skipped. */
- list_for_each_entry(request, &engine->timeline.requests, sched.link) {
- if (!i915_request_signaled(request))
- dma_fence_set_error(&request->fence, -EIO);
-
- i915_request_mark_complete(request);
- }
-
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
- const char *name,
- int id)
-{
- struct mock_engine *engine;
-
- GEM_BUG_ON(id >= I915_NUM_ENGINES);
-
- engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL);
- if (!engine)
- return NULL;
-
- /* minimal engine setup for requests */
- engine->base.i915 = i915;
- snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
- engine->base.id = id;
- engine->base.mask = BIT(id);
- engine->base.status_page.addr = (void *)(engine + 1);
-
- engine->base.cops = &mock_context_ops;
- engine->base.request_alloc = mock_request_alloc;
- engine->base.emit_flush = mock_emit_flush;
- engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
- engine->base.submit_request = mock_submit_request;
-
- engine->base.reset.prepare = mock_reset_prepare;
- engine->base.reset.reset = mock_reset;
- engine->base.reset.finish = mock_reset_finish;
- engine->base.cancel_requests = mock_cancel_requests;
-
- if (i915_timeline_init(i915, &engine->base.timeline, NULL))
- goto err_free;
- i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
-
- intel_engine_init_breadcrumbs(&engine->base);
-
- /* fake hw queue */
- spin_lock_init(&engine->hw_lock);
- timer_setup(&engine->hw_delay, hw_delay_complete, 0);
- INIT_LIST_HEAD(&engine->hw_queue);
-
- if (pin_context(i915->kernel_context, &engine->base,
- &engine->base.kernel_context))
- goto err_breadcrumbs;
-
- return &engine->base;
-
-err_breadcrumbs:
- intel_engine_fini_breadcrumbs(&engine->base);
- i915_timeline_fini(&engine->base.timeline);
-err_free:
- kfree(engine);
- return NULL;
-}
-
-void mock_engine_flush(struct intel_engine_cs *engine)
-{
- struct mock_engine *mock =
- container_of(engine, typeof(*mock), base);
- struct i915_request *request, *rn;
-
- del_timer_sync(&mock->hw_delay);
-
- spin_lock_irq(&mock->hw_lock);
- list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link)
- advance(request);
- spin_unlock_irq(&mock->hw_lock);
-}
-
-void mock_engine_reset(struct intel_engine_cs *engine)
-{
-}
-
-void mock_engine_free(struct intel_engine_cs *engine)
-{
- struct mock_engine *mock =
- container_of(engine, typeof(*mock), base);
- struct intel_context *ce;
-
- GEM_BUG_ON(timer_pending(&mock->hw_delay));
-
- ce = fetch_and_zero(&engine->last_retired_context);
- if (ce)
- intel_context_unpin(ce);
-
- intel_context_unpin(engine->kernel_context);
-
- intel_engine_fini_breadcrumbs(engine);
- i915_timeline_fini(&engine->timeline);
-
- kfree(engine);
-}
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_ENGINE_H__
-#define __MOCK_ENGINE_H__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/timer.h>
-
-#include "../intel_ringbuffer.h"
-
-struct mock_engine {
- struct intel_engine_cs base;
-
- spinlock_t hw_lock;
- struct list_head hw_queue;
- struct timer_list hw_delay;
-};
-
-struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
- const char *name,
- int id);
-void mock_engine_flush(struct intel_engine_cs *engine);
-void mock_engine_reset(struct intel_engine_cs *engine);
-void mock_engine_free(struct intel_engine_cs *engine);
-
-#endif /* !__MOCK_ENGINE_H__ */
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
-#include "mock_engine.h"
+#include "gt/mock_engine.h"
+
#include "mock_context.h"
#include "mock_request.h"
#include "mock_gem_device.h"
*
*/
-#include "mock_engine.h"
+#include "gt/mock_engine.h"
+
#include "mock_request.h"
struct i915_request *