]> git.baikalelectronics.ru Git - kernel.git/commitdiff
drm/i915/perf: introduce global sseu pinning
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Tue, 17 Mar 2020 13:22:22 +0000 (15:22 +0200)
committerLionel Landwerlin <lionel.g.landwerlin@intel.com>
Tue, 17 Mar 2020 13:27:55 +0000 (15:27 +0200)
On Gen11 powergating half the execution units is a functional
requirement when using the VME samplers. Not fullfilling this
requirement can lead to hangs.

This unfortunately plays fairly poorly with the NOA requirements. NOA
requires a stable power configuration to maintain its configuration.

As a result using OA (and NOA feeding into it) so far has required us
to use a power configuration that can work for all contexts. The only
power configuration fullfilling this is powergating half the execution
units.

This makes performance analysis for 3D workloads somewhat pointless.

Failing to find a solution that would work for everybody, this change
introduces a new i915-perf stream open parameter that punts the
decision off to userspace. If this parameter is omitted, the existing
Gen11 behavior remains (half EU array powergating).

This change takes the initiative to move all perf related sseu
configuration into i915_perf.c

v2: Make parameter priviliged if different from default

v3: Fix context modifying its sseu config while i915-perf is enabled

v4: Always consider global sseu a privileged operation (Tvrtko)
    Override req_sseu point in intel_sseu_make_rpcs() (Tvrtko)
    Remove unrelated changes (Tvrtko)

v5: Some typos (Tvrtko)
    Process sseu param in read_properties_unlocked() (Tvrtko)

v6: Actually commit the bits from v5...
    Fixup some checkpath warnings

v7: Only compare engine uabi field (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200317132222.2638719-3-lionel.g.landwerlin@intel.com
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gem/i915_gem_context.h
drivers/gpu/drm/i915/gt/intel_sseu.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_perf_types.h
include/uapi/drm/i915_drm.h

index 026999b34abdc93c64fccd5be89acb686c05fa53..c0e476fcd1fafbd39195e1ee2a6c96053f66f9eb 100644 (file)
@@ -1401,10 +1401,10 @@ static int get_ringsize(struct i915_gem_context *ctx,
        return 0;
 }
 
-static int
-user_to_context_sseu(struct drm_i915_private *i915,
-                    const struct drm_i915_gem_context_param_sseu *user,
-                    struct intel_sseu *context)
+int
+i915_gem_user_to_context_sseu(struct drm_i915_private *i915,
+                             const struct drm_i915_gem_context_param_sseu *user,
+                             struct intel_sseu *context)
 {
        const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu;
 
@@ -1539,7 +1539,7 @@ static int set_sseu(struct i915_gem_context *ctx,
                goto out_ce;
        }
 
-       ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+       ret = i915_gem_user_to_context_sseu(i915, &user_sseu, &sseu);
        if (ret)
                goto out_ce;
 
index f1d884d304bd6fa8cf53d31d60c084d70d4ffabe..3702b2fb27abd73103b078f800aeb4e8794fd2ae 100644 (file)
@@ -225,4 +225,8 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
 struct i915_lut_handle *i915_lut_handle_alloc(void);
 void i915_lut_handle_free(struct i915_lut_handle *lut);
 
+int i915_gem_user_to_context_sseu(struct drm_i915_private *i915,
+                                 const struct drm_i915_gem_context_param_sseu *user,
+                                 struct intel_sseu *context);
+
 #endif /* !__I915_GEM_CONTEXT_H__ */
index 74f793423231bcbfb8c7e578b229bfa4be55395a..d173271c73976aca878686c7dffbabe331c79c43 100644 (file)
@@ -65,7 +65,6 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
 {
        const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
        bool subslice_pg = sseu->has_subslice_pg;
-       struct intel_sseu ctx_sseu;
        u8 slices, subslices;
        u32 rpcs = 0;
 
@@ -78,31 +77,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
 
        /*
         * If i915/perf is active, we want a stable powergating configuration
-        * on the system.
-        *
-        * We could choose full enablement, but on ICL we know there are use
-        * cases which disable slices for functional, apart for performance
-        * reasons. So in this case we select a known stable subset.
+        * on the system. Use the configuration pinned by i915/perf.
         */
-       if (!i915->perf.exclusive_stream) {
-               ctx_sseu = *req_sseu;
-       } else {
-               ctx_sseu = intel_sseu_from_device_info(sseu);
-
-               if (IS_GEN(i915, 11)) {
-                       /*
-                        * We only need subslice count so it doesn't matter
-                        * which ones we select - just turn off low bits in the
-                        * amount of half of all available subslices per slice.
-                        */
-                       ctx_sseu.subslice_mask =
-                               ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
-                       ctx_sseu.slice_mask = 0x1;
-               }
-       }
+       if (i915->perf.exclusive_stream)
+               req_sseu = &i915->perf.sseu;
 
-       slices = hweight8(ctx_sseu.slice_mask);
-       subslices = hweight8(ctx_sseu.subslice_mask);
+       slices = hweight8(req_sseu->slice_mask);
+       subslices = hweight8(req_sseu->subslice_mask);
 
        /*
         * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
@@ -175,13 +156,13 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
        if (sseu->has_eu_pg) {
                u32 val;
 
-               val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+               val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
                GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
                val &= GEN8_RPCS_EU_MIN_MASK;
 
                rpcs |= val;
 
-               val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+               val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
                GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
                val &= GEN8_RPCS_EU_MAX_MASK;
 
index 86c6abaa3e0e7385f35c04a6dc81c4a577252c79..4611f72cd036cfaf175ecad1f9c5680a6748a341 100644 (file)
@@ -344,6 +344,10 @@ static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
  * @oa_periodic: Whether to enable periodic OA unit sampling
  * @oa_period_exponent: The OA unit sampling period is derived from this
  * @engine: The engine (typically rcs0) being monitored by the OA unit
+ * @has_sseu: Whether @sseu was specified by userspace
+ * @sseu: internal SSEU configuration computed either from the userspace
+ *        specified configuration in the opening parameters or a default value
+ *        (see get_default_sseu_config())
  *
  * As read_properties_unlocked() enumerates and validates the properties given
  * to open a stream of metrics the configuration is built up in the structure
@@ -363,6 +367,9 @@ struct perf_open_properties {
        int oa_period_exponent;
 
        struct intel_engine_cs *engine;
+
+       bool has_sseu;
+       struct intel_sseu sseu;
 };
 
 struct i915_oa_config_bo {
@@ -2720,6 +2727,39 @@ static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
        return 0;
 }
 
+static void
+get_default_sseu_config(struct intel_sseu *out_sseu,
+                       struct intel_engine_cs *engine)
+{
+       const struct sseu_dev_info *devinfo_sseu =
+               &RUNTIME_INFO(engine->i915)->sseu;
+
+       *out_sseu = intel_sseu_from_device_info(devinfo_sseu);
+
+       if (IS_GEN(engine->i915, 11)) {
+               /*
+                * We only need subslice count so it doesn't matter which ones
+                * we select - just turn off low bits in the amount of half of
+                * all available subslices per slice.
+                */
+               out_sseu->subslice_mask =
+                       ~(~0 << (hweight8(out_sseu->subslice_mask) / 2));
+               out_sseu->slice_mask = 0x1;
+       }
+}
+
+static int
+get_sseu_config(struct intel_sseu *out_sseu,
+               struct intel_engine_cs *engine,
+               const struct drm_i915_gem_context_param_sseu *drm_sseu)
+{
+       if (drm_sseu->engine.engine_class != engine->uabi_class ||
+           drm_sseu->engine.engine_instance != engine->uabi_instance)
+               return -EINVAL;
+
+       return i915_gem_user_to_context_sseu(engine->i915, drm_sseu, out_sseu);
+}
+
 /**
  * i915_oa_stream_init - validate combined props for OA stream and init
  * @stream: An i915 perf stream
@@ -2852,6 +2892,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
                goto err_oa_buf_alloc;
 
        stream->ops = &i915_oa_stream_ops;
+
+       perf->sseu = props->sseu;
        WRITE_ONCE(perf->exclusive_stream, stream);
 
        ret = i915_perf_stream_enable_sync(stream);
@@ -3397,6 +3439,14 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
                privileged_op = true;
        }
 
+       /*
+        * Asking for SSEU configuration is a priviliged operation.
+        */
+       if (props->has_sseu)
+               privileged_op = true;
+       else
+               get_default_sseu_config(&props->sseu, props->engine);
+
        /* Similar to perf's kernel.perf_paranoid_cpu sysctl option
         * we check a dev.i915.perf_stream_paranoid sysctl option
         * to determine if it's ok to access system wide OA counters
@@ -3492,6 +3542,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
 {
        u64 __user *uprop = uprops;
        u32 i;
+       int ret;
 
        memset(props, 0, sizeof(struct perf_open_properties));
 
@@ -3523,7 +3574,6 @@ static int read_properties_unlocked(struct i915_perf *perf,
        for (i = 0; i < n_props; i++) {
                u64 oa_period, oa_freq_hz;
                u64 id, value;
-               int ret;
 
                ret = get_user(id, uprop);
                if (ret)
@@ -3609,6 +3659,24 @@ static int read_properties_unlocked(struct i915_perf *perf,
                case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
                        props->hold_preemption = !!value;
                        break;
+               case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
+                       struct drm_i915_gem_context_param_sseu user_sseu;
+
+                       if (copy_from_user(&user_sseu,
+                                          u64_to_user_ptr(value),
+                                          sizeof(user_sseu))) {
+                               DRM_DEBUG("Unable to copy global sseu parameter\n");
+                               return -EFAULT;
+                       }
+
+                       ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
+                       if (ret) {
+                               DRM_DEBUG("Invalid SSEU configuration\n");
+                               return ret;
+                       }
+                       props->has_sseu = true;
+                       break;
+               }
                case DRM_I915_PERF_PROP_MAX:
                        MISSING_CASE(id);
                        return -EINVAL;
@@ -4382,8 +4450,12 @@ int i915_perf_ioctl_version(void)
         *    preemption on a particular context so that performance data is
         *    accessible from a delta of MI_RPC reports without looking at the
         *    OA buffer.
+        *
+        * 4: Add DRM_I915_PERF_PROP_ALLOWED_SSEU to limit what contexts can
+        *    be run for the duration of the performance recording based on
+        *    their SSEU configuration.
         */
-       return 3;
+       return 4;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
index f4ccd2adfee6ba136f0e1098afe002e62d36b0f9..32289cbda64858ac0239135c2cb3a4800c36c305 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/uuid.h>
 #include <linux/wait.h>
 
+#include "gt/intel_sseu.h"
 #include "i915_reg.h"
 #include "intel_wakeref.h"
 
@@ -407,6 +408,12 @@ struct i915_perf {
         */
        struct i915_perf_stream *exclusive_stream;
 
+       /**
+        * @sseu: sseu configuration selected to run while perf is active,
+        * applies to all contexts.
+        */
+       struct intel_sseu sseu;
+
        /**
         * For rate limiting any notifications of spurious
         * invalid OA reports
index 2813e579b480cc5e6c10e85663b64ac7dafe9102..db649d03ab52f346ecbb2b78c81d6526abb86a46 100644 (file)
@@ -1969,6 +1969,17 @@ enum drm_i915_perf_property_id {
         */
        DRM_I915_PERF_PROP_HOLD_PREEMPTION,
 
+       /**
+        * Specifying this pins all contexts to the specified SSEU power
+        * configuration for the duration of the recording.
+        *
+        * This parameter's value is a pointer to a struct
+        * drm_i915_gem_context_param_sseu.
+        *
+        * This property is available in perf revision 4.
+        */
+       DRM_I915_PERF_PROP_GLOBAL_SSEU,
+
        DRM_I915_PERF_PROP_MAX /* non-ABI */
 };