v3d_gem.o \
v3d_irq.o \
v3d_mmu.o \
+ v3d_perfmon.o \
v3d_trace_points.o \
v3d_sched.o
case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
args->value = 1;
return 0;
+ case DRM_V3D_PARAM_SUPPORTS_PERFMON:
+ args->value = (v3d->ver >= 40);
+ return 0;
default:
DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
1, NULL);
}
+ v3d_perfmon_open_file(v3d_priv);
file->driver_priv = v3d_priv;
return 0;
drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
}
+ v3d_perfmon_close_file(v3d_priv);
kfree(v3d_priv);
}
DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CSD, v3d_submit_csd_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
+ DRM_IOCTL_DEF_DRV(V3D_PERFMON_CREATE, v3d_perfmon_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_PERFMON_DESTROY, v3d_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
};
static const struct drm_driver v3d_drm_driver = {
u64 emit_seqno;
};
+/* Performance monitor object. The perform lifetime is controlled by userspace
+ * using perfmon related ioctls. A perfmon can be attached to a submit_cl
+ * request, and when this is the case, HW perf counters will be activated just
+ * before the submit_cl is submitted to the GPU and disabled when the job is
+ * done. This way, only events related to a specific job will be counted.
+ */
+struct v3d_perfmon {
+ /* Tracks the number of users of the perfmon, when this counter reaches
+ * zero the perfmon is destroyed.
+ */
+ refcount_t refcnt;
+
+ /* Protects perfmon stop, as it can be invoked from multiple places. */
+ struct mutex lock;
+
+ /* Number of counters activated in this perfmon instance
+ * (should be less than DRM_V3D_MAX_PERF_COUNTERS).
+ */
+ u8 ncounters;
+
+ /* Events counted by the HW perf counters. */
+ u8 counters[DRM_V3D_MAX_PERF_COUNTERS];
+
+ /* Storage for counter values. Counters are incremented by the
+ * HW perf counter values every time the perfmon is attached
+ * to a GPU job. This way, perfmon users don't have to
+ * retrieve the results after each job if they want to track
+ * events covering several submissions. Note that counter
+ * values can't be reset, but you can fake a reset by
+ * destroying the perfmon and creating a new one.
+ */
+ u64 values[];
+};
+
struct v3d_dev {
struct drm_device drm;
*/
spinlock_t job_lock;
+ /* Used to track the active perfmon if any. */
+ struct v3d_perfmon *active_perfmon;
+
/* Protects bo_stats */
struct mutex bo_lock;
struct v3d_file_priv {
struct v3d_dev *v3d;
+ struct {
+ struct idr idr;
+ struct mutex lock;
+ } perfmon;
+
struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
};
*/
struct dma_fence *done_fence;
+ /* Pointer to a performance monitor object if the user requested it,
+ * NULL otherwise.
+ */
+ struct v3d_perfmon *perfmon;
+
/* Callback for the freeing of the job on refcount going to 0. */
void (*free)(struct kref *ref);
};
/* v3d_sched.c */
int v3d_sched_init(struct v3d_dev *v3d);
void v3d_sched_fini(struct v3d_dev *v3d);
+
+/* v3d_perfmon.c */
+void v3d_perfmon_get(struct v3d_perfmon *perfmon);
+void v3d_perfmon_put(struct v3d_perfmon *perfmon);
+void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon);
+void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon,
+ bool capture);
+struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id);
+void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv);
+void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv);
+int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
v3d_mmu_set_page_table(v3d);
v3d_irq_reset(v3d);
+ v3d_perfmon_stop(v3d, v3d->active_perfmon, false);
+
trace_v3d_reset_end(dev);
}
pm_runtime_mark_last_busy(job->v3d->drm.dev);
pm_runtime_put_autosuspend(job->v3d->drm.dev);
+ if (job->perfmon)
+ v3d_perfmon_put(job->perfmon);
+
kfree(job);
}
trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
+ if (args->pad != 0)
+ return -EINVAL;
+
if (args->flags != 0 &&
args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
DRM_INFO("invalid flags: %d\n", args->flags);
if (ret)
goto fail;
+ if (args->perfmon_id) {
+ render->base.perfmon = v3d_perfmon_find(v3d_priv,
+ args->perfmon_id);
+
+ if (!render->base.perfmon) {
+ ret = -ENOENT;
+ goto fail;
+ }
+ }
+
mutex_lock(&v3d->sched_lock);
if (bin) {
+ bin->base.perfmon = render->base.perfmon;
+ v3d_perfmon_get(bin->base.perfmon);
ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
if (ret)
goto fail_unreserve;
ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
if (ret)
goto fail_unreserve;
+ clean_job->perfmon = render->base.perfmon;
+ v3d_perfmon_get(clean_job->perfmon);
ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
if (ret)
goto fail_unreserve;
if (ret)
goto fail;
+ if (args->perfmon_id) {
+ job->base.perfmon = v3d_perfmon_find(v3d_priv,
+ args->perfmon_id);
+ if (!job->base.perfmon) {
+ ret = -ENOENT;
+ goto fail;
+ }
+ }
+
mutex_lock(&v3d->sched_lock);
ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD);
if (ret)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Raspberry Pi
+ */
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define V3D_PERFMONID_MIN 1
+#define V3D_PERFMONID_MAX U32_MAX
+
+void v3d_perfmon_get(struct v3d_perfmon *perfmon)
+{
+ if (perfmon)
+ refcount_inc(&perfmon->refcnt);
+}
+
+void v3d_perfmon_put(struct v3d_perfmon *perfmon)
+{
+ if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
+ kfree(perfmon);
+}
+
+void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon)
+{
+ unsigned int i;
+ u32 mask;
+ u8 ncounters = perfmon->ncounters;
+
+ if (WARN_ON_ONCE(!perfmon || v3d->active_perfmon))
+ return;
+
+ mask = GENMASK(ncounters - 1, 0);
+
+ for (i = 0; i < ncounters; i++) {
+ u32 source = i / 4;
+ u32 channel = V3D_SET_FIELD(perfmon->counters[i], V3D_PCTR_S0);
+
+ i++;
+ channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0,
+ V3D_PCTR_S1);
+ i++;
+ channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0,
+ V3D_PCTR_S2);
+ i++;
+ channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0,
+ V3D_PCTR_S3);
+ V3D_CORE_WRITE(0, V3D_V4_PCTR_0_SRC_X(source), channel);
+ }
+
+ V3D_CORE_WRITE(0, V3D_V4_PCTR_0_CLR, mask);
+ V3D_CORE_WRITE(0, V3D_PCTR_0_OVERFLOW, mask);
+ V3D_CORE_WRITE(0, V3D_V4_PCTR_0_EN, mask);
+
+ v3d->active_perfmon = perfmon;
+}
+
+void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon,
+ bool capture)
+{
+ unsigned int i;
+
+ if (!perfmon || !v3d->active_perfmon)
+ return;
+
+ mutex_lock(&perfmon->lock);
+ if (perfmon != v3d->active_perfmon) {
+ mutex_unlock(&perfmon->lock);
+ return;
+ }
+
+ if (capture)
+ for (i = 0; i < perfmon->ncounters; i++)
+ perfmon->values[i] += V3D_CORE_READ(0, V3D_PCTR_0_PCTRX(i));
+
+ V3D_CORE_WRITE(0, V3D_V4_PCTR_0_EN, 0);
+
+ v3d->active_perfmon = NULL;
+ mutex_unlock(&perfmon->lock);
+}
+
+struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id)
+{
+ struct v3d_perfmon *perfmon;
+
+ mutex_lock(&v3d_priv->perfmon.lock);
+ perfmon = idr_find(&v3d_priv->perfmon.idr, id);
+ v3d_perfmon_get(perfmon);
+ mutex_unlock(&v3d_priv->perfmon.lock);
+
+ return perfmon;
+}
+
+void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv)
+{
+ mutex_init(&v3d_priv->perfmon.lock);
+ idr_init(&v3d_priv->perfmon.idr);
+}
+
+static int v3d_perfmon_idr_del(int id, void *elem, void *data)
+{
+ struct v3d_perfmon *perfmon = elem;
+
+ v3d_perfmon_put(perfmon);
+
+ return 0;
+}
+
+void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv)
+{
+ mutex_lock(&v3d_priv->perfmon.lock);
+ idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, NULL);
+ idr_destroy(&v3d_priv->perfmon.idr);
+ mutex_unlock(&v3d_priv->perfmon.lock);
+}
+
+int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+ struct drm_v3d_perfmon_create *req = data;
+ struct v3d_perfmon *perfmon;
+ unsigned int i;
+ int ret;
+
+ /* Number of monitored counters cannot exceed HW limits. */
+ if (req->ncounters > DRM_V3D_MAX_PERF_COUNTERS ||
+ !req->ncounters)
+ return -EINVAL;
+
+ /* Make sure all counters are valid. */
+ for (i = 0; i < req->ncounters; i++) {
+ if (req->counters[i] >= V3D_PERFCNT_NUM)
+ return -EINVAL;
+ }
+
+ perfmon = kzalloc(struct_size(perfmon, values, req->ncounters),
+ GFP_KERNEL);
+ if (!perfmon)
+ return -ENOMEM;
+
+ for (i = 0; i < req->ncounters; i++)
+ perfmon->counters[i] = req->counters[i];
+
+ perfmon->ncounters = req->ncounters;
+
+ refcount_set(&perfmon->refcnt, 1);
+ mutex_init(&perfmon->lock);
+
+ mutex_lock(&v3d_priv->perfmon.lock);
+ ret = idr_alloc(&v3d_priv->perfmon.idr, perfmon, V3D_PERFMONID_MIN,
+ V3D_PERFMONID_MAX, GFP_KERNEL);
+ mutex_unlock(&v3d_priv->perfmon.lock);
+
+ if (ret < 0) {
+ kfree(perfmon);
+ return ret;
+ }
+
+ req->id = ret;
+
+ return 0;
+}
+
+int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+ struct drm_v3d_perfmon_destroy *req = data;
+ struct v3d_perfmon *perfmon;
+
+ mutex_lock(&v3d_priv->perfmon.lock);
+ perfmon = idr_remove(&v3d_priv->perfmon.idr, req->id);
+ mutex_unlock(&v3d_priv->perfmon.lock);
+
+ if (!perfmon)
+ return -EINVAL;
+
+ v3d_perfmon_put(perfmon);
+
+ return 0;
+}
+
+int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+ struct drm_v3d_perfmon_get_values *req = data;
+ struct v3d_perfmon *perfmon;
+ int ret = 0;
+
+ if (req->pad != 0)
+ return -EINVAL;
+
+ mutex_lock(&v3d_priv->perfmon.lock);
+ perfmon = idr_find(&v3d_priv->perfmon.idr, req->id);
+ v3d_perfmon_get(perfmon);
+ mutex_unlock(&v3d_priv->perfmon.lock);
+
+ if (!perfmon)
+ return -EINVAL;
+
+ v3d_perfmon_stop(v3d, perfmon, true);
+
+ if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->values,
+ perfmon->ncounters * sizeof(u64)))
+ ret = -EFAULT;
+
+ v3d_perfmon_put(perfmon);
+
+ return ret;
+}
/* Each src reg muxes four counters each. */
#define V3D_V4_PCTR_0_SRC_0_3 0x00660
#define V3D_V4_PCTR_0_SRC_28_31 0x0067c
+#define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \
+ 4 * (x))
# define V3D_PCTR_S0_MASK V3D_MASK(6, 0)
# define V3D_PCTR_S0_SHIFT 0
# define V3D_PCTR_S1_MASK V3D_MASK(14, 8)
v3d_job_put(job);
}
+static void
+v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
+{
+ if (job->perfmon != v3d->active_perfmon)
+ v3d_perfmon_stop(v3d, v3d->active_perfmon, true);
+
+ if (job->perfmon && v3d->active_perfmon != job->perfmon)
+ v3d_perfmon_start(v3d, job->perfmon);
+}
+
/*
* Returns the fences that the job depends on, one by one.
*
trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
job->start, job->end);
+ v3d_switch_perfmon(v3d, &job->base);
+
/* Set the current and end address of the control list.
* Writing the end register is what starts the job.
*/
trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
job->start, job->end);
+ v3d_switch_perfmon(v3d, &job->base);
+
/* XXX: Set the QCFG */
/* Set the current and end address of the control list.
trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
+ v3d_switch_perfmon(v3d, &job->base);
+
for (i = 1; i <= 6; i++)
V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
/* CFG0 write kicks off the job. */
#define DRM_V3D_GET_BO_OFFSET 0x05
#define DRM_V3D_SUBMIT_TFU 0x06
#define DRM_V3D_SUBMIT_CSD 0x07
+#define DRM_V3D_PERFMON_CREATE 0x08
+#define DRM_V3D_PERFMON_DESTROY 0x09
+#define DRM_V3D_PERFMON_GET_VALUES 0x0a
#define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
#define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
#define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
#define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
#define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
+#define DRM_IOCTL_V3D_PERFMON_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_CREATE, \
+ struct drm_v3d_perfmon_create)
+#define DRM_IOCTL_V3D_PERFMON_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, \
+ struct drm_v3d_perfmon_destroy)
+#define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \
+ struct drm_v3d_perfmon_get_values)
#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01
__u32 bo_handle_count;
__u32 flags;
+
+ /* ID of the perfmon to attach to this job. 0 means no perfmon. */
+ __u32 perfmon_id;
+
+ __u32 pad;
};
/**
DRM_V3D_PARAM_SUPPORTS_TFU,
DRM_V3D_PARAM_SUPPORTS_CSD,
DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH,
+ DRM_V3D_PARAM_SUPPORTS_PERFMON,
};
struct drm_v3d_get_param {
__u32 in_sync;
/* Sync object to signal when the CSD job is done. */
__u32 out_sync;
+
+ /* ID of the perfmon to attach to this job. 0 means no perfmon. */
+ __u32 perfmon_id;
+};
+
+enum {
+ V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS,
+ V3D_PERFCNT_FEP_VALID_PRIMS,
+ V3D_PERFCNT_FEP_EZ_NFCLIP_QUADS,
+ V3D_PERFCNT_FEP_VALID_QUADS,
+ V3D_PERFCNT_TLB_QUADS_STENCIL_FAIL,
+ V3D_PERFCNT_TLB_QUADS_STENCILZ_FAIL,
+ V3D_PERFCNT_TLB_QUADS_STENCILZ_PASS,
+ V3D_PERFCNT_TLB_QUADS_ZERO_COV,
+ V3D_PERFCNT_TLB_QUADS_NONZERO_COV,
+ V3D_PERFCNT_TLB_QUADS_WRITTEN,
+ V3D_PERFCNT_PTB_PRIM_VIEWPOINT_DISCARD,
+ V3D_PERFCNT_PTB_PRIM_CLIP,
+ V3D_PERFCNT_PTB_PRIM_REV,
+ V3D_PERFCNT_QPU_IDLE_CYCLES,
+ V3D_PERFCNT_QPU_ACTIVE_CYCLES_VERTEX_COORD_USER,
+ V3D_PERFCNT_QPU_ACTIVE_CYCLES_FRAG,
+ V3D_PERFCNT_QPU_CYCLES_VALID_INSTR,
+ V3D_PERFCNT_QPU_CYCLES_TMU_STALL,
+ V3D_PERFCNT_QPU_CYCLES_SCOREBOARD_STALL,
+ V3D_PERFCNT_QPU_CYCLES_VARYINGS_STALL,
+ V3D_PERFCNT_QPU_IC_HIT,
+ V3D_PERFCNT_QPU_IC_MISS,
+ V3D_PERFCNT_QPU_UC_HIT,
+ V3D_PERFCNT_QPU_UC_MISS,
+ V3D_PERFCNT_TMU_TCACHE_ACCESS,
+ V3D_PERFCNT_TMU_TCACHE_MISS,
+ V3D_PERFCNT_VPM_VDW_STALL,
+ V3D_PERFCNT_VPM_VCD_STALL,
+ V3D_PERFCNT_BIN_ACTIVE,
+ V3D_PERFCNT_RDR_ACTIVE,
+ V3D_PERFCNT_L2T_HITS,
+ V3D_PERFCNT_L2T_MISSES,
+ V3D_PERFCNT_CYCLE_COUNT,
+ V3D_PERFCNT_QPU_CYCLES_STALLED_VERTEX_COORD_USER,
+ V3D_PERFCNT_QPU_CYCLES_STALLED_FRAGMENT,
+ V3D_PERFCNT_PTB_PRIMS_BINNED,
+ V3D_PERFCNT_AXI_WRITES_WATCH_0,
+ V3D_PERFCNT_AXI_READS_WATCH_0,
+ V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_0,
+ V3D_PERFCNT_AXI_READ_STALLS_WATCH_0,
+ V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_0,
+ V3D_PERFCNT_AXI_READ_BYTES_WATCH_0,
+ V3D_PERFCNT_AXI_WRITES_WATCH_1,
+ V3D_PERFCNT_AXI_READS_WATCH_1,
+ V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_1,
+ V3D_PERFCNT_AXI_READ_STALLS_WATCH_1,
+ V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_1,
+ V3D_PERFCNT_AXI_READ_BYTES_WATCH_1,
+ V3D_PERFCNT_TLB_PARTIAL_QUADS,
+ V3D_PERFCNT_TMU_CONFIG_ACCESSES,
+ V3D_PERFCNT_L2T_NO_ID_STALL,
+ V3D_PERFCNT_L2T_COM_QUE_STALL,
+ V3D_PERFCNT_L2T_TMU_WRITES,
+ V3D_PERFCNT_TMU_ACTIVE_CYCLES,
+ V3D_PERFCNT_TMU_STALLED_CYCLES,
+ V3D_PERFCNT_CLE_ACTIVE,
+ V3D_PERFCNT_L2T_TMU_READS,
+ V3D_PERFCNT_L2T_CLE_READS,
+ V3D_PERFCNT_L2T_VCD_READS,
+ V3D_PERFCNT_L2T_TMUCFG_READS,
+ V3D_PERFCNT_L2T_SLC0_READS,
+ V3D_PERFCNT_L2T_SLC1_READS,
+ V3D_PERFCNT_L2T_SLC2_READS,
+ V3D_PERFCNT_L2T_TMU_W_MISSES,
+ V3D_PERFCNT_L2T_TMU_R_MISSES,
+ V3D_PERFCNT_L2T_CLE_MISSES,
+ V3D_PERFCNT_L2T_VCD_MISSES,
+ V3D_PERFCNT_L2T_TMUCFG_MISSES,
+ V3D_PERFCNT_L2T_SLC0_MISSES,
+ V3D_PERFCNT_L2T_SLC1_MISSES,
+ V3D_PERFCNT_L2T_SLC2_MISSES,
+ V3D_PERFCNT_CORE_MEM_WRITES,
+ V3D_PERFCNT_L2T_MEM_WRITES,
+ V3D_PERFCNT_PTB_MEM_WRITES,
+ V3D_PERFCNT_TLB_MEM_WRITES,
+ V3D_PERFCNT_CORE_MEM_READS,
+ V3D_PERFCNT_L2T_MEM_READS,
+ V3D_PERFCNT_PTB_MEM_READS,
+ V3D_PERFCNT_PSE_MEM_READS,
+ V3D_PERFCNT_TLB_MEM_READS,
+ V3D_PERFCNT_GMP_MEM_READS,
+ V3D_PERFCNT_PTB_W_MEM_WORDS,
+ V3D_PERFCNT_TLB_W_MEM_WORDS,
+ V3D_PERFCNT_PSE_R_MEM_WORDS,
+ V3D_PERFCNT_TLB_R_MEM_WORDS,
+ V3D_PERFCNT_TMU_MRU_HITS,
+ V3D_PERFCNT_COMPUTE_ACTIVE,
+ V3D_PERFCNT_NUM,
+};
+
+#define DRM_V3D_MAX_PERF_COUNTERS 32
+
+struct drm_v3d_perfmon_create {
+ __u32 id;
+ __u32 ncounters;
+ __u8 counters[DRM_V3D_MAX_PERF_COUNTERS];
+};
+
+struct drm_v3d_perfmon_destroy {
+ __u32 id;
+};
+
+/*
+ * Returns the values of the performance counters tracked by this
+ * perfmon (as an array of ncounters u64 values).
+ *
+ * No implicit synchronization is performed, so the user has to
+ * guarantee that any jobs using this perfmon have already been
+ * completed (probably by blocking on the seqno returned by the
+ * last exec that used the perfmon).
+ */
+struct drm_v3d_perfmon_get_values {
+ __u32 id;
+ __u32 pad;
+ __u64 values_ptr;
};
#if defined(__cplusplus)