struct drm_device *dev = gpu->dev;
struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
+ /*
+ * If stalled on SMMU fault, we could trip the GPU's hang detection,
+ * but the fault handler will trigger the devcore dump, and we want
+ * to otherwise resume normally rather than killing the submit, so
+ * just bail.
+ */
+ if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
+ return;
+
DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
ring ? ring->id : -1, ring ? ring->seqno : 0,
gpu_read(gpu, REG_A5XX_RBBM_STATUS),
{
struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
GFP_KERNEL);
+ bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
if (!a5xx_state)
return ERR_PTR(-ENOMEM);
a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
- /* Get the HLSQ regs with the help of the crashdumper */
- a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
+ /*
+ * Get the HLSQ regs with the help of the crashdumper, but only if
+ * we are not stalled in an iommu fault (in which case the crashdumper
+ * would not have access to memory)
+ */
+ if (!stalled)
+ a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
a5xx_set_hwcg(gpu, true);
struct msm_gpu *gpu = arg;
struct adreno_smmu_fault_info *info = data;
const char *type = "UNKNOWN";
+ const char *block;
+ bool do_devcoredump = info && !READ_ONCE(gpu->crashstate);
+
+ /*
+ * If we aren't going to be resuming later from fault_worker, then do
+ * it now.
+ */
+ if (!do_devcoredump) {
+ gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
+ }
/*
* Print a default message if we couldn't get the data from the
else if (info->fsr & ARM_SMMU_FSR_EF)
type = "EXTERNAL";
+ block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
+
pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
info->ttbr0, iova,
- flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ", type,
- a6xx_fault_block(gpu, info->fsynr1 & 0xff),
+ flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ",
+ type, block,
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
+ if (do_devcoredump) {
+ /* Turn off the hangcheck timer to keep it from bothering us */
+ del_timer(&gpu->hangcheck_timer);
+
+ gpu->fault_info.ttbr0 = info->ttbr0;
+ gpu->fault_info.iova = iova;
+ gpu->fault_info.flags = flags;
+ gpu->fault_info.type = type;
+ gpu->fault_info.block = block;
+
+ kthread_queue_work(gpu->worker, &gpu->fault_work);
+ }
+
return 0;
}
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
+ /*
+ * If stalled on SMMU fault, we could trip the GPU's hang detection,
+ * but the fault handler will trigger the devcore dump, and we want
+ * to otherwise resume normally rather than killing the submit, so
+ * just bail.
+ */
+ if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
+ return;
+
/*
* Force the GPU to stay on until after we finish
* collecting information
a6xx_get_ahb_gpu_registers(gpu,
a6xx_state, &a6xx_vbif_reglist,
&a6xx_state->registers[index++]);
+ if (!dumper) {
+ /*
+ * We can't use the crashdumper when the SMMU is stalled,
+ * because the GPU has no memory access until we resume
+ * translation (but we don't want to do that until after
+ * we have captured as much useful GPU state as possible).
+ * So instead collect registers via the CPU:
+ */
+ for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
+ a6xx_get_ahb_gpu_registers(gpu,
+ a6xx_state, &a6xx_reglist[i],
+ &a6xx_state->registers[index++]);
+ return;
+ }
for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
a6xx_get_crashdumper_registers(gpu,
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
- struct a6xx_crashdumper dumper = { 0 };
+ struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
GFP_KERNEL);
+ bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
+ A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
if (!a6xx_state)
return ERR_PTR(-ENOMEM);
/* Get the banks of indexed registers */
a6xx_get_indexed_registers(gpu, a6xx_state);
- /* Try to initialize the crashdumper */
- if (!a6xx_crashdumper_init(gpu, &dumper)) {
- a6xx_get_registers(gpu, a6xx_state, &dumper);
- a6xx_get_shaders(gpu, a6xx_state, &dumper);
- a6xx_get_clusters(gpu, a6xx_state, &dumper);
- a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper);
+ /*
+ * Try to initialize the crashdumper, if we are not dumping state
+ * with the SMMU stalled. The crashdumper needs memory access to
+ * write out GPU state, so we need to skip this when the SMMU is
+ * stalled in response to an iova fault
+ */
+ if (!stalled && !a6xx_crashdumper_init(gpu, &_dumper)) {
+ dumper = &_dumper;
+ }
+
+ a6xx_get_registers(gpu, a6xx_state, dumper);
+
+ if (dumper) {
+ a6xx_get_shaders(gpu, a6xx_state, dumper);
+ a6xx_get_clusters(gpu, a6xx_state, dumper);
+ a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
- msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
+ msm_gem_kernel_put(dumper->bo, gpu->aspace, true);
}
if (snapshot_debugbus)
adreno_gpu->info->revn, adreno_gpu->rev.core,
adreno_gpu->rev.major, adreno_gpu->rev.minor,
adreno_gpu->rev.patchid);
+ /*
+ * If this is state collected due to iova fault, so fault related info
+ *
+ * TTBR0 would not be zero, so this is a good way to distinguish
+ */
+ if (state->fault_info.ttbr0) {
+ const struct msm_gpu_fault_info *info = &state->fault_info;
+
+ drm_puts(p, "fault-info:\n");
+ drm_printf(p, " - ttbr0=%.16llx\n", info->ttbr0);
+ drm_printf(p, " - iova=%.16lx\n", info->iova);
+ drm_printf(p, " - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ");
+ drm_printf(p, " - type=%s\n", info->type);
+ drm_printf(p, " - source=%s\n", info->block);
+ }
drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status);
struct dma_fence *fence;
struct msm_gpu_submitqueue *queue;
struct pid *pid; /* submitting process */
+ bool fault_dumped; /* Limit devcoredump dumping to one per submit */
bool valid; /* true if no cmdstream patching needed */
bool in_rb; /* "sudo" mode, copy cmds into RB */
struct msm_ringbuffer *ring;
submit->cmd = (void *)&submit->bos[nr_bos];
submit->queue = queue;
submit->ring = gpu->rb[queue->prio];
+ submit->fault_dumped = false;
/* initially, until copy_from_user() and bo lookup succeeds: */
submit->nr_bos = 0;
/* Fill in the additional crash state information */
state->comm = kstrdup(comm, GFP_KERNEL);
state->cmd = kstrdup(cmd, GFP_KERNEL);
+ state->fault_info = gpu->fault_info;
if (submit) {
int i, nr = 0;
msm_gpu_retire(gpu);
}
+static void fault_worker(struct kthread_work *work)
+{
+ struct msm_gpu *gpu = container_of(work, struct msm_gpu, fault_work);
+ struct drm_device *dev = gpu->dev;
+ struct msm_gem_submit *submit;
+ struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
+ char *comm = NULL, *cmd = NULL;
+
+ mutex_lock(&dev->struct_mutex);
+
+ submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
+ if (submit && submit->fault_dumped)
+ goto resume_smmu;
+
+ if (submit) {
+ struct task_struct *task;
+
+ task = get_pid_task(submit->pid, PIDTYPE_PID);
+ if (task) {
+ comm = kstrdup(task->comm, GFP_KERNEL);
+ cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
+ put_task_struct(task);
+ }
+
+ /*
+ * When we get GPU iova faults, we can get 1000s of them,
+ * but we really only want to log the first one.
+ */
+ submit->fault_dumped = true;
+ }
+
+ /* Record the crash state */
+ pm_runtime_get_sync(&gpu->pdev->dev);
+ msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
+ pm_runtime_put_sync(&gpu->pdev->dev);
+
+ kfree(cmd);
+ kfree(comm);
+
+resume_smmu:
+ memset(&gpu->fault_info, 0, sizeof(gpu->fault_info));
+ gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
+
+ mutex_unlock(&dev->struct_mutex);
+}
+
static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
struct msm_drm_private *priv = gpu->dev->dev_private;
INIT_LIST_HEAD(&gpu->active_list);
kthread_init_work(&gpu->retire_work, retire_worker);
kthread_init_work(&gpu->recover_work, recover_worker);
+ kthread_init_work(&gpu->fault_work, fault_worker);
timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);
uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
};
+/* Additional state for iommu faults: */
+struct msm_gpu_fault_info {
+ u64 ttbr0;
+ unsigned long iova;
+ int flags;
+ const char *type;
+ const char *block;
+};
+
struct msm_gpu {
const char *name;
struct drm_device *dev;
#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
struct timer_list hangcheck_timer;
+ /* Fault info for most recent iova fault: */
+ struct msm_gpu_fault_info fault_info;
+
+ /* work for handling GPU ioval faults: */
+ struct kthread_work fault_work;
+
/* work for handling GPU recovery: */
struct kthread_work recover_work;
char *comm;
char *cmd;
+ struct msm_gpu_fault_info fault_info;
+
int nr_bos;
struct msm_gpu_state_bo *bos;
};
return 0;
}
+static void msm_gpummu_resume_translation(struct msm_mmu *mmu)
+{
+}
+
static void msm_gpummu_destroy(struct msm_mmu *mmu)
{
struct msm_gpummu *gpummu = to_msm_gpummu(mmu);
.map = msm_gpummu_map,
.unmap = msm_gpummu_unmap,
.destroy = msm_gpummu_destroy,
+ .resume_translation = msm_gpummu_resume_translation,
};
struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu)
* the arm-smmu driver as a trigger to set up TTBR0
*/
if (atomic_inc_return(&iommu->pagetables) == 1) {
+ /* Enable stall on iommu fault: */
+ adreno_smmu->set_stall(adreno_smmu->cookie, true);
+
ret = adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, &ttbr0_cfg);
if (ret) {
free_io_pgtable_ops(pagetable->pgtbl_ops);
return 0;
}
+static void msm_iommu_resume_translation(struct msm_mmu *mmu)
+{
+ struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(mmu->dev);
+
+ adreno_smmu->resume_translation(adreno_smmu->cookie, true);
+}
+
static void msm_iommu_detach(struct msm_mmu *mmu)
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
.map = msm_iommu_map,
.unmap = msm_iommu_unmap,
.destroy = msm_iommu_destroy,
+ .resume_translation = msm_iommu_resume_translation,
};
struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
size_t len, int prot);
int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len);
void (*destroy)(struct msm_mmu *mmu);
+ void (*resume_translation)(struct msm_mmu *mmu);
};
enum msm_mmu_type {