]> git.baikalelectronics.ru Git - kernel.git/commitdiff
drm/amdgpu: add function to clear MMEA error status for aldebaran
author: Dennis Li <Dennis.Li@amd.com>
Mon, 10 May 2021 11:08:11 +0000 (19:08 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 10 May 2021 22:11:44 +0000 (18:11 -0400)
For aldebaran, the hardware does not clear the error status automatically
when the error status register is read; instead, the driver must explicitly
set the clear bit of the error status register to clear the error status.

Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c

index 11aa29933c1f8c968101bb8b0b09a2f8f2251f68..b27fcbccce2b6f27dcb70ec99527323db075863d 100644 (file)
@@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs {
                                      void *ras_error_status);
        void (*query_ras_error_status)(struct amdgpu_device *adev);
        void (*reset_ras_error_count)(struct amdgpu_device *adev);
+       void (*reset_ras_error_status)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_mmhub_funcs {
index 4eebb97994d65debe3bf62f5c25e1679560b3c0c..a324dc2da101c0b6ecc53f38f72dc28bc93b4ccc 100644 (file)
@@ -938,6 +938,10 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
                if (adev->mmhub.ras_funcs &&
                    adev->mmhub.ras_funcs->reset_ras_error_count)
                        adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+
+               if (adev->mmhub.ras_funcs &&
+                   adev->mmhub.ras_funcs->reset_ras_error_status)
+                       adev->mmhub.ras_funcs->reset_ras_error_status(adev);
                break;
        case AMDGPU_RAS_BLOCK__SDMA:
                if (adev->sdma.funcs->reset_ras_error_count)
index 9aaa137662b5cb62284ecea058ddea29643e1ef8..6264934b67ac00b612cc74677d44818fb13d83b3 100644 (file)
@@ -1315,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
        }
 }
 
+static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
+{
+       int i;
+       uint32_t reg_value;
+
+       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
+               reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+                       mmhub_v1_7_ea_err_status_regs[i]));
+               reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
+                                         CLEAR_ERROR_STATUS, 0x01);
+               WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
+                      reg_value);
+       }
+}
+
 const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
        .ras_late_init = amdgpu_mmhub_ras_late_init,
        .ras_fini = amdgpu_mmhub_ras_fini,
        .query_ras_error_count = mmhub_v1_7_query_ras_error_count,
        .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
        .query_ras_error_status = mmhub_v1_7_query_ras_error_status,
+       .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
 };
 
 const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {