/* NOTE(review): flag semantics inferred from names — confirm against amdgpu_ras.c.
 * Bit 0: RAS state was presumably initialized from the VBIOS rather than driver defaults.
 */
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
/* Bit 1: presumably indicates a GPU reset is needed to complete RAS init — TODO confirm. */
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET (0x1 << 1)
-#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV (0x1 << 2)
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
{
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
- /*
- * Save bad page to eeprom before gpu reset, i2c may be unstable
- * in gpu reset.
- *
- * Also, exclude the case when ras recovery issuer is
- * eeprom page write itself.
- */
- if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task())
+ if (in_task())
amdgpu_ras_reserve_bad_pages(adev);
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
int i, ret = 0;
struct i2c_msg *msgs, *msg;
unsigned char *buffs, *buff;
- bool sched_ras_recovery = false;
struct eeprom_table_record *record;
struct amdgpu_device *adev = to_amdgpu_device(control);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
"Saved bad pages(%d) reaches threshold value(%d).\n",
control->num_recs + num, ras->bad_page_cnt_threshold);
control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD;
- sched_ras_recovery = true;
}
/* In case of overflow just start from beginning to not lose newest records */
__update_tbl_checksum(control, records, num, old_hdr_byte_sum);
__update_table_header(control, buffs);
-
- if (sched_ras_recovery) {
- /*
- * Before scheduling ras recovery, assert the related
- * flag first, which shall bypass common bad page
- * reservation execution in amdgpu_ras_reset_gpu.
- */
- amdgpu_ras_get_context(adev)->flags |=
- AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV;
-
- dev_warn(adev->dev, "Conduct ras recovery due to bad "
- "page threshold reached.\n");
- amdgpu_ras_reset_gpu(adev);
- }
} else if (!__validate_tbl_checksum(control, records, num)) {
DRM_WARN("EEPROM Table checksum mismatch!");
/* TODO Uncomment when EEPROM read/write is relliable */