From 9e108c9bc626cfa44437c7e0fceaf9a675ae615e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 5 May 2014 09:29:04 +1000 Subject: [PATCH] powerpc/powernv: Don't escalate non-existing frozen PE Commit ac9fed22 ("powerpc/eeh: Escalate error on non-existing PE") escalates the frozen state on non-existing PE to fenced PHB. It was to improve kdump reliability. After that, commit 194fbd03 ("powrpc/powernv: Reset PHB in kdump kernel") was introduced to issue complete reset on all PHBs to increase the reliability of kdump kernel. Commit ac9fed22 becomes unuseful and it would be reverted. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/eeh-ioda.c | 24 +++++++++-------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 68167cd9ea97d..5711f6f1fda66 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -794,23 +794,17 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) break; case OPAL_EEH_PE_ERROR: /* - * If we can't find the corresponding PE, the - * PEEV / PEST would be messy. So we force an - * fenced PHB so that it can be recovered. - * - * If the PE has been marked as isolated, that - * should have been removed permanently or in - * progress with recovery. We needn't report - * it again. + * If we can't find the corresponding PE, we + * just try to unfreeze. */ if (ioda_eeh_get_pe(hose, - be64_to_cpu(frozen_pe_no), pe)) { - *pe = phb_pe; - pr_err("EEH: Escalated fenced PHB#%x " - "detected for PE#%llx\n", - hose->global_number, - be64_to_cpu(frozen_pe_no)); - ret = EEH_NEXT_ERR_FENCED_PHB; + be64_to_cpu(frozen_pe_no), pe)) { + /* Try best to clear it */ + pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", + hose->global_number, frozen_pe_no); + opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + ret = EEH_NEXT_ERR_NONE; } else if ((*pe)->state & EEH_PE_ISOLATED) { ret = EEH_NEXT_ERR_NONE; } else { -- 2.39.5