git.baikalelectronics.ru Git - kernel.git/commitdiff
scsi: lpfc: Improve PCI EEH Error and Recovery Handling
author: James Smart <jsmart2021@gmail.com>
Thu, 17 Mar 2022 03:27:34 +0000 (20:27 -0700)
committer: Martin K. Petersen <martin.petersen@oracle.com>
Wed, 30 Mar 2022 03:19:37 +0000 (23:19 -0400)
Following EEH errors, the driver can crash or hang when deleting the
localport or when attempting to unload.

The EEH handlers in the driver did not notify the NVMe-FC transport before
tearing the driver down. This was delayed until the resume steps. This
worked for SCSI because lpfc_scsi_dev_block() would notify the
scsi_fc_transport that the target was not available, but it would not clean
up all the references to the ndlp.

The SLI3 prep for dev reset handler did the lpfc_offline_prep() and
lpfc_offline() calls to get the port stopped before restarting. The SLI4
version of the prep for dev reset just destroyed the queues and did not
stop NVMe from continuing. Also, because the port was not really stopped,
the localport destroy would hang because the transport was still waiting
for I/O. Additionally, a devloss tmo can fire and post events to a stopped
worker thread, creating another hang condition.

lpfc_sli4_prep_dev_for_reset() is modified to call lpfc_offline_prep() and
lpfc_offline() rather than just lpfc_scsi_dev_block() to ensure both SCSI
and NVMe transports are notified to block I/O to the driver.

Logic is added to the devloss handler and worker thread to clean up ndlp
references and quiesce appropriately.

Link: https://lore.kernel.org/r/20220317032737.45308-2-jsmart2021@gmail.com
Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_crtn.h
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_sli.c

index 86653aa9b38909c8b6ef624ca8e1b022732196a6..8405fd0bbc598348e6a31edf49fa5c7b9a623838 100644 (file)
@@ -896,6 +896,11 @@ enum lpfc_irq_chann_mode {
        NHT_MODE,
 };
 
+enum lpfc_hba_bit_flags {
+       FABRIC_COMANDS_BLOCKED,
+       HBA_PCI_ERR,
+};
+
 struct lpfc_hba {
        /* SCSI interface function jump table entries */
        struct lpfc_io_buf * (*lpfc_get_scsi_buf)
@@ -1042,7 +1047,6 @@ struct lpfc_hba {
                                         * Firmware supports Forced Link Speed
                                         * capability
                                         */
-#define HBA_PCI_ERR            0x80000 /* The PCI slot is offline */
 #define HBA_FLOGI_ISSUED       0x100000 /* FLOGI was issued */
 #define HBA_SHORT_CMF          0x200000 /* shorter CMF timer routine */
 #define HBA_CGN_DAY_WRAP       0x400000 /* HBA Congestion info day wraps */
@@ -1349,7 +1353,6 @@ struct lpfc_hba {
        atomic_t fabric_iocb_count;
        struct timer_list fabric_block_timer;
        unsigned long bit_flags;
-#define        FABRIC_COMANDS_BLOCKED  0
        atomic_t num_rsrc_err;
        atomic_t num_cmd_success;
        unsigned long last_rsrc_error_time;
index 96408cd6c4c81668a68d31e5c7f6793711fe7b41..9897a1aa387b63a00a4e34fcd6975fb351fb3a39 100644 (file)
@@ -670,3 +670,6 @@ struct lpfc_vmid *lpfc_get_vmid_from_hashtable(struct lpfc_vport *vport,
                                              uint32_t hash, uint8_t *buf);
 void lpfc_vmid_vport_cleanup(struct lpfc_vport *vport);
 int lpfc_issue_els_qfpa(struct lpfc_vport *vport);
+
+void lpfc_sli_rpi_release(struct lpfc_vport *vport,
+                         struct lpfc_nodelist *ndlp);
index 0144da30e3dbd0c5448d55e0fb71ad9126312143..6983c70f2fc6c75853a7ce2b63f9a337f97363ad 100644 (file)
@@ -109,8 +109,8 @@ lpfc_rport_invalid(struct fc_rport *rport)
 
        ndlp = rdata->pnode;
        if (!rdata->pnode) {
-               pr_err("**** %s: NULL ndlp on rport x%px SID x%x\n",
-                      __func__, rport, rport->scsi_target_id);
+               pr_info("**** %s: NULL ndlp on rport x%px SID x%x\n",
+                       __func__, rport, rport->scsi_target_id);
                return -EINVAL;
        }
 
@@ -169,9 +169,10 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
 
        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
                         "3181 dev_loss_callbk x%06x, rport x%px flg x%x "
-                        "load_flag x%x refcnt %d\n",
+                        "load_flag x%x refcnt %d state %d xpt x%x\n",
                         ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag,
-                        vport->load_flag, kref_read(&ndlp->kref));
+                        vport->load_flag, kref_read(&ndlp->kref),
+                        ndlp->nlp_state, ndlp->fc4_xpt_flags);
 
        /* Don't schedule a worker thread event if the vport is going down.
         * The teardown process cleans up the node via lpfc_drop_node.
@@ -181,6 +182,11 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
                ndlp->rport = NULL;
 
                ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD;
+               /* clear the NLP_XPT_REGD if the node is not registered
+                * with nvme-fc
+                */
+               if (ndlp->fc4_xpt_flags == NLP_XPT_REGD)
+                       ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD;
 
                /* Remove the node reference from remote_port_add now.
                 * The driver will not call remote_port_delete.
@@ -225,18 +231,36 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
        ndlp->rport = NULL;
        spin_unlock_irqrestore(&ndlp->lock, iflags);
 
-       /* We need to hold the node by incrementing the reference
-        * count until this queued work is done
-        */
-       evtp->evt_arg1 = lpfc_nlp_get(ndlp);
+       if (phba->worker_thread) {
+               /* We need to hold the node by incrementing the reference
+                * count until this queued work is done
+                */
+               evtp->evt_arg1 = lpfc_nlp_get(ndlp);
+
+               spin_lock_irqsave(&phba->hbalock, iflags);
+               if (evtp->evt_arg1) {
+                       evtp->evt = LPFC_EVT_DEV_LOSS;
+                       list_add_tail(&evtp->evt_listp, &phba->work_list);
+                       lpfc_worker_wake_up(phba);
+               }
+               spin_unlock_irqrestore(&phba->hbalock, iflags);
+       } else {
+               lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
+                                "3188 worker thread is stopped %s x%06x, "
+                                " rport x%px flg x%x load_flag x%x refcnt "
+                                "%d\n", __func__, ndlp->nlp_DID,
+                                ndlp->rport, ndlp->nlp_flag,
+                                vport->load_flag, kref_read(&ndlp->kref));
+               if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) {
+                       spin_lock_irqsave(&ndlp->lock, iflags);
+                       /* Node is in dev loss.  No further transaction. */
+                       ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
+                       spin_unlock_irqrestore(&ndlp->lock, iflags);
+                       lpfc_disc_state_machine(vport, ndlp, NULL,
+                                               NLP_EVT_DEVICE_RM);
+               }
 
-       spin_lock_irqsave(&phba->hbalock, iflags);
-       if (evtp->evt_arg1) {
-               evtp->evt = LPFC_EVT_DEV_LOSS;
-               list_add_tail(&evtp->evt_listp, &phba->work_list);
-               lpfc_worker_wake_up(phba);
        }
-       spin_unlock_irqrestore(&phba->hbalock, iflags);
 
        return;
 }
@@ -503,11 +527,12 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                                 "0203 Devloss timeout on "
                                 "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x "
-                                "NPort x%06x Data: x%x x%x x%x\n",
+                                "NPort x%06x Data: x%x x%x x%x refcnt %d\n",
                                 *name, *(name+1), *(name+2), *(name+3),
                                 *(name+4), *(name+5), *(name+6), *(name+7),
                                 ndlp->nlp_DID, ndlp->nlp_flag,
-                                ndlp->nlp_state, ndlp->nlp_rpi);
+                                ndlp->nlp_state, ndlp->nlp_rpi,
+                                kref_read(&ndlp->kref));
        } else {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_TRACE_EVENT,
                                 "0204 Devloss timeout on "
@@ -755,18 +780,22 @@ lpfc_work_list_done(struct lpfc_hba *phba)
        int free_evt;
        int fcf_inuse;
        uint32_t nlp_did;
+       bool hba_pci_err;
 
        spin_lock_irq(&phba->hbalock);
        while (!list_empty(&phba->work_list)) {
                list_remove_head((&phba->work_list), evtp, typeof(*evtp),
                                 evt_listp);
                spin_unlock_irq(&phba->hbalock);
+               hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags);
                free_evt = 1;
                switch (evtp->evt) {
                case LPFC_EVT_ELS_RETRY:
                        ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1);
-                       lpfc_els_retry_delay_handler(ndlp);
-                       free_evt = 0; /* evt is part of ndlp */
+                       if (!hba_pci_err) {
+                               lpfc_els_retry_delay_handler(ndlp);
+                               free_evt = 0; /* evt is part of ndlp */
+                       }
                        /* decrement the node reference count held
                         * for this queued work
                         */
@@ -788,8 +817,10 @@ lpfc_work_list_done(struct lpfc_hba *phba)
                        break;
                case LPFC_EVT_RECOVER_PORT:
                        ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
-                       lpfc_sli_abts_recover_port(ndlp->vport, ndlp);
-                       free_evt = 0;
+                       if (!hba_pci_err) {
+                               lpfc_sli_abts_recover_port(ndlp->vport, ndlp);
+                               free_evt = 0;
+                       }
                        /* decrement the node reference count held for
                         * this queued work
                         */
@@ -859,14 +890,18 @@ lpfc_work_done(struct lpfc_hba *phba)
        struct lpfc_vport **vports;
        struct lpfc_vport *vport;
        int i;
+       bool hba_pci_err;
 
+       hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags);
        spin_lock_irq(&phba->hbalock);
        ha_copy = phba->work_ha;
        phba->work_ha = 0;
        spin_unlock_irq(&phba->hbalock);
+       if (hba_pci_err)
+               ha_copy = 0;
 
        /* First, try to post the next mailbox command to SLI4 device */
-       if (phba->pci_dev_grp == LPFC_PCI_DEV_OC)
+       if (phba->pci_dev_grp == LPFC_PCI_DEV_OC && !hba_pci_err)
                lpfc_sli4_post_async_mbox(phba);
 
        if (ha_copy & HA_ERATT) {
@@ -886,7 +921,7 @@ lpfc_work_done(struct lpfc_hba *phba)
                lpfc_handle_latt(phba);
 
        /* Handle VMID Events */
-       if (lpfc_is_vmid_enabled(phba)) {
+       if (lpfc_is_vmid_enabled(phba) && !hba_pci_err) {
                if (phba->pport->work_port_events &
                    WORKER_CHECK_VMID_ISSUE_QFPA) {
                        lpfc_check_vmid_qfpa_issue(phba);
@@ -936,6 +971,8 @@ lpfc_work_done(struct lpfc_hba *phba)
                        work_port_events = vport->work_port_events;
                        vport->work_port_events &= ~work_port_events;
                        spin_unlock_irq(&vport->work_port_lock);
+                       if (hba_pci_err)
+                               continue;
                        if (work_port_events & WORKER_DISC_TMO)
                                lpfc_disc_timeout_handler(vport);
                        if (work_port_events & WORKER_ELS_TMO)
@@ -1173,12 +1210,14 @@ lpfc_linkdown(struct lpfc_hba *phba)
        struct lpfc_vport **vports;
        LPFC_MBOXQ_t          *mb;
        int i;
+       int offline;
 
        if (phba->link_state == LPFC_LINK_DOWN)
                return 0;
 
        /* Block all SCSI stack I/Os */
        lpfc_scsi_dev_block(phba);
+       offline = pci_channel_offline(phba->pcidev);
 
        phba->defer_flogi_acc_flag = false;
 
@@ -1219,7 +1258,7 @@ lpfc_linkdown(struct lpfc_hba *phba)
        lpfc_destroy_vport_work_array(phba, vports);
 
        /* Clean up any SLI3 firmware default rpi's */
-       if (phba->sli_rev > LPFC_SLI_REV3)
+       if (phba->sli_rev > LPFC_SLI_REV3 || offline)
                goto skip_unreg_did;
 
        mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -4712,6 +4751,11 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
        spin_lock_irqsave(&ndlp->lock, iflags);
        if (!(ndlp->fc4_xpt_flags & NLP_XPT_REGD)) {
                spin_unlock_irqrestore(&ndlp->lock, iflags);
+               lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+                                "0999 %s Not regd: ndlp x%px rport x%px DID "
+                                "x%x FLG x%x XPT x%x\n",
+                                 __func__, ndlp, ndlp->rport, ndlp->nlp_DID,
+                                 ndlp->nlp_flag, ndlp->fc4_xpt_flags);
                return;
        }
 
@@ -4722,6 +4766,13 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
            ndlp->fc4_xpt_flags & SCSI_XPT_REGD) {
                vport->phba->nport_event_cnt++;
                lpfc_unregister_remote_port(ndlp);
+       } else if (!ndlp->rport) {
+               lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+                                "1999 %s NDLP in devloss x%px DID x%x FLG x%x"
+                                " XPT x%x refcnt %d\n",
+                                __func__, ndlp, ndlp->nlp_DID, ndlp->nlp_flag,
+                                ndlp->fc4_xpt_flags,
+                                kref_read(&ndlp->kref));
        }
 
        if (ndlp->fc4_xpt_flags & NVME_XPT_REGD) {
@@ -6097,12 +6148,34 @@ lpfc_disc_flush_list(struct lpfc_vport *vport)
        }
 }
 
+/*
+ * lpfc_notify_xport_npr - notifies xport of node disappearance
+ * @vport: Pointer to Virtual Port object.
+ *
+ * Transitions all ndlps to NPR state.  When lpfc_nlp_set_state
+ * calls lpfc_nlp_state_cleanup, the ndlp->rport is unregistered
+ * and transport notified that the node is gone.
+ * Return Code:
+ *     none
+ */
+static void
+lpfc_notify_xport_npr(struct lpfc_vport *vport)
+{
+       struct lpfc_nodelist *ndlp, *next_ndlp;
+
+       list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
+                                nlp_listp) {
+               lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+       }
+}
 void
 lpfc_cleanup_discovery_resources(struct lpfc_vport *vport)
 {
        lpfc_els_flush_rscn(vport);
        lpfc_els_flush_cmd(vport);
        lpfc_disc_flush_list(vport);
+       if (pci_channel_offline(vport->phba->pcidev))
+               lpfc_notify_xport_npr(vport);
 }
 
 /*****************************************************************************/
index eed6464bd880669807eb339e25e71532c75010dc..b8ab6dcbadc5a587a6dde5f35e1ae4d5c6946a4c 100644 (file)
@@ -1642,7 +1642,7 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
 {
        spin_lock_irq(&phba->hbalock);
        if (phba->link_state == LPFC_HBA_ERROR &&
-           phba->hba_flag & HBA_PCI_ERR) {
+               test_bit(HBA_PCI_ERR, &phba->bit_flags)) {
                spin_unlock_irq(&phba->hbalock);
                return;
        }
@@ -3682,7 +3682,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
        struct lpfc_vport **vports;
        struct Scsi_Host *shost;
        int i;
-       int offline = 0;
+       int offline;
+       bool hba_pci_err;
 
        if (vport->fc_flag & FC_OFFLINE_MODE)
                return;
@@ -3692,6 +3693,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
        lpfc_linkdown(phba);
 
        offline =  pci_channel_offline(phba->pcidev);
+       hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags);
 
        /* Issue an unreg_login to all nodes on all vports */
        vports = lpfc_create_vport_work_array(phba);
@@ -3715,11 +3717,14 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
                                ndlp->nlp_flag &= ~NLP_NPR_ADISC;
                                spin_unlock_irq(&ndlp->lock);
 
-                               if (offline) {
+                               if (offline || hba_pci_err) {
                                        spin_lock_irq(&ndlp->lock);
                                        ndlp->nlp_flag &= ~(NLP_UNREG_INP |
                                                            NLP_RPI_REGISTERED);
                                        spin_unlock_irq(&ndlp->lock);
+                                       if (phba->sli_rev == LPFC_SLI_REV4)
+                                               lpfc_sli_rpi_release(vports[i],
+                                                                    ndlp);
                                } else {
                                        lpfc_unreg_rpi(vports[i], ndlp);
                                }
@@ -13374,15 +13379,12 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
        /* Disable FW logging to host memory */
        lpfc_ras_stop_fwlog(phba);
 
-       /* Unset the queues shared with the hardware then release all
-        * allocated resources.
-        */
-       lpfc_sli4_queue_unset(phba);
-       lpfc_sli4_queue_destroy(phba);
-
        /* Reset SLI4 HBA FCoE function */
        lpfc_pci_function_reset(phba);
 
+       /* release all queue allocated resources. */
+       lpfc_sli4_queue_destroy(phba);
+
        /* Free RAS DMA memory */
        if (phba->ras_fwlog.ras_enabled)
                lpfc_sli4_ras_dma_free(phba);
@@ -15057,24 +15059,28 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
 static void
 lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
 {
-       lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
-                       "2826 PCI channel disable preparing for reset\n");
+       int offline =  pci_channel_offline(phba->pcidev);
+
+       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                       "2826 PCI channel disable preparing for reset offline"
+                       " %d\n", offline);
 
        /* Block any management I/Os to the device */
        lpfc_block_mgmt_io(phba, LPFC_MBX_NO_WAIT);
 
-       /* Block all SCSI devices' I/Os on the host */
-       lpfc_scsi_dev_block(phba);
 
+       /* HBA_PCI_ERR was set in io_error_detect */
+       lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
        /* Flush all driver's outstanding I/Os as we are to reset */
        lpfc_sli_flush_io_rings(phba);
+       lpfc_offline(phba);
 
        /* stop all timers */
        lpfc_stop_hba_timers(phba);
 
+       lpfc_sli4_queue_destroy(phba);
        /* Disable interrupt and pci device */
        lpfc_sli4_disable_intr(phba);
-       lpfc_sli4_queue_destroy(phba);
        pci_disable_device(phba->pcidev);
 }
 
@@ -15123,6 +15129,7 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
 {
        struct Scsi_Host *shost = pci_get_drvdata(pdev);
        struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+       bool hba_pci_err;
 
        switch (state) {
        case pci_channel_io_normal:
@@ -15130,17 +15137,24 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
                lpfc_sli4_prep_dev_for_recover(phba);
                return PCI_ERS_RESULT_CAN_RECOVER;
        case pci_channel_io_frozen:
-               phba->hba_flag |= HBA_PCI_ERR;
+               hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags);
                /* Fatal error, prepare for slot reset */
-               lpfc_sli4_prep_dev_for_reset(phba);
+               if (!hba_pci_err)
+                       lpfc_sli4_prep_dev_for_reset(phba);
+               else
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "2832  Already handling PCI error "
+                                       "state: x%x\n", state);
                return PCI_ERS_RESULT_NEED_RESET;
        case pci_channel_io_perm_failure:
-               phba->hba_flag |= HBA_PCI_ERR;
+               set_bit(HBA_PCI_ERR, &phba->bit_flags);
                /* Permanent failure, prepare for device down */
                lpfc_sli4_prep_dev_for_perm_failure(phba);
                return PCI_ERS_RESULT_DISCONNECT;
        default:
-               phba->hba_flag |= HBA_PCI_ERR;
+               hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags);
+               if (!hba_pci_err)
+                       lpfc_sli4_prep_dev_for_reset(phba);
                /* Unknown state, prepare and request slot reset */
                lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
                                "2825 Unknown PCI error state: x%x\n", state);
@@ -15174,17 +15188,21 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
        struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
        struct lpfc_sli *psli = &phba->sli;
        uint32_t intr_mode;
+       bool hba_pci_err;
 
        dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n");
        if (pci_enable_device_mem(pdev)) {
                printk(KERN_ERR "lpfc: Cannot re-enable "
-                       "PCI device after reset.\n");
+                      "PCI device after reset.\n");
                return PCI_ERS_RESULT_DISCONNECT;
        }
 
        pci_restore_state(pdev);
 
-       phba->hba_flag &= ~HBA_PCI_ERR;
+       hba_pci_err = test_and_clear_bit(HBA_PCI_ERR, &phba->bit_flags);
+       if (!hba_pci_err)
+               dev_info(&pdev->dev,
+                        "hba_pci_err was not set, recovering slot reset.\n");
        /*
         * As the new kernel behavior of pci_restore_state() API call clears
         * device saved_state flag, need to save the restored state again.
@@ -15239,8 +15257,6 @@ lpfc_io_resume_s4(struct pci_dev *pdev)
         */
        if (!(phba->sli.sli_flag & LPFC_SLI_ACTIVE)) {
                /* Perform device reset */
-               lpfc_offline_prep(phba, LPFC_MBX_WAIT);
-               lpfc_offline(phba);
                lpfc_sli_brdrestart(phba);
                /* Bring the device back online */
                lpfc_online(phba);
index 1213a299f9aae96efd3404cd0c4554b3dd236d6f..e47205e0d3e2ac960059bc8abbc02f6df4782e7f 100644 (file)
@@ -2169,8 +2169,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
                        abts_nvme = 0;
                        for (i = 0; i < phba->cfg_hdw_queue; i++) {
                                qp = &phba->sli4_hba.hdwq[i];
-                               if (!vport || !vport->localport ||
-                                   !qp || !qp->io_wq)
+                               if (!vport->localport || !qp || !qp->io_wq)
                                        return;
 
                                pring = qp->io_wq->pring;
@@ -2180,8 +2179,9 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
                                abts_scsi += qp->abts_scsi_io_bufs;
                                abts_nvme += qp->abts_nvme_io_bufs;
                        }
-                       if (!vport || !vport->localport ||
-                           vport->phba->hba_flag & HBA_PCI_ERR)
+                       if (!vport->localport ||
+                           test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) ||
+                           vport->load_flag & FC_UNLOADING)
                                return;
 
                        lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -2541,8 +2541,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                 * return values is ignored.  The upcall is a courtesy to the
                 * transport.
                 */
-               if (vport->load_flag & FC_UNLOADING ||
-                   unlikely(vport->phba->hba_flag & HBA_PCI_ERR))
+               if (vport->load_flag & FC_UNLOADING)
                        (void)nvme_fc_set_remoteport_devloss(remoteport, 0);
 
                ret = nvme_fc_unregister_remoteport(remoteport);
index 20d40957a3853d9868e1c25038bcea4997b9ffc5..26f6a147b5ae8eb34cb560cdac14c6e15fa6cf36 100644 (file)
@@ -2828,6 +2828,12 @@ __lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
        ndlp->nlp_flag &= ~NLP_UNREG_INP;
 }
 
+void
+lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+{
+       __lpfc_sli_rpi_release(vport, ndlp);
+}
+
 /**
  * lpfc_sli_def_mbox_cmpl - Default mailbox completion handler
  * @phba: Pointer to HBA context object.
@@ -4624,11 +4630,6 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba)
        struct lpfc_iocbq *piocb, *next_iocb;
 
        spin_lock_irq(&phba->hbalock);
-       if (phba->hba_flag & HBA_IOQ_FLUSH ||
-           !phba->sli4_hba.hdwq) {
-               spin_unlock_irq(&phba->hbalock);
-               return;
-       }
        /* Indicate the I/O queues are flushed */
        phba->hba_flag |= HBA_IOQ_FLUSH;
        spin_unlock_irq(&phba->hbalock);
@@ -10997,6 +10998,10 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
        unsigned long iflags;
        int rc;
 
+       /* If the PCI channel is in offline state, do not post iocbs. */
+       if (unlikely(pci_channel_offline(phba->pcidev)))
+               return IOCB_ERROR;
+
        if (phba->sli_rev == LPFC_SLI_REV4) {
                lpfc_sli_prep_wqe(phba, piocb);