]> git.baikalelectronics.ru Git - kernel.git/commitdiff
NFS: Fix flexfiles read failover
authorTrond Myklebust <trond.myklebust@hammerspace.com>
Tue, 11 Aug 2020 17:36:32 +0000 (13:36 -0400)
committerTrond Myklebust <trond.myklebust@hammerspace.com>
Wed, 12 Aug 2020 15:20:29 +0000 (11:20 -0400)
The current mirrored read failover code is correctly resetting the mirror
index between failed reads, however it is not able to actually flip the
RPC call over to the next RPC client.
The end result is that we keep resending the RPC call to the same client
over and over.

The fix is to use the pnfs_read_resend_pnfs() mechanism to schedule a
new RPC call, but we need to add the ability to pass in a mirror
index so that we always retry the next mirror in the list.

Fixes: 5953fe82ca88 ("pNFS/flexfiles: Fix layoutstats handling during read failovers")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h

index b26173d7273587351f7d2267c66504378c619c85..965145592750afe3312e9569c666233e386dbda4 100644 (file)
@@ -790,6 +790,19 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
        return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
 }
 
+static struct nfs4_pnfs_ds *
+ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx)
+{
+       struct pnfs_layout_segment *lseg = pgio->pg_lseg;
+       struct nfs4_pnfs_ds *ds;
+
+       ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
+                                              best_idx);
+       if (ds || !pgio->pg_mirror_idx)
+               return ds;
+       return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
+}
+
 static void
 ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
                      struct nfs_page *req,
@@ -840,7 +853,7 @@ retry:
                        goto out_nolseg;
        }
 
-       ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
+       ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
        if (!ds) {
                if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
                        goto out_mds;
@@ -1022,11 +1035,24 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
        }
 }
 
+static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
+{
+       u32 idx = hdr->pgio_mirror_idx + 1;
+       int new_idx = 0;
+
+       if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx))
+               ff_layout_send_layouterror(hdr->lseg);
+       else
+               pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
+       pnfs_read_resend_pnfs(hdr, new_idx);
+}
+
 static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
 {
        struct rpc_task *task = &hdr->task;
 
        pnfs_layoutcommit_inode(hdr->inode, false);
+       pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
 
        if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
                dprintk("%s Reset task %5u for i/o through MDS "
@@ -1228,6 +1254,12 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
                break;
        case NFS4ERR_NXIO:
                ff_layout_mark_ds_unreachable(lseg, idx);
+               /*
+                * Don't return the layout if this is a read and we still
+                * have layouts to try
+                */
+               if (opnum == OP_READ)
+                       break;
                /* Fallthrough */
        default:
                pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
@@ -1241,7 +1273,6 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
 static int ff_layout_read_done_cb(struct rpc_task *task,
                                struct nfs_pgio_header *hdr)
 {
-       int new_idx = hdr->pgio_mirror_idx;
        int err;
 
        if (task->tk_status < 0) {
@@ -1261,10 +1292,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
        clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
        switch (err) {
        case -NFS4ERR_RESET_TO_PNFS:
-               if (ff_layout_choose_best_ds_for_read(hdr->lseg,
-                                       hdr->pgio_mirror_idx + 1,
-                                       &new_idx))
-                       goto out_layouterror;
                set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
                return task->tk_status;
        case -NFS4ERR_RESET_TO_MDS:
@@ -1275,10 +1302,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
        }
 
        return 0;
-out_layouterror:
-       ff_layout_read_record_layoutstats_done(task, hdr);
-       ff_layout_send_layouterror(hdr->lseg);
-       hdr->pgio_mirror_idx = new_idx;
 out_eagain:
        rpc_restart_call_prepare(task);
        return -EAGAIN;
@@ -1405,10 +1428,9 @@ static void ff_layout_read_release(void *data)
        struct nfs_pgio_header *hdr = data;
 
        ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
-       if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
-               ff_layout_send_layouterror(hdr->lseg);
-               pnfs_read_resend_pnfs(hdr);
-       } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+       if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+               ff_layout_resend_pnfs_read(hdr);
+       else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
                ff_layout_reset_read(hdr);
        pnfs_generic_rw_release(data);
 }
index b5baf36d4de507e1fc8fa316abf2629036f74eb6..40332c758d8466392fdf264d9c0ae8954ec2941e 100644 (file)
@@ -2939,7 +2939,8 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
 }
 
 /* Resend all requests through pnfs. */
-void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
+void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr,
+                          unsigned int mirror_idx)
 {
        struct nfs_pageio_descriptor pgio;
 
@@ -2950,6 +2951,7 @@ void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
 
                nfs_pageio_init_read(&pgio, hdr->inode, false,
                                        hdr->completion_ops);
+               pgio.pg_mirror_idx = mirror_idx;
                hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
        }
 }
index 8e0ada581b92e3ef3f8e84d3c7165a4c177d7b1c..2661c44c62db402fbbd4c1832ba533f2f187f433 100644 (file)
@@ -311,7 +311,7 @@ int _pnfs_return_layout(struct inode *);
 int pnfs_commit_and_return_layout(struct inode *);
 void pnfs_ld_write_done(struct nfs_pgio_header *);
 void pnfs_ld_read_done(struct nfs_pgio_header *);
-void pnfs_read_resend_pnfs(struct nfs_pgio_header *);
+void pnfs_read_resend_pnfs(struct nfs_pgio_header *, unsigned int mirror_idx);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
                                               struct nfs_open_context *ctx,
                                               loff_t pos,