NFS/pnfs: Bulk destroy of layouts needs to be safe w.r.t. umount
author Trond Myklebust <trond.myklebust@hammerspace.com>
Fri, 22 Feb 2019 19:20:27 +0000 (14:20 -0500)
committer Trond Myklebust <trond.myklebust@hammerspace.com>
Sat, 23 Feb 2019 18:59:29 +0000 (13:59 -0500)
If a bulk layout recall or a metadata server reboot coincides with a
umount, then holding a reference to an inode is unsafe unless we
also hold a reference to the super block.

Fixes: ae05458216e26 ("NFSv4.1: Fix bulk recall and destroy of layouts")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
fs/nfs/pnfs.c
fs/nfs/pnfs.h

index 53726da5c01008bb321cf600115da8290008b375..8247bd1634cb8c80bee4e4658a2e9754a0ebbf8b 100644 (file)
@@ -758,22 +758,35 @@ static int
 pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
                struct nfs_server *server,
                struct list_head *layout_list)
+       __must_hold(&clp->cl_lock)
+       __must_hold(RCU)
 {
        struct pnfs_layout_hdr *lo, *next;
        struct inode *inode;
 
        list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
-               if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
+               if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
+                   test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) ||
+                   !list_empty(&lo->plh_bulk_destroy))
                        continue;
+               /* If the sb is being destroyed, just bail */
+               if (!nfs_sb_active(server->super))
+                       break;
                inode = igrab(lo->plh_inode);
-               if (inode == NULL)
-                       continue;
-               list_del_init(&lo->plh_layouts);
-               if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
-                       continue;
-               rcu_read_unlock();
-               spin_unlock(&clp->cl_lock);
-               iput(inode);
+               if (inode != NULL) {
+                       list_del_init(&lo->plh_layouts);
+                       if (pnfs_layout_add_bulk_destroy_list(inode,
+                                               layout_list))
+                               continue;
+                       rcu_read_unlock();
+                       spin_unlock(&clp->cl_lock);
+                       iput(inode);
+               } else {
+                       rcu_read_unlock();
+                       spin_unlock(&clp->cl_lock);
+                       set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
+               }
+               nfs_sb_deactive(server->super);
                spin_lock(&clp->cl_lock);
                rcu_read_lock();
                return -EAGAIN;
@@ -811,7 +824,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
                /* Free all lsegs that are attached to commit buckets */
                nfs_commit_inode(inode, 0);
                pnfs_put_layout_hdr(lo);
-               iput(inode);
+               nfs_iput_and_deactive(inode);
        }
        return ret;
 }
index 5e80a07b7beac1d2c21177980c17164fd5d735d5..56659ccce1d8995ff5e100d23af387a4fe9f668e 100644 (file)
@@ -104,6 +104,7 @@ enum {
        NFS_LAYOUT_RETURN_REQUESTED,    /* Return this layout ASAP */
        NFS_LAYOUT_INVALID_STID,        /* layout stateid id is invalid */
        NFS_LAYOUT_FIRST_LAYOUTGET,     /* Serialize first layoutget */
+       NFS_LAYOUT_INODE_FREEING,       /* The inode is being freed */
 };
 
 enum layoutdriver_policy_flags {