pNFS: Fix a hang in nfs4_evict_inode()
author: Trond Myklebust <trond.myklebust@hammerspace.com>
Sun, 8 Oct 2023 18:20:19 +0000 (14:20 -0400)
committer: Anna Schumaker <Anna.Schumaker@Netapp.com>
Wed, 18 Oct 2023 18:41:52 +0000 (14:41 -0400)
We are not allowed to call pnfs_mark_matching_lsegs_return() without
also holding a reference to the layout header, since doing so could lead
to the reference count going to zero when we call
pnfs_layout_remove_lseg(). This, in turn, can lead to a hang when we get
to nfs4_evict_inode() and are unable to clear the layout pointer.

pnfs_layout_return_unused_byserver() is guilty of this behaviour, and
has been seen to trigger the refcount warning prior to a hang.

Fixes: b6d49ecd1081 ("NFSv4: Fix a pNFS layout related use-after-free race when freeing the inode")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
fs/nfs/pnfs.c

index 306cba0..84343ae 100644 (file)
@@ -2634,31 +2634,44 @@ pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
        return mode == 0;
 }
 
-static int
-pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
+static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
+                                             void *data)
 {
        const struct pnfs_layout_range *range = data;
+       const struct cred *cred;
        struct pnfs_layout_hdr *lo;
        struct inode *inode;
+       nfs4_stateid stateid;
+       enum pnfs_iomode iomode;
+
 restart:
        rcu_read_lock();
        list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
-               if (!pnfs_layout_can_be_returned(lo) ||
+               inode = lo->plh_inode;
+               if (!inode || !pnfs_layout_can_be_returned(lo) ||
                    test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
                        continue;
-               inode = lo->plh_inode;
                spin_lock(&inode->i_lock);
-               if (!pnfs_should_return_unused_layout(lo, range)) {
+               if (!lo->plh_inode ||
+                   !pnfs_should_return_unused_layout(lo, range)) {
                        spin_unlock(&inode->i_lock);
                        continue;
                }
+               pnfs_get_layout_hdr(lo);
+               pnfs_set_plh_return_info(lo, range->iomode, 0);
+               if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
+                                                   range, 0) != 0 ||
+                   !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
+                       spin_unlock(&inode->i_lock);
+                       rcu_read_unlock();
+                       pnfs_put_layout_hdr(lo);
+                       cond_resched();
+                       goto restart;
+               }
                spin_unlock(&inode->i_lock);
-               inode = pnfs_grab_inode_layout_hdr(lo);
-               if (!inode)
-                       continue;
                rcu_read_unlock();
-               pnfs_mark_layout_for_return(inode, range);
-               iput(inode);
+               pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+               pnfs_put_layout_hdr(lo);
                cond_resched();
                goto restart;
        }