NFS: don't unhash dentry during unlink/rename

author NeilBrown <neilb@suse.de>
Mon, 1 Aug 2022 00:33:34 +0000 (10:33 +1000)
committer Trond Myklebust <trond.myklebust@hammerspace.com>
Mon, 8 Aug 2022 20:25:56 +0000 (16:25 -0400)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index 7b22302..dbab3ca 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1782,6 +1782,8 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
         int ret;
  
         if (flags & LOOKUP_RCU) {
+               if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
+                       return -ECHILD;
                 parent = READ_ONCE(dentry->d_parent);
                 dir = d_inode_rcu(parent);
                 if (!dir)
@@ -1790,6 +1792,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
                 if (parent != READ_ONCE(dentry->d_parent))
                         return -ECHILD;
         } else {
+               /* Wait for unlink to complete */
+               wait_var_event(&dentry->d_fsdata,
+                              dentry->d_fsdata != NFS_FSDATA_BLOCKED);
                 parent = dget_parent(dentry);
                 ret = reval(d_inode(parent), dentry, flags);
                 dput(parent);
@@ -2458,7 +2463,6 @@ out:
  int nfs_unlink(struct inode *dir, struct dentry *dentry)
  {
         int error;
-       int need_rehash = 0;
  
         dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
                 dir->i_ino, dentry);
@@ -2473,15 +2477,25 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
                 error = nfs_sillyrename(dir, dentry);
                 goto out;
         }
-       if (!d_unhashed(dentry)) {
-               __d_drop(dentry);
-               need_rehash = 1;
-       }
+       /* We must prevent any concurrent open until the unlink
+        * completes.  ->d_revalidate will wait for ->d_fsdata
+        * to clear.  We set it here to ensure no lookup succeeds until
+        * the unlink is complete on the server.
+        */
+       error = -ETXTBSY;
+       if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
+           WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED))
+               goto out;
+       if (dentry->d_fsdata)
+               /* old devname */
+               kfree(dentry->d_fsdata);
+       dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+
         spin_unlock(&dentry->d_lock);
         error = nfs_safe_remove(dentry);
         nfs_dentry_remove_handle_error(dir, dentry, error);
-       if (need_rehash)
-               d_rehash(dentry);
+       dentry->d_fsdata = NULL;
+       wake_up_var(&dentry->d_fsdata);
  out:
         trace_nfs_unlink_exit(dir, dentry, error);
         return error;
@@ -2588,6 +2602,15 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  }
  EXPORT_SYMBOL_GPL(nfs_link);
  
+static void
+nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
+{
+       struct dentry *new_dentry = data->new_dentry;
+
+       new_dentry->d_fsdata = NULL;
+       wake_up_var(&new_dentry->d_fsdata);
+}
+
  /*
   * RENAME
   * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2618,8 +2641,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
  {
         struct inode *old_inode = d_inode(old_dentry);
         struct inode *new_inode = d_inode(new_dentry);
-       struct dentry *dentry = NULL, *rehash = NULL;
+       struct dentry *dentry = NULL;
         struct rpc_task *task;
+       bool must_unblock = false;
         int error = -EBUSY;
  
         if (flags)
@@ -2637,18 +2661,27 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
          * the new target.
          */
         if (new_inode && !S_ISDIR(new_inode->i_mode)) {
-               /*
-                * To prevent any new references to the target during the
-                * rename, we unhash the dentry in advance.
+               /* We must prevent any concurrent open of the target until
+                * the rename completes.  ->d_revalidate will wait for
+                * ->d_fsdata to clear.  We set it below, under ->d_lock, to
+                * ensure no lookup succeeds until the rename is complete on
+                * the server.
                  */
-               if (!d_unhashed(new_dentry)) {
-                       d_drop(new_dentry);
-                       rehash = new_dentry;
+               error = -ETXTBSY;
+               if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
+                   WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
+                       goto out;
+               if (new_dentry->d_fsdata) {
+                       /* old devname */
+                       kfree(new_dentry->d_fsdata);
+                       new_dentry->d_fsdata = NULL;
                 }
  
+               spin_lock(&new_dentry->d_lock);
                 if (d_count(new_dentry) > 2) {
                         int err;
  
+                       spin_unlock(&new_dentry->d_lock);
+
                         /* copy the target dentry's name */
                         dentry = d_alloc(new_dentry->d_parent,
                                          &new_dentry->d_name);
@@ -2661,14 +2694,19 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
                                 goto out;
  
                         new_dentry = dentry;
-                       rehash = NULL;
                         new_inode = NULL;
+               } else {
+                       new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+                       must_unblock = true;
+                       spin_unlock(&new_dentry->d_lock);
                 }
+
         }
  
         if (S_ISREG(old_inode->i_mode))
                 nfs_sync_inode(old_inode);
-       task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
+       task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
+                               must_unblock ? nfs_unblock_rename : NULL);
         if (IS_ERR(task)) {
                 error = PTR_ERR(task);
                 goto out;
@@ -2692,8 +2730,6 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
                 spin_unlock(&old_inode->i_lock);
         }
  out:
-       if (rehash)
-               d_rehash(rehash);
         trace_nfs_rename_exit(old_dir, old_dentry,
                         new_dir, new_dentry, error);
         if (!error) {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h

index a17c337..b32ed68 100644 (file)
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -617,6 +617,15 @@ nfs_fileid_to_ino_t(u64 fileid)
  
  #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
  
+/* We need to block new opens while a file is being unlinked.
+ * If it is opened *before* we decide to unlink, we will silly-rename
+ * instead. If it is opened *after*, then the open must create the file or fail.
+ * If we allow the two to race, we could end up with a file that is open
+ * but deleted on the server resulting in ESTALE.
+ * So use ->d_fsdata to record when the unlink is happening
+ * and block dentry revalidation while it is set.
+ */
+#define NFS_FSDATA_BLOCKED ((void*)1)
  
  # undef ifdebug
  # ifdef NFS_DEBUG
author	NeilBrown <neilb@suse.de>
	Mon, 1 Aug 2022 00:33:34 +0000 (10:33 +1000)
committer	Trond Myklebust <trond.myklebust@hammerspace.com>
	Mon, 8 Aug 2022 20:25:56 +0000 (16:25 -0400)
fs/nfs/dir.c		patch \| blob \| history
include/linux/nfs_fs.h		patch \| blob \| history