btrfs: do not pin logs too early during renames
[platform/kernel/linux-rpi.git] / fs / btrfs / inode.c
index 3ed88f2..bfa3991 100644 (file)
@@ -9350,8 +9350,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                /* force full log commit if subvolume involved. */
                btrfs_set_log_full_commit(trans);
        } else {
-               btrfs_pin_log_trans(root);
-               root_log_pinned = true;
                ret = btrfs_insert_inode_ref(trans, dest,
                                             new_dentry->d_name.name,
                                             new_dentry->d_name.len,
@@ -9368,8 +9366,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                /* force full log commit if subvolume involved. */
                btrfs_set_log_full_commit(trans);
        } else {
-               btrfs_pin_log_trans(dest);
-               dest_log_pinned = true;
                ret = btrfs_insert_inode_ref(trans, root,
                                             old_dentry->d_name.name,
                                             old_dentry->d_name.len,
@@ -9400,6 +9396,29 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                                BTRFS_I(new_inode), 1);
        }
 
+       /*
+        * Now pin the logs of the roots. We do it to ensure that no other task
+        * can sync the logs while we are in progress with the rename, because
+        * that could result in an inconsistency in case any of the inodes that
+        * are part of this rename operation were logged before.
+        *
+        * We pin the logs even if at this precise moment none of the inodes was
+        * logged before. This is because right after we checked for that, some
+        * other task fsyncing some other inode not involved with this rename
+        * operation could log that one of our inodes exists.
+        *
+        * We don't need to pin the logs before the above calls to
+        * btrfs_insert_inode_ref(), since those don't ever need to change a log.
+        */
+       if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+               btrfs_pin_log_trans(root);
+               root_log_pinned = true;
+       }
+       if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
+               btrfs_pin_log_trans(dest);
+               dest_log_pinned = true;
+       }
+
        /* src is a subvolume */
        if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
                ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
@@ -9652,8 +9671,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                /* force full log commit if subvolume involved. */
                btrfs_set_log_full_commit(trans);
        } else {
-               btrfs_pin_log_trans(root);
-               log_pinned = true;
                ret = btrfs_insert_inode_ref(trans, dest,
                                             new_dentry->d_name.name,
                                             new_dentry->d_name.len,
@@ -9677,6 +9694,25 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
                ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
        } else {
+               /*
+                * Now pin the log. We do it to ensure that no other task can
+                * sync the log while we are in progress with the rename, as
+                * that could result in an inconsistency in case any of the
+                * inodes that are part of this rename operation were logged
+                * before.
+                *
+                * We pin the log even if at this precise moment none of the
+                * inodes was logged before. This is because right after we
+                * checked for that, some other task fsyncing some other inode
+                * not involved with this rename operation could log that one of
+                * our inodes exists.
+                *
+                * We don't need to pin the logs before the above call to
+                * btrfs_insert_inode_ref(), since that does not need to change
+                * a log.
+                */
+               btrfs_pin_log_trans(root);
+               log_pinned = true;
                ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
                                        BTRFS_I(d_inode(old_dentry)),
                                        old_dentry->d_name.name,