btrfs: fix lock inversion problem when doing qgroup extent tracing

author Filipe Manana <fdmanana@suse.com>

Wed, 21 Jul 2021 16:31:48 +0000 (17:31 +0100)

committer David Sterba <dsterba@suse.com>

Thu, 22 Jul 2021 13:50:07 +0000 (15:50 +0200)
author Filipe Manana <fdmanana@suse.com>
Wed, 21 Jul 2021 16:31:48 +0000 (17:31 +0100)
committer David Sterba <dsterba@suse.com>
Thu, 22 Jul 2021 13:50:07 +0000 (15:50 +0200)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c

index 7a8a2fc195338c21f76faf7215ea34ec5954fc2a..78b202d198b8e1741690a2a10fec1955cf221440 100644 (file)
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1488,15 +1488,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
  int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
                          struct btrfs_fs_info *fs_info, u64 bytenr,
                          u64 time_seq, struct ulist **roots,
-                        bool ignore_offset)
+                        bool ignore_offset, bool skip_commit_root_sem)
  {
         int ret;
  
-       if (!trans)
+       if (!trans && !skip_commit_root_sem)
                 down_read(&fs_info->commit_root_sem);
         ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
                                         time_seq, roots, ignore_offset);
-       if (!trans)
+       if (!trans && !skip_commit_root_sem)
                 up_read(&fs_info->commit_root_sem);
         return ret;
  }
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h

index 17abde7f794ce37d7f817decf34577fd90885cc9..ff5f07f9940bd9bb7ae99e69617f0a63715f2abb 100644 (file)
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -47,7 +47,8 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
                          const u64 *extent_item_pos, bool ignore_offset);
  int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
                          struct btrfs_fs_info *fs_info, u64 bytenr,
-                        u64 time_seq, struct ulist **roots, bool ignore_offset);
+                        u64 time_seq, struct ulist **roots, bool ignore_offset,
+                        bool skip_commit_root_sem);
  char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                         u32 name_len, unsigned long name_off,
                         struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c

index 06bc842ecdb34ea55cccffd0f5318681bf74dd99..ca848b1834747c22604ebf37907ff9aec09f0977 100644 (file)
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -974,7 +974,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
  
         if (qrecord_inserted)
-               btrfs_qgroup_trace_extent_post(fs_info, record);
+               btrfs_qgroup_trace_extent_post(trans, record);
  
         return 0;
  }
@@ -1069,7 +1069,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
  
  
         if (qrecord_inserted)
-               return btrfs_qgroup_trace_extent_post(fs_info, record);
+               return btrfs_qgroup_trace_extent_post(trans, record);
         return 0;
  }
  
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c

index 07ec06d4e972635c2922d9727e395a8320772324..0fa121171ca17fc286538f5b67ece77beb696df5 100644 (file)
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1704,17 +1704,39 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
         return 0;
  }
  
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
                                    struct btrfs_qgroup_extent_record *qrecord)
  {
         struct ulist *old_root;
         u64 bytenr = qrecord->bytenr;
         int ret;
  
-       ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
+       /*
+        * We are always called in a context where we are already holding a
+        * transaction handle. Often we are called when adding a data delayed
+        * reference from btrfs_truncate_inode_items() (truncating or unlinking),
+        * in which case we will be holding a write lock on extent buffer from a
+        * subvolume tree. In this case we can't allow btrfs_find_all_roots() to
+        * acquire fs_info->commit_root_sem, because that is a higher level lock
+        * that must be acquired before locking any extent buffers.
+        *
+        * So we want btrfs_find_all_roots() to not acquire the commit_root_sem
+        * but we can't pass it a non-NULL transaction handle, because otherwise
+        * it would not use commit roots and would lock extent buffers, causing
+        * a deadlock if it ends up trying to read lock the same extent buffer
+        * that was previously write locked at btrfs_truncate_inode_items().
+        *
+        * So pass a NULL transaction handle to btrfs_find_all_roots() and
+        * explicitly tell it to not acquire the commit_root_sem - if we are
+        * holding a transaction handle we don't need its protection.
+        */
+       ASSERT(trans != NULL);
+
+       ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
+                                  false, true);
         if (ret < 0) {
-               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-               btrfs_warn(fs_info,
+               trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+               btrfs_warn(trans->fs_info,
  "error accounting new delayed refs extent (err code: %d), quota inconsistent",
                         ret);
                 return 0;
@@ -1758,7 +1780,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
                 kfree(record);
                 return 0;
         }
-       return btrfs_qgroup_trace_extent_post(fs_info, record);
+       return btrfs_qgroup_trace_extent_post(trans, record);
  }
  
  int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -2629,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
                                 /* Search commit root to find old_roots */
                                 ret = btrfs_find_all_roots(NULL, fs_info,
                                                 record->bytenr, 0,
-                                               &record->old_roots, false);
+                                               &record->old_roots, false, false);
                                 if (ret < 0)
                                         goto cleanup;
                         }
@@ -2645,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
                          * current root. It's safe inside commit_transaction().
                          */
                         ret = btrfs_find_all_roots(trans, fs_info,
-                               record->bytenr, BTRFS_SEQ_LAST, &new_roots, false);
+                          record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false);
                         if (ret < 0)
                                 goto cleanup;
                         if (qgroup_to_skip) {
@@ -3179,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
                         num_bytes = found.offset;
  
                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
-                                          &roots, false);
+                                          &roots, false, false);
                 if (ret < 0)
                         goto out;
                 /* For rescan, just pass old_roots as NULL */
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h

index 7283e4f549af74269adef7c1617c086ee2ac0f9b..880e9df0dac1d74d5905de08c13bfe5e7339351f 100644 (file)
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -298,7 +298,7 @@ int btrfs_qgroup_trace_extent_nolock(
   * using current root, then we can move all expensive backref walk out of
   * transaction committing, but not now as qgroup accounting will be wrong again.
   */
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
                                    struct btrfs_qgroup_extent_record *qrecord);
  
  /*
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c

index f3137285a9e2df92523453d5be9f081c7e115575..98b5aaba46f165c203320080765ca8ff0846e7a3 100644 (file)
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -224,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
          * quota.
          */
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -237,7 +237,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
                 return ret;
  
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
@@ -261,7 +261,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
         new_roots = NULL;
  
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -273,7 +273,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
                 return -EINVAL;
  
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
@@ -325,7 +325,7 @@ static int test_multiple_refs(struct btrfs_root *root,
         }
  
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -338,7 +338,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                 return ret;
  
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
@@ -360,7 +360,7 @@ static int test_multiple_refs(struct btrfs_root *root,
         }
  
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -373,7 +373,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                 return ret;
  
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
@@ -401,7 +401,7 @@ static int test_multiple_refs(struct btrfs_root *root,
         }
  
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -414,7 +414,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                 return ret;
  
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
author	Filipe Manana <fdmanana@suse.com>
	Wed, 21 Jul 2021 16:31:48 +0000 (17:31 +0100)
committer	David Sterba <dsterba@suse.com>
	Thu, 22 Jul 2021 13:50:07 +0000 (15:50 +0200)
fs/btrfs/backref.c		patch | blob | history
fs/btrfs/backref.h		patch | blob | history
fs/btrfs/delayed-ref.c		patch | blob | history
fs/btrfs/qgroup.c		patch | blob | history
fs/btrfs/qgroup.h		patch | blob | history
fs/btrfs/tests/qgroup-tests.c		patch | blob | history