Merge tag 'for-6.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 20 Jul 2023 15:11:30 +0000 (08:11 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 20 Jul 2023 15:11:30 +0000 (08:11 -0700)
Pull btrfs fixes from David Sterba:
 "Stable fixes:

   - fix race between balance and cancel/pause

   - various iput() fixes

   - fix use-after-free of new block group that became unused

   - fix warning when putting transaction with qgroups enabled after
     abort

   - fix crash in subpage mode when a page could be released between
     being mapped and being read

   - when scrubbing raid56 verify the P/Q stripes unconditionally

   - fix minor memory leak in zoned mode when a block group with an
     unexpected superblock is found

  Regression fixes:

   - fix ordered extent split error handling when submitting direct IO

   - use irq-safe locking when adding delayed iputs"

* tag 'for-6.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix warning when putting transaction with qgroups enabled after abort
  btrfs: fix ordered extent split error handling in btrfs_dio_submit_io
  btrfs: set_page_extent_mapped after read_folio in btrfs_cont_expand
  btrfs: raid56: always verify the P/Q contents for scrub
  btrfs: use irq safe locking when running and adding delayed iputs
  btrfs: fix iput() on error pointer after error during orphan cleanup
  btrfs: fix double iput() on inode after an error during orphan cleanup
  btrfs: zoned: fix memory leak after finding block group with super blocks
  btrfs: fix use-after-free of new block group that became unused
  btrfs: be a bit more careful when setting mirror_num_ret in btrfs_map_block
  btrfs: fix race between balance and cancel/pause

fs/btrfs/block-group.c
fs/btrfs/block-group.h
fs/btrfs/inode.c
fs/btrfs/qgroup.c
fs/btrfs/raid56.c
fs/btrfs/volumes.c

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 48ae509..2372615 100644
@@ -1640,13 +1640,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
 {
        struct btrfs_fs_info *fs_info = bg->fs_info;
 
-       trace_btrfs_add_unused_block_group(bg);
        spin_lock(&fs_info->unused_bgs_lock);
        if (list_empty(&bg->bg_list)) {
                btrfs_get_block_group(bg);
+               trace_btrfs_add_unused_block_group(bg);
                list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
-       } else {
+       } else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) {
                /* Pull out the block group from the reclaim_bgs list. */
+               trace_btrfs_add_unused_block_group(bg);
                list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
        }
        spin_unlock(&fs_info->unused_bgs_lock);
@@ -2087,6 +2088,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
 
                /* Shouldn't have super stripes in sequential zones */
                if (zoned && nr) {
+                       kfree(logical);
                        btrfs_err(fs_info,
                        "zoned: block group %llu must not contain super block",
                                  cache->start);
@@ -2668,6 +2670,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 next:
                btrfs_delayed_refs_rsv_release(fs_info, 1);
                list_del_init(&block_group->bg_list);
+               clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
        }
        btrfs_trans_release_chunk_metadata(trans);
 }
@@ -2707,6 +2710,13 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
        if (!cache)
                return ERR_PTR(-ENOMEM);
 
+       /*
+        * Mark it as new before adding it to the rbtree of block groups or any
+        * list, so that no other task finds it and calls btrfs_mark_bg_unused()
+        * before the new flag is set.
+        */
+       set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);
+
        cache->length = size;
        set_free_space_tree_thresholds(cache);
        cache->flags = type;
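
Taken together, the three block-group.c hunks implement one pattern: mark a
block group as new before it is published anywhere, refuse to migrate new
groups onto the unused list, and clear the flag once the group is committed.
A condensed sketch of that lifecycle, reusing the identifiers from the hunks
above (illustrative only, not a standalone compilation unit):

        /* btrfs_make_block_group(): flag the group before anyone can see it. */
        set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);

        /*
         * btrfs_mark_bg_unused(): a non-empty bg_list may mean "still on the
         * transaction's list of new block groups", so only migrate groups
         * that are not new; moving a new group off that list is what caused
         * the use-after-free.
         */
        spin_lock(&fs_info->unused_bgs_lock);
        if (list_empty(&bg->bg_list)) {
                btrfs_get_block_group(bg);
                list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
        } else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) {
                list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
        }
        spin_unlock(&fs_info->unused_bgs_lock);

        /* btrfs_create_pending_block_groups(): once committed and removed
         * from the transaction's list, the group may migrate freely again. */
        clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);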
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index f204add..381c54a 100644
@@ -70,6 +70,11 @@ enum btrfs_block_group_flags {
        BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
        /* Indicate that the block group is placed on a sequential zone */
        BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
+       /*
+        * Indicate that block group is in the list of new block groups of a
+        * transaction.
+        */
+       BLOCK_GROUP_FLAG_NEW,
 };
 
 enum btrfs_caching_type {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dbbb672..49cef61 100644
@@ -3482,15 +3482,21 @@ zeroit:
 void btrfs_add_delayed_iput(struct btrfs_inode *inode)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       unsigned long flags;
 
        if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1))
                return;
 
        atomic_inc(&fs_info->nr_delayed_iputs);
-       spin_lock(&fs_info->delayed_iput_lock);
+       /*
+        * Need to be irq safe here because we can be called from either an irq
+        * context (see bio.c and btrfs_put_ordered_extent()) or a non-irq
+        * context.
+        */
+       spin_lock_irqsave(&fs_info->delayed_iput_lock, flags);
        ASSERT(list_empty(&inode->delayed_iput));
        list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs);
-       spin_unlock(&fs_info->delayed_iput_lock);
+       spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags);
        if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
                wake_up_process(fs_info->cleaner_kthread);
 }
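
Why the _irqsave variant here: btrfs_add_delayed_iput() can be reached from
both irq and process context, so it must save and restore whatever irq state
it was entered with. The runner below uses the plain _irq variant because the
cleaner runs in process context with irqs known to be enabled. A minimal
illustrative fragment of the two flavors (demo_lock and both helpers are
hypothetical, not part of this patch):

        static DEFINE_SPINLOCK(demo_lock);

        /* Callable from any context: preserve the entry irq state. */
        static void demo_add(void)
        {
                unsigned long flags;

                spin_lock_irqsave(&demo_lock, flags);
                /* ... add to the shared list ... */
                spin_unlock_irqrestore(&demo_lock, flags);
        }

        /* Process context only: irqs are known enabled, so the cheaper
         * unconditional disable/enable pair is sufficient. */
        static void demo_run(void)
        {
                spin_lock_irq(&demo_lock);
                /* ... drain the shared list ... */
                spin_unlock_irq(&demo_lock);
        }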
@@ -3499,37 +3505,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
                                    struct btrfs_inode *inode)
 {
        list_del_init(&inode->delayed_iput);
-       spin_unlock(&fs_info->delayed_iput_lock);
+       spin_unlock_irq(&fs_info->delayed_iput_lock);
        iput(&inode->vfs_inode);
        if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
                wake_up(&fs_info->delayed_iputs_wait);
-       spin_lock(&fs_info->delayed_iput_lock);
+       spin_lock_irq(&fs_info->delayed_iput_lock);
 }
 
 static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
                                   struct btrfs_inode *inode)
 {
        if (!list_empty(&inode->delayed_iput)) {
-               spin_lock(&fs_info->delayed_iput_lock);
+               spin_lock_irq(&fs_info->delayed_iput_lock);
                if (!list_empty(&inode->delayed_iput))
                        run_delayed_iput_locked(fs_info, inode);
-               spin_unlock(&fs_info->delayed_iput_lock);
+               spin_unlock_irq(&fs_info->delayed_iput_lock);
        }
 }
 
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
 {
-
-       spin_lock(&fs_info->delayed_iput_lock);
+       /*
+        * btrfs_put_ordered_extent() can run in irq context (see bio.c), which
+        * calls btrfs_add_delayed_iput() and that needs to lock
+        * fs_info->delayed_iput_lock. So we need to disable irqs here to
+        * prevent a deadlock.
+        */
+       spin_lock_irq(&fs_info->delayed_iput_lock);
        while (!list_empty(&fs_info->delayed_iputs)) {
                struct btrfs_inode *inode;
 
                inode = list_first_entry(&fs_info->delayed_iputs,
                                struct btrfs_inode, delayed_iput);
                run_delayed_iput_locked(fs_info, inode);
-               cond_resched_lock(&fs_info->delayed_iput_lock);
+               if (need_resched()) {
+                       spin_unlock_irq(&fs_info->delayed_iput_lock);
+                       cond_resched();
+                       spin_lock_irq(&fs_info->delayed_iput_lock);
+               }
        }
-       spin_unlock(&fs_info->delayed_iput_lock);
+       spin_unlock_irq(&fs_info->delayed_iput_lock);
 }
 
 /*
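
The open-coded need_resched() check above replaces cond_resched_lock(), which
only drops the spinlock itself and would therefore leave irqs disabled across
the reschedule. With an irq-disabling lock, the lock must be released in
full, re-enabling irqs, around the voluntary preemption point. The generic
shape as a sketch (demo_lock and demo_list are hypothetical):

        spin_lock_irq(&demo_lock);
        while (!list_empty(&demo_list)) {
                /* ... detach and process one entry ... */
                if (need_resched()) {
                        spin_unlock_irq(&demo_lock);    /* irqs back on */
                        cond_resched();
                        spin_lock_irq(&demo_lock);
                }
        }
        spin_unlock_irq(&demo_lock);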
@@ -3659,11 +3674,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                found_key.type = BTRFS_INODE_ITEM_KEY;
                found_key.offset = 0;
                inode = btrfs_iget(fs_info->sb, last_objectid, root);
-               ret = PTR_ERR_OR_ZERO(inode);
-               if (ret && ret != -ENOENT)
-                       goto out;
+               if (IS_ERR(inode)) {
+                       ret = PTR_ERR(inode);
+                       inode = NULL;
+                       if (ret != -ENOENT)
+                               goto out;
+               }
 
-               if (ret == -ENOENT && root == fs_info->tree_root) {
+               if (!inode && root == fs_info->tree_root) {
                        struct btrfs_root *dead_root;
                        int is_dead_root = 0;
 
@@ -3724,17 +3742,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 * deleted but wasn't. The inode number may have been reused,
                 * but either way, we can delete the orphan item.
                 */
-               if (ret == -ENOENT || inode->i_nlink) {
-                       if (!ret) {
+               if (!inode || inode->i_nlink) {
+                       if (inode) {
                                ret = btrfs_drop_verity_items(BTRFS_I(inode));
                                iput(inode);
+                               inode = NULL;
                                if (ret)
                                        goto out;
                        }
                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
-                               iput(inode);
                                goto out;
                        }
                        btrfs_debug(fs_info, "auto deleting %Lu",
@@ -3742,10 +3760,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                        ret = btrfs_del_orphan_item(trans, root,
                                                    found_key.objectid);
                        btrfs_end_transaction(trans);
-                       if (ret) {
-                               iput(inode);
+                       if (ret)
                                goto out;
-                       }
                        continue;
                }
 
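Both orphan-cleanup fixes converge on a single idiom: convert the error
pointer to NULL exactly once, then key every later cleanup path off a
non-NULL inode, so iput() can neither receive an ERR_PTR nor drop the same
reference twice. Reduced to its essentials (demo_lookup is a hypothetical
stand-in for btrfs_iget()):

        struct inode *inode;
        int ret = 0;

        inode = demo_lookup(objectid);
        if (IS_ERR(inode)) {
                ret = PTR_ERR(inode);
                inode = NULL;           /* never keep an ERR_PTR around */
                if (ret != -ENOENT)
                        goto out;
        }

        /* ... whenever a path consumes the reference, reset the pointer to
         * NULL so the exit path does not put it a second time ... */

out:
        if (inode)
                iput(inode);            /* at most one put, never an ERR_PTR */
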
@@ -4847,9 +4863,6 @@ again:
                ret = -ENOMEM;
                goto out;
        }
-       ret = set_page_extent_mapped(page);
-       if (ret < 0)
-               goto out_unlock;
 
        if (!PageUptodate(page)) {
                ret = btrfs_read_folio(NULL, page_folio(page));
@@ -4864,6 +4877,17 @@ again:
                        goto out_unlock;
                }
        }
+
+       /*
+        * We unlock the page after the io is completed and then re-lock it
+        * above.  release_folio() could have come in between that and cleared
+        * PagePrivate(), but left the page in the mapping.  Set the page mapped
+        * here to make sure it's properly set for the subpage stuff.
+        */
+       ret = set_page_extent_mapped(page);
+       if (ret < 0)
+               goto out_unlock;
+
        wait_on_page_writeback(page);
 
        lock_extent(io_tree, block_start, block_end, &cached_state);
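
The reordering follows from the comment in the hunk: btrfs_read_folio()
completes with the page unlocked, and between that unlock and the re-lock,
release_folio() may strip the page's private state. State guarded by the page
lock is only trustworthy if it is (re)established after the last unlock
window, which is why the call now sits after the read:

        /* read first (may unlock/re-lock the page), verify it, then: */
        ret = set_page_extent_mapped(page);     /* no unlock window follows */
        if (ret < 0)
                goto out_unlock;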
@@ -7849,8 +7873,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
 
                ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
                if (ret) {
-                       bbio->bio.bi_status = errno_to_blk_status(ret);
-                       btrfs_dio_end_io(bbio);
+                       btrfs_finish_ordered_extent(dio_data->ordered, NULL,
+                                                   file_offset, dip->bytes,
+                                                   !ret);
+                       bio->bi_status = errno_to_blk_status(ret);
+                       iomap_dio_bio_end_io(bio);
                        return;
                }
        }
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index da1f84a..2637d6b 100644
@@ -4445,4 +4445,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
                ulist_free(entry->old_roots);
                kfree(entry);
        }
+       *root = RB_ROOT;
 }
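
rbtree_postorder_for_each_entry_safe() allows each node to be freed during
the walk, but it never rebalances and never touches the root itself, so after
the loop the root still points into freed memory. Without the added reset, a
later emptiness check (the warning this fixes, seen when putting an aborted
transaction with qgroups enabled) observes a phantom non-empty tree. The
idiom in isolation (demo_record is hypothetical):

        struct demo_record {
                struct rb_node node;
                /* ... payload ... */
        };

        static void demo_destroy_all(struct rb_root *root)
        {
                struct demo_record *entry, *next;

                rbtree_postorder_for_each_entry_safe(entry, next, root, node)
                        kfree(entry);

                *root = RB_ROOT;        /* empty tree, no dangling nodes */
        }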
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index f37b925..0249ea5 100644
@@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct work_struct *work);
 static void index_rbio_pages(struct btrfs_raid_bio *rbio);
 static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
 
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check);
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio);
 static void scrub_rbio_work_locked(struct work_struct *work);
 
 static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
@@ -2404,7 +2404,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
        return 0;
 }
 
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
 {
        struct btrfs_io_context *bioc = rbio->bioc;
        const u32 sectorsize = bioc->fs_info->sectorsize;
@@ -2445,9 +2445,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
         */
        clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 
-       if (!need_check)
-               goto writeback;
-
        p_sector.page = alloc_page(GFP_NOFS);
        if (!p_sector.page)
                return -ENOMEM;
@@ -2516,7 +2513,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
                q_sector.page = NULL;
        }
 
-writeback:
        /*
         * time to start writing.  Make bios for everything from the
         * higher layers (the bio_list in our rbio) and our p/q.  Ignore
@@ -2699,7 +2695,6 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
 
 static void scrub_rbio(struct btrfs_raid_bio *rbio)
 {
-       bool need_check = false;
        int sector_nr;
        int ret;
 
@@ -2722,7 +2717,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio)
         * We have every sector properly prepared. Can finish the scrub
         * and writeback the good content.
         */
-       ret = finish_parity_scrub(rbio, need_check);
+       ret = finish_parity_scrub(rbio);
        wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
        for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
                int found_errors;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 73f9ea7..2ecb76c 100644
@@ -4078,14 +4078,6 @@ static int alloc_profile_is_valid(u64 flags, int extended)
        return has_single_bit_set(flags);
 }
 
-static inline int balance_need_close(struct btrfs_fs_info *fs_info)
-{
-       /* cancel requested || normal exit path */
-       return atomic_read(&fs_info->balance_cancel_req) ||
-               (atomic_read(&fs_info->balance_pause_req) == 0 &&
-                atomic_read(&fs_info->balance_cancel_req) == 0);
-}
-
 /*
  * Validate target profile against allowed profiles and return true if it's OK.
  * Otherwise print the error message and return false.
@@ -4275,6 +4267,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
        u64 num_devices;
        unsigned seq;
        bool reducing_redundancy;
+       bool paused = false;
        int i;
 
        if (btrfs_fs_closing(fs_info) ||
@@ -4405,6 +4398,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
        if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
                btrfs_info(fs_info, "balance: paused");
                btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED);
+               paused = true;
        }
        /*
         * Balance can be canceled by:
@@ -4433,8 +4427,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
                btrfs_update_ioctl_balance_args(fs_info, bargs);
        }
 
-       if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
-           balance_need_close(fs_info)) {
+       /* We didn't pause, we can clean everything up. */
+       if (!paused) {
                reset_balance_state(fs_info);
                btrfs_exclop_finish(fs_info);
        }
@@ -6404,7 +6398,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
            (op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
             !dev_replace->tgtdev)) {
                set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
-               *mirror_num_ret = mirror_num;
+               if (mirror_num_ret)
+                       *mirror_num_ret = mirror_num;
                *bioc_ret = NULL;
                ret = 0;
                goto out;
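
The mirror_num_ret hunk applies the usual guard for optional out parameters:
every path that produces the value must tolerate a NULL destination, and the
fast single-stripe path above was the one that forgot. Reduced to a sketch
(demo_map is hypothetical):

        /* @mirror_num_ret may be NULL when the caller does not need it. */
        static int demo_map(u64 logical, int *mirror_num_ret)
        {
                int mirror_num = 1;

                /* ... choose a stripe and compute mirror_num ... */

                if (mirror_num_ret)
                        *mirror_num_ret = mirror_num;
                return 0;
        }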