Merge tag 'for-5.13-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 3 Jun 2021 18:37:14 +0000 (11:37 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 3 Jun 2021 18:37:14 +0000 (11:37 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Jun 2021 18:37:14 +0000 (11:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Jun 2021 18:37:14 +0000 (11:37 -0700)
diff --git a/MAINTAINERS b/MAINTAINERS

index 673cadd..9487061 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3877,6 +3877,7 @@ L:        linux-btrfs@vger.kernel.org
  S:     Maintained
  W:     http://btrfs.wiki.kernel.org/
  Q:     http://patchwork.kernel.org/project/linux-btrfs/list/
+C:     irc://irc.libera.chat/btrfs
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git
  F:     Documentation/filesystems/btrfs.rst
  F:     fs/btrfs/
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c

index d17ac30..1346d69 100644 (file)
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -457,7 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
         bytes_left = compressed_len;
         for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
                 int submit = 0;
-               int len;
+               int len = 0;
  
                 page = compressed_pages[pg_index];
                 page->mapping = inode->vfs_inode.i_mapping;
@@ -465,10 +465,17 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                         submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
                                                           0);
  
-               if (pg_index == 0 && use_append)
-                       len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
-               else
-                       len = bio_add_page(bio, page, PAGE_SIZE, 0);
+               /*
+                * Page can only be added to bio if the current bio fits in
+                * stripe.
+                */
+               if (!submit) {
+                       if (pg_index == 0 && use_append)
+                               len = bio_add_zone_append_page(bio, page,
+                                                              PAGE_SIZE, 0);
+                       else
+                               len = bio_add_page(bio, page, PAGE_SIZE, 0);
+               }
  
                 page->mapping = NULL;
                 if (submit || len < PAGE_SIZE) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index f1d15b6..3d5c35e 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1868,7 +1868,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
         trace_run_delayed_ref_head(fs_info, head, 0);
         btrfs_delayed_ref_unlock(head);
         btrfs_put_delayed_ref_head(head);
-       return 0;
+       return ret;
  }
  
  static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c

index 294602f..441cee7 100644 (file)
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -788,7 +788,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
         u64 end_byte = bytenr + len;
         u64 csum_end;
         struct extent_buffer *leaf;
-       int ret;
+       int ret = 0;
         const u32 csum_size = fs_info->csum_size;
         u32 blocksize_bits = fs_info->sectorsize_bits;
  
@@ -806,6 +806,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
  
                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                 if (ret > 0) {
+                       ret = 0;
                         if (path->slots[0] == 0)
                                 break;
                         path->slots[0]--;
@@ -862,7 +863,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
                         ret = btrfs_del_items(trans, root, path,
                                               path->slots[0], del_nr);
                         if (ret)
-                               goto out;
+                               break;
                         if (key.offset == bytenr)
                                 break;
                 } else if (key.offset < bytenr && csum_end > end_byte) {
@@ -906,8 +907,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
                         ret = btrfs_split_item(trans, root, path, &key, offset);
                         if (ret && ret != -EAGAIN) {
                                 btrfs_abort_transaction(trans, ret);
-                               goto out;
+                               break;
                         }
+                       ret = 0;
  
                         key.offset = end_byte - 1;
                 } else {
@@ -917,12 +919,41 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
                 }
                 btrfs_release_path(path);
         }
-       ret = 0;
-out:
         btrfs_free_path(path);
         return ret;
  }
  
+static int find_next_csum_offset(struct btrfs_root *root,
+                                struct btrfs_path *path,
+                                u64 *next_offset)
+{
+       const u32 nritems = btrfs_header_nritems(path->nodes[0]);
+       struct btrfs_key found_key;
+       int slot = path->slots[0] + 1;
+       int ret;
+
+       if (nritems == 0 || slot >= nritems) {
+               ret = btrfs_next_leaf(root, path);
+               if (ret < 0) {
+                       return ret;
+               } else if (ret > 0) {
+                       *next_offset = (u64)-1;
+                       return 0;
+               }
+               slot = path->slots[0];
+       }
+
+       btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
+
+       if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+           found_key.type != BTRFS_EXTENT_CSUM_KEY)
+               *next_offset = (u64)-1;
+       else
+               *next_offset = found_key.offset;
+
+       return 0;
+}
+
  int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root,
                            struct btrfs_ordered_sum *sums)
@@ -938,7 +969,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
         u64 total_bytes = 0;
         u64 csum_offset;
         u64 bytenr;
-       u32 nritems;
         u32 ins_size;
         int index = 0;
         int found_next;
@@ -981,26 +1011,10 @@ again:
                         goto insert;
                 }
         } else {
-               int slot = path->slots[0] + 1;
-               /* we didn't find a csum item, insert one */
-               nritems = btrfs_header_nritems(path->nodes[0]);
-               if (!nritems || (path->slots[0] >= nritems - 1)) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret < 0) {
-                               goto out;
-                       } else if (ret > 0) {
-                               found_next = 1;
-                               goto insert;
-                       }
-                       slot = path->slots[0];
-               }
-               btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
-               if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
-                   found_key.type != BTRFS_EXTENT_CSUM_KEY) {
-                       found_next = 1;
-                       goto insert;
-               }
-               next_offset = found_key.offset;
+               /* We didn't find a csum item, insert one. */
+               ret = find_next_csum_offset(root, path, &next_offset);
+               if (ret < 0)
+                       goto out;
                 found_next = 1;
                 goto insert;
         }
@@ -1056,8 +1070,48 @@ extend_csum:
                 tmp = sums->len - total_bytes;
                 tmp >>= fs_info->sectorsize_bits;
                 WARN_ON(tmp < 1);
+               extend_nr = max_t(int, 1, tmp);
+
+               /*
+                * A log tree can already have checksum items with a subset of
+                * the checksums we are trying to log. This can happen after
+                * doing a sequence of partial writes into prealloc extents and
+                * fsyncs in between, with a full fsync logging a larger subrange
+                * of an extent for which a previous fast fsync logged a smaller
+                * subrange. And this happens in particular due to merging file
+                * extent items when we complete an ordered extent for a range
+                * covered by a prealloc extent - this is done at
+                * btrfs_mark_extent_written().
+                *
+                * So if we try to extend the previous checksum item, which has
+                * a range that ends at the start of the range we want to insert,
+                * make sure we don't extend beyond the start offset of the next
+                * checksum item. If we are at the last item in the leaf, then
+                * forget the optimization of extending and add a new checksum
+                * item - it is not worth the complexity of releasing the path,
+                * getting the first key for the next leaf, repeat the btree
+                * search, etc, because log trees are temporary anyway and it
+                * would only save a few bytes of leaf space.
+                */
+               if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
+                       if (path->slots[0] + 1 >=
+                           btrfs_header_nritems(path->nodes[0])) {
+                               ret = find_next_csum_offset(root, path, &next_offset);
+                               if (ret < 0)
+                                       goto out;
+                               found_next = 1;
+                               goto insert;
+                       }
+
+                       ret = find_next_csum_offset(root, path, &next_offset);
+                       if (ret < 0)
+                               goto out;
+
+                       tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits;
+                       if (tmp <= INT_MAX)
+                               extend_nr = min_t(int, extend_nr, tmp);
+               }
  
-               extend_nr = max_t(int, 1, (int)tmp);
                 diff = (csum_offset + extend_nr) * csum_size;
                 diff = min(diff,
                            MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 33f1457..46f3929 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3000,6 +3000,18 @@ out:
         if (ret || truncated) {
                 u64 unwritten_start = start;
  
+               /*
+                * If we failed to finish this ordered extent for any reason we
+                * need to make sure BTRFS_ORDERED_IOERR is set on the ordered
+                * extent, and mark the inode with the error if it wasn't
+                * already set.  Any error during writeback would have already
+                * set the mapping error, so we need to set it if we're the ones
+                * marking this ordered extent as failed.
+                */
+               if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR,
+                                            &ordered_extent->flags))
+                       mapping_set_error(ordered_extent->inode->i_mapping, -EIO);
+
                 if (truncated)
                         unwritten_start += logical_len;
                 clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
@@ -9076,6 +9088,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
         int ret2;
         bool root_log_pinned = false;
         bool dest_log_pinned = false;
+       bool need_abort = false;
  
         /* we only allow rename subvolume link between subvolumes */
         if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
@@ -9135,6 +9148,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                                              old_idx);
                 if (ret)
                         goto out_fail;
+               need_abort = true;
         }
  
         /* And now for the dest. */
@@ -9150,8 +9164,11 @@ static int btrfs_rename_exchange(struct inode *old_dir,
                                              new_ino,
                                              btrfs_ino(BTRFS_I(old_dir)),
                                              new_idx);
-               if (ret)
+               if (ret) {
+                       if (need_abort)
+                               btrfs_abort_transaction(trans, ret);
                         goto out_fail;
+               }
         }
  
         /* Update inode version and ctime/mtime. */
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c

index d434dc7..9178da0 100644 (file)
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -203,10 +203,7 @@ static int clone_copy_inline_extent(struct inode *dst,
                          * inline extent's data to the page.
                          */
                         ASSERT(key.offset > 0);
-                       ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
-                                                 inline_data, size, datal,
-                                                 comp_type);
-                       goto out;
+                       goto copy_to_page;
                 }
         } else if (i_size_read(dst) <= datal) {
                 struct btrfs_file_extent_item *ei;
@@ -222,13 +219,10 @@ static int clone_copy_inline_extent(struct inode *dst,
                     BTRFS_FILE_EXTENT_INLINE)
                         goto copy_inline_extent;
  
-               ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
-                                         inline_data, size, datal, comp_type);
-               goto out;
+               goto copy_to_page;
         }
  
  copy_inline_extent:
-       ret = 0;
         /*
          * We have no extent items, or we have an extent at offset 0 which may
          * or may not be inlined. All these cases are dealt the same way.
@@ -240,11 +234,13 @@ copy_inline_extent:
                  * clone. Deal with all these cases by copying the inline extent
                  * data into the respective page at the destination inode.
                  */
-               ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
-                                         inline_data, size, datal, comp_type);
-               goto out;
+               goto copy_to_page;
         }
  
+       /*
+        * Release path before starting a new transaction so we don't hold locks
+        * that would confuse lockdep.
+        */
         btrfs_release_path(path);
         /*
          * If we end up here it means were copy the inline extent into a leaf
@@ -282,11 +278,6 @@ copy_inline_extent:
  out:
         if (!ret && !trans) {
                 /*
-                * Release path before starting a new transaction so we don't
-                * hold locks that would confuse lockdep.
-                */
-               btrfs_release_path(path);
-               /*
                  * No transaction here means we copied the inline extent into a
                  * page of the destination inode.
                  *
@@ -306,6 +297,21 @@ out:
                 *trans_out = trans;
  
         return ret;
+
+copy_to_page:
+       /*
+        * Release our path because we don't need it anymore and also because
+        * copy_inline_to_page() needs to reserve data and metadata, which may
+        * need to flush delalloc when we are low on available space and
+        * therefore cause a deadlock if writeback of an inline extent needs to
+        * write to the same leaf or an ordered extent completion needs to write
+        * to the same leaf.
+        */
+       btrfs_release_path(path);
+
+       ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
+                                 inline_data, size, datal, comp_type);
+       goto out;
  }
  
  /**
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index 326be57..362d14d 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1574,7 +1574,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
                         if (ret)
                                 goto out;
  
-                       btrfs_update_inode(trans, root, BTRFS_I(inode));
+                       ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
+                       if (ret)
+                               goto out;
                 }
  
                 ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
@@ -1749,7 +1751,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
  
         if (nlink != inode->i_nlink) {
                 set_nlink(inode, nlink);
-               btrfs_update_inode(trans, root, BTRFS_I(inode));
+               ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
+               if (ret)
+                       goto out;
         }
         BTRFS_I(inode)->index_cnt = (u64)-1;
  
@@ -1787,6 +1791,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
                         break;
  
                 if (ret == 1) {
+                       ret = 0;
                         if (path->slots[0] == 0)
                                 break;
                         path->slots[0]--;
@@ -1799,17 +1804,19 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
  
                 ret = btrfs_del_item(trans, root, path);
                 if (ret)
-                       goto out;
+                       break;
  
                 btrfs_release_path(path);
                 inode = read_one_inode(root, key.offset);
-               if (!inode)
-                       return -EIO;
+               if (!inode) {
+                       ret = -EIO;
+                       break;
+               }
  
                 ret = fixup_inode_link_count(trans, root, inode);
                 iput(inode);
                 if (ret)
-                       goto out;
+                       break;
  
                 /*
                  * fixup on a directory may create new entries,
@@ -1818,8 +1825,6 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
                  */
                 key.offset = (u64)-1;
         }
-       ret = 0;
-out:
         btrfs_release_path(path);
         return ret;
  }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 3 Jun 2021 18:37:14 +0000 (11:37 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 3 Jun 2021 18:37:14 +0000 (11:37 -0700)
MAINTAINERS		patch | blob | history
fs/btrfs/compression.c		patch | blob | history
fs/btrfs/extent-tree.c		patch | blob | history
fs/btrfs/file-item.c		patch | blob | history
fs/btrfs/inode.c		patch | blob | history
fs/btrfs/reflink.c		patch | blob | history
fs/btrfs/tree-log.c		patch | blob | history