btrfs: fix fsync failure and transaction abort after writes to prealloc extents
[platform/kernel/linux-rpi.git] / fs / btrfs / file-item.c
index a5a8dac..441cee7 100644 (file)
@@ -923,6 +923,37 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+static int find_next_csum_offset(struct btrfs_root *root,
+                                struct btrfs_path *path,
+                                u64 *next_offset)      /* out: key offset of the next csum item, or (u64)-1 if none */
+{      /* Report the key offset of the csum item that follows the path's current slot; returns 0 or a negative error. */
+       const u32 nritems = btrfs_header_nritems(path->nodes[0]);
+       struct btrfs_key found_key;
+       int slot = path->slots[0] + 1;  /* candidate: the item right after the current slot */
+       int ret;
+       /* Empty leaf, or the candidate slot is past the leaf's last item: continue in the next leaf. */
+       if (nritems == 0 || slot >= nritems) {
+               ret = btrfs_next_leaf(root, path);      /* NOTE(review): presumably repositions path on the following leaf - confirm */
+               if (ret < 0) {
+                       return ret;     /* pass the btrfs_next_leaf() error through unchanged */
+               } else if (ret > 0) {
+                       *next_offset = (u64)-1; /* positive return (presumably: no further leaf) is reported as "no next csum item" */
+                       return 0;
+               }
+               slot = path->slots[0];  /* use whatever slot the path holds after the leaf change */
+       }
+       /* Decode the key of the candidate item into CPU byte order. */
+       btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
+       /* A candidate that is not an extent csum item also counts as "no next csum item". */
+       if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+           found_key.type != BTRFS_EXTENT_CSUM_KEY)
+               *next_offset = (u64)-1;
+       else
+               *next_offset = found_key.offset;
+
+       return 0;       /* success: the answer, including the (u64)-1 sentinel, is in *next_offset */
+}
+
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_ordered_sum *sums)
@@ -938,7 +969,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
        u64 total_bytes = 0;
        u64 csum_offset;
        u64 bytenr;
-       u32 nritems;
        u32 ins_size;
        int index = 0;
        int found_next;
@@ -981,26 +1011,10 @@ again:
                        goto insert;
                }
        } else {
-               int slot = path->slots[0] + 1;
-               /* we didn't find a csum item, insert one */
-               nritems = btrfs_header_nritems(path->nodes[0]);
-               if (!nritems || (path->slots[0] >= nritems - 1)) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret < 0) {
-                               goto out;
-                       } else if (ret > 0) {
-                               found_next = 1;
-                               goto insert;
-                       }
-                       slot = path->slots[0];
-               }
-               btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
-               if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
-                   found_key.type != BTRFS_EXTENT_CSUM_KEY) {
-                       found_next = 1;
-                       goto insert;
-               }
-               next_offset = found_key.offset;
+               /* We didn't find a csum item, insert one. */
+               ret = find_next_csum_offset(root, path, &next_offset);
+               if (ret < 0)
+                       goto out;
                found_next = 1;
                goto insert;
        }
@@ -1056,8 +1070,48 @@ extend_csum:
                tmp = sums->len - total_bytes;
                tmp >>= fs_info->sectorsize_bits;
                WARN_ON(tmp < 1);
+               extend_nr = max_t(int, 1, tmp);
+
+               /*
+                * A log tree can already have checksum items with a subset of
+                * the checksums we are trying to log. This can happen after
+                * doing a sequence of partial writes into prealloc extents and
+                * fsyncs in between, with a full fsync logging a larger subrange
+                * of an extent for which a previous fast fsync logged a smaller
+                * subrange. And this happens in particular due to merging file
+                * extent items when we complete an ordered extent for a range
+                * covered by a prealloc extent - this is done at
+                * btrfs_mark_extent_written().
+                *
+                * So if we try to extend the previous checksum item, which has
+                * a range that ends at the start of the range we want to insert,
+                * make sure we don't extend beyond the start offset of the next
+                * checksum item. If we are at the last item in the leaf, then
+                * forget the optimization of extending and add a new checksum
+                * item - it is not worth the complexity of releasing the path,
+                * getting the first key for the next leaf, repeat the btree
+                * search, etc, because log trees are temporary anyway and it
+                * would only save a few bytes of leaf space.
+                */
+               if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
+                       if (path->slots[0] + 1 >=
+                           btrfs_header_nritems(path->nodes[0])) {
+                               ret = find_next_csum_offset(root, path, &next_offset);
+                               if (ret < 0)
+                                       goto out;
+                               found_next = 1;
+                               goto insert;
+                       }
+
+                       ret = find_next_csum_offset(root, path, &next_offset);
+                       if (ret < 0)
+                               goto out;
+
+                       tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits;
+                       if (tmp <= INT_MAX)
+                               extend_nr = min_t(int, extend_nr, tmp);
+               }
 
-               extend_nr = max_t(int, 1, (int)tmp);
                diff = (csum_offset + extend_nr) * csum_size;
                diff = min(diff,
                           MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);