* If an extent intersects the range but is not entirely inside the range
* it is either truncated or split. Anything entirely inside the range
* is deleted from the tree.
+ *
+ * Note: the VFS' inode number of bytes is not updated, it's up to the caller
+ * to deal with that. We set the field 'bytes_found' of the arguments structure
+ * with the number of allocated bytes found in the target range, so that the
+ * caller can update the inode's number of bytes in an atomic way when
+ * replacing extents in a range to avoid races with stat(2).
*/
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_ref ref = { 0 };
struct btrfs_key key;
struct btrfs_key new_key;
- struct inode *vfs_inode = &inode->vfs_inode;
u64 ino = btrfs_ino(inode);
u64 search_start = args->start;
u64 disk_bytenr = 0;
int leafs_visited = 0;
struct btrfs_path *path = args->path;
+ args->bytes_found = 0;
args->extent_inserted = false;
/* Must always have a path if ->replace_extent is true */
extent_end - args->end);
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0)
- inode_sub_bytes(vfs_inode,
- args->end - key.offset);
+ args->bytes_found += args->end - key.offset;
break;
}
args->start - key.offset);
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0)
- inode_sub_bytes(vfs_inode,
- extent_end - args->start);
+ args->bytes_found += extent_end - args->start;
if (args->end == extent_end)
break;
if (update_refs &&
extent_type == BTRFS_FILE_EXTENT_INLINE) {
- inode_sub_bytes(vfs_inode,
- extent_end - key.offset);
+ args->bytes_found += extent_end - key.offset;
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
} else if (update_refs && disk_bytenr > 0) {
key.offset - extent_offset);
ret = btrfs_free_extent(trans, &ref);
BUG_ON(ret); /* -ENOMEM */
- inode_sub_bytes(vfs_inode,
- extent_end - key.offset);
+ args->bytes_found += extent_end - key.offset;
}
if (args->end == extent_end)
struct inode *inode,
struct btrfs_path *path,
struct btrfs_replace_extent_info *extent_info,
- const u64 replace_len)
+ const u64 replace_len,
+ const u64 bytes_to_drop)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
return 0;
if (extent_info->disk_offset == 0 &&
- btrfs_fs_incompat(fs_info, NO_HOLES))
+ btrfs_fs_incompat(fs_info, NO_HOLES)) {
+ btrfs_update_inode_bytes(BTRFS_I(inode), 0, bytes_to_drop);
return 0;
+ }
key.objectid = btrfs_ino(BTRFS_I(inode));
key.type = BTRFS_EXTENT_DATA_KEY;
return ret;
/* If it's a hole, nothing more needs to be done. */
- if (extent_info->disk_offset == 0)
+ if (extent_info->disk_offset == 0) {
+ btrfs_update_inode_bytes(BTRFS_I(inode), 0, bytes_to_drop);
return 0;
+ }
- inode_add_bytes(inode, replace_len);
+ btrfs_update_inode_bytes(BTRFS_I(inode), replace_len, bytes_to_drop);
if (extent_info->is_new_extent && extent_info->insertions == 0) {
key.objectid = extent_info->disk_offset;
while (cur_offset < end) {
drop_args.start = cur_offset;
ret = btrfs_drop_extents(trans, root, BTRFS_I(inode), &drop_args);
+ /* If we are punching a hole decrement the inode's byte count */
+ if (!extent_info)
+ btrfs_update_inode_bytes(BTRFS_I(inode), 0,
+ drop_args.bytes_found);
if (ret != -ENOSPC) {
/*
* When cloning we want to avoid transaction aborts when
extent_info->file_offset;
ret = btrfs_insert_replace_extent(trans, inode, path,
- extent_info, replace_len);
+ extent_info, replace_len,
+ drop_args.bytes_found);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
}
if (extent_info) {
ret = btrfs_insert_replace_extent(trans, inode, path, extent_info,
- extent_info->data_len);
+ extent_info->data_len,
+ drop_args.bytes_found);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_trans;
if (compressed_size && compressed_pages)
cur_size = compressed_size;
- inode_add_bytes(inode, size);
-
if (!extent_inserted) {
struct btrfs_key key;
size_t datasize;
* could end up racing with unlink.
*/
BTRFS_I(inode)->disk_i_size = inode->i_size;
- ret = btrfs_update_inode(trans, root, inode);
-
fail:
return ret;
}
goto out;
}
+ btrfs_update_inode_bytes(inode, inline_len, drop_args.bytes_found);
+ ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
+ if (ret && ret != -ENOSPC) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ } else if (ret == -ENOSPC) {
+ ret = 1;
+ goto out;
+ }
+
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
out:
spin_lock(&inode->lock);
ASSERT(inode->new_delalloc_bytes >= len);
inode->new_delalloc_bytes -= len;
+ if (*bits & EXTENT_ADD_INODE_BYTES)
+ inode_add_bytes(&inode->vfs_inode, len);
spin_unlock(&inode->lock);
}
}
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 file_pos,
struct btrfs_file_extent_item *stack_fi,
+ const bool update_inode_bytes,
u64 qgroup_reserved)
{
struct btrfs_root *root = inode->root;
+ const u64 sectorsize = root->fs_info->sectorsize;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- inode_add_bytes(&inode->vfs_inode, num_bytes);
+ /*
+ * If we dropped an inline extent here, we know the range where it is
+ * was not marked with the EXTENT_DELALLOC_NEW bit, so we update the
+ * number of bytes only for that range contaning the inline extent.
+ * The remaining of the range will be processed when clearning the
+ * EXTENT_DELALLOC_BIT bit through the ordered extent completion.
+ */
+ if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) {
+ u64 inline_size = round_down(drop_args.bytes_found, sectorsize);
+
+ inline_size = drop_args.bytes_found - inline_size;
+ btrfs_update_inode_bytes(inode, sectorsize, inline_size);
+ drop_args.bytes_found -= inline_size;
+ num_bytes -= sectorsize;
+ }
+
+ if (update_inode_bytes)
+ btrfs_update_inode_bytes(inode, num_bytes, drop_args.bytes_found);
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
{
struct btrfs_file_extent_item stack_fi;
u64 logical_len;
+ bool update_inode_bytes;
memset(&stack_fi, 0, sizeof(stack_fi));
btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
/* Encryption and other encoding is reserved and all 0 */
+ /*
+ * For delalloc, when completing an ordered extent we update the inode's
+ * bytes when clearing the range in the inode's io tree, so pass false
+ * as the argument 'update_inode_bytes' to insert_reserved_file_extent(),
+ * except if the ordered extent was truncated.
+ */
+ update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
+ test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
+
return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
oe->file_offset, &stack_fi,
- oe->qgroup_rsv);
+ update_inode_bytes, oe->qgroup_rsv);
}
/*
u64 logical_len = ordered_extent->num_bytes;
bool freespace_inode;
bool truncated = false;
- bool range_locked = false;
- bool clear_new_delalloc_bytes = false;
bool clear_reserved_extent = true;
- unsigned int clear_bits;
+ unsigned int clear_bits = EXTENT_DEFRAG;
start = ordered_extent->file_offset;
end = start + ordered_extent->num_bytes - 1;
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
- clear_new_delalloc_bytes = true;
+ clear_bits |= EXTENT_DELALLOC_NEW;
freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
goto out;
}
- range_locked = true;
+ clear_bits |= EXTENT_LOCKED;
lock_extent_bits(io_tree, start, end, &cached_state);
if (freespace_inode)
goto out;
}
+ /*
+ * If this is a new delalloc range, clear its new delalloc flag to
+ * update the inode's number of bytes. This needs to be done first
+ * before updating the inode item.
+ */
+ if ((clear_bits & EXTENT_DELALLOC_NEW) &&
+ !test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end,
+ EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
+ 0, 0, &cached_state);
+
btrfs_inode_safe_disk_i_size_write(inode, 0);
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) { /* -ENOMEM or corruption */
}
ret = 0;
out:
- clear_bits = EXTENT_DEFRAG;
- if (range_locked)
- clear_bits |= EXTENT_LOCKED;
- if (clear_new_delalloc_bytes)
- clear_bits |= EXTENT_DELALLOC_NEW;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits,
(clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
&cached_state);
ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
offset, 0, 0, len, 0, len, 0, 0, 0);
- if (ret)
+ if (ret) {
btrfs_abort_transaction(trans, ret);
- else
+ } else {
+ btrfs_update_inode_bytes(BTRFS_I(inode), 0, drop_args.bytes_found);
btrfs_update_inode(trans, root, inode);
+ }
btrfs_end_transaction(trans);
return ret;
}
u64 start;
u64 end;
int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
+ bool found_ordered = false;
+ bool completed_ordered = false;
/*
* we have the page locked, so new writeback can't start,
start = page_start;
ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1);
if (ordered) {
+ found_ordered = true;
end = min(page_end,
ordered->file_offset + ordered->num_bytes - 1);
/*
- * IO on this page will never be started, so we need
- * to account for any ordered extents now
+ * IO on this page will never be started, so we need to account
+ * for any ordered extents now. Don't clear EXTENT_DELALLOC_NEW
+ * here, must leave that up for the ordered extent completion.
*/
if (!inode_evicting)
clear_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+ EXTENT_DELALLOC |
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 0, &cached_state);
/*
if (btrfs_dec_test_ordered_pending(inode, &ordered,
start,
- end - start + 1, 1))
+ end - start + 1, 1)) {
btrfs_finish_ordered_io(ordered);
+ completed_ordered = true;
+ }
}
btrfs_put_ordered_extent(ordered);
if (!inode_evicting) {
*/
btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
+ bool delete = true;
+
+ /*
+ * If there's an ordered extent for this range and we have not
+ * finished it ourselves, we must leave EXTENT_DELALLOC_NEW set
+ * in the range for the ordered extent completion. We must also
+ * not delete the range, otherwise we would lose that bit (and
+ * any other bits set in the range). Make sure EXTENT_UPTODATE
+ * is cleared if we don't delete, otherwise it can lead to
+ * corruptions if the i_size is extented later.
+ */
+ if (found_ordered && !completed_ordered)
+ delete = false;
clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
- EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
- &cached_state);
+ EXTENT_DELALLOC | EXTENT_UPTODATE |
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
+ delete, &cached_state);
__btrfs_releasepage(page, GFP_NOFS);
}
u32 request_mask, unsigned int flags)
{
u64 delalloc_bytes;
+ u64 inode_bytes;
struct inode *inode = d_inode(path->dentry);
u32 blocksize = inode->i_sb->s_blocksize;
u32 bi_flags = BTRFS_I(inode)->flags;
spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
+ inode_bytes = inode_get_bytes(inode);
spin_unlock(&BTRFS_I(inode)->lock);
- stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
+ stat->blocks = (ALIGN(inode_bytes, blocksize) +
ALIGN(delalloc_bytes, blocksize)) >> 9;
return 0;
}
if (trans) {
ret = insert_reserved_file_extent(trans, BTRFS_I(inode),
- file_offset, &stack_fi, ret);
+ file_offset, &stack_fi,
+ true, ret);
if (ret)
return ERR_PTR(ret);
return trans;
}
#endif
+/*
+ * Update the number of bytes used in the VFS' inode. When we replace extents in
+ * a range (clone, dedupe, fallocate's zero range), we must update the number of
+ * bytes used by the inode in an atomic manner, so that concurrent stat(2) calls
+ * always get a correct value.
+ */
+void btrfs_update_inode_bytes(struct btrfs_inode *inode,
+ const u64 add_bytes,
+ const u64 del_bytes)
+{
+ if (add_bytes == del_bytes)
+ return;
+
+ spin_lock(&inode->lock);
+ if (del_bytes > 0)
+ inode_sub_bytes(&inode->vfs_inode, del_bytes);
+ if (add_bytes > 0)
+ inode_add_bytes(&inode->vfs_inode, add_bytes);
+ spin_unlock(&inode->lock);
+}
+
static const struct inode_operations btrfs_dir_inode_operations = {
.getattr = btrfs_getattr,
.lookup = btrfs_lookup,