static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback);
+
static noinline int cow_file_range(struct btrfs_inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
- unsigned long *nr_written, int unlock,
- u64 *done_offset);
+ unsigned long *nr_written, u64 *done_offset,
+ bool keep_locked, bool no_inline);
static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 orig_block_len,
* can directly submit them without interruption.
*/
ret = cow_file_range(inode, locked_page, start, end, &page_started,
- &nr_written, 0, NULL);
+ &nr_written, NULL, true, false);
/* Inline extent inserted, page gets unlocked and everything is done */
if (page_started)
return 0;
async_extent->compressed_size,
0, *alloc_hint, &ins, 1, 1);
if (ret) {
- free_async_extent_pages(async_extent);
/*
* Here we used to try again by going back to non-compressed
* path for ENOSPC. But we can't reserve space even for
* locked_page is the page that writepage had locked already. We use
* it to make sure we don't do extra locks or unlocks.
*
- * *page_started is set to one if we unlock locked_page and do everything
- * required to start IO on it. It may be clean and already done with
- * IO when we return.
- *
- * When unlock == 1, we unlock the pages in successfully allocated regions.
- * When unlock == 0, we leave them locked for writing them out.
+ * When this function fails, it unlocks all pages except @locked_page.
*
- * However, we unlock all the pages except @locked_page in case of failure.
+ * When this function successfully creates an inline extent, it sets page_started
+ * to 1 and unlocks all pages including locked_page and starts I/O on them.
+ * (In reality inline extents are limited to a single page, so locked_page is
+ * the only page handled anyway).
*
- * In summary, page locking state will be as follow:
+ * When this function succeed and creates a normal extent, the page locking
+ * status depends on the passed in flags:
*
- * - page_started == 1 (return value)
- * - All the pages are unlocked. IO is started.
- * - Note that this can happen only on success
- * - unlock == 1
- * - All the pages except @locked_page are unlocked in any case
- * - unlock == 0
- * - On success, all the pages are locked for writing out them
- * - On failure, all the pages except @locked_page are unlocked
+ * - If @keep_locked is set, all pages are kept locked.
+ * - Else all pages except for @locked_page are unlocked.
*
* When a failure happens in the second or later iteration of the
* while-loop, the ordered extents created in previous iterations are kept
static noinline int cow_file_range(struct btrfs_inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
- unsigned long *nr_written, int unlock,
- u64 *done_offset)
+ unsigned long *nr_written, u64 *done_offset,
+ bool keep_locked, bool no_inline)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
* This means we can trigger inline extent even if we didn't want to.
* So here we skip inline extent creation completely.
*/
- if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
+ if (start == 0 && fs_info->sectorsize == PAGE_SIZE && !no_inline) {
u64 actual_end = min_t(u64, i_size_read(&inode->vfs_inode),
end + 1);
* Do set the Ordered (Private2) bit so we know this page was
* properly setup for writepage.
*/
- page_ops = unlock ? PAGE_UNLOCK : 0;
+ page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
page_ops |= PAGE_SET_ORDERED;
extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
* EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup
* function.
*
- * However, in case of unlock == 0, we still need to unlock the pages
+ * However, in case of @keep_locked, we still need to unlock the pages
* (except @locked_page) to ensure all the pages are unlocked.
*/
- if (!unlock && orig_start < start) {
+ if (keep_locked && orig_start < start) {
if (!locked_page)
mapping_set_error(inode->vfs_inode.i_mapping, ret);
extent_clear_unlock_delalloc(inode, orig_start, start - 1,
while (start <= end) {
ret = cow_file_range(inode, locked_page, start, end, page_started,
- nr_written, 0, &done_offset);
+ nr_written, &done_offset, true, false);
if (ret && ret != -EAGAIN)
return ret;
}
static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
- const u64 start, const u64 end,
- int *page_started, unsigned long *nr_written)
+ const u64 start, const u64 end)
{
const bool is_space_ino = btrfs_is_free_space_inode(inode);
const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root);
const u64 range_bytes = end + 1 - start;
struct extent_io_tree *io_tree = &inode->io_tree;
+ int page_started = 0;
+ unsigned long nr_written;
u64 range_start = start;
u64 count;
+ int ret;
/*
* If EXTENT_NORESERVE is set it means that when the buffered write was
NULL);
}
- return cow_file_range(inode, locked_page, start, end, page_started,
- nr_written, 1, NULL);
+ /*
+ * Don't try to create inline extents, as a mix of inline extent that
+ * is written out and unlocked directly and a normal NOCOW extent
+ * doesn't work.
+ */
+ ret = cow_file_range(inode, locked_page, start, end, &page_started,
+ &nr_written, NULL, false, true);
+ ASSERT(!page_started);
+ return ret;
}
struct can_nocow_file_extent_args {
*/
static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
struct page *locked_page,
- const u64 start, const u64 end,
- int *page_started,
- unsigned long *nr_written)
+ const u64 start, const u64 end)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
*/
if (cow_start != (u64)-1) {
ret = fallback_to_cow(inode, locked_page,
- cow_start, found_key.offset - 1,
- page_started, nr_written);
+ cow_start, found_key.offset - 1);
if (ret)
goto error;
cow_start = (u64)-1;
if (cow_start != (u64)-1) {
cur_offset = end;
- ret = fallback_to_cow(inode, locked_page, cow_start, end,
- page_started, nr_written);
+ ret = fallback_to_cow(inode, locked_page, cow_start, end);
if (ret)
goto error;
}
* preallocated inodes.
*/
ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
- ret = run_delalloc_nocow(inode, locked_page, start, end,
- page_started, nr_written);
+ ret = run_delalloc_nocow(inode, locked_page, start, end);
goto out;
}
page_started, nr_written, wbc);
else
ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1, NULL);
+ page_started, nr_written, NULL, false, false);
out:
ASSERT(ret <= 0);
btrfs_free_reserved_extent(fs_info,
ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes, 1);
+ /*
+ * Actually free the qgroup rsv which was released when
+ * the ordered extent was created.
+ */
+ btrfs_qgroup_free_refroot(fs_info, inode->root->root_key.objectid,
+ ordered_extent->qgroup_rsv,
+ BTRFS_QGROUP_RSV_DATA);
}
}
}
/*
+ * Find the highest existing sequence number in a directory and then set the
+ * in-memory index_cnt variable to the first free sequence number.
+ */
+static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_key key, found_key;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ int ret;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = (u64)-1;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ /* FIXME: we should be able to handle this */
+ if (ret == 0)
+ goto out;
+ ret = 0;
+
+ if (path->slots[0] == 0) {
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+ path->slots[0]--;
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.objectid != btrfs_ino(inode) ||
+ found_key.type != BTRFS_DIR_INDEX_KEY) {
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+ inode->index_cnt = found_key.offset + 1;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+{
+ if (dir->index_cnt == (u64)-1) {
+ int ret;
+
+ ret = btrfs_inode_delayed_dir_index_count(dir);
+ if (ret) {
+ ret = btrfs_set_inode_index_count(dir);
+ if (ret)
+ return ret;
+ }
+ }
+
+ *index = dir->index_cnt;
+
+ return 0;
+}
+
+/*
* All this infrastructure exists because dir_emit can fault, and we are holding
* the tree lock when doing readdir. For now just allocate a buffer and copy
* our information into that, and then dir_emit from the buffer. This is
static int btrfs_opendir(struct inode *inode, struct file *file)
{
struct btrfs_file_private *private;
+ u64 last_index;
+ int ret;
+
+ ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
+ if (ret)
+ return ret;
private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
if (!private)
return -ENOMEM;
+ private->last_index = last_index;
private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!private->filldir_buf) {
kfree(private);
INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list);
- put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+ put = btrfs_readdir_get_delayed_items(inode, private->last_index,
+ &ins_list, &del_list);
again:
key.type = BTRFS_DIR_INDEX_KEY;
break;
if (found_key.offset < ctx->pos)
continue;
+ if (found_key.offset > private->last_index)
+ break;
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
continue;
di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
}
/*
- * find the highest existing sequence number in a directory
- * and then set the in-memory index_cnt variable to reflect
- * free sequence numbers
- */
-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_key key, found_key;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- int ret;
-
- key.objectid = btrfs_ino(inode);
- key.type = BTRFS_DIR_INDEX_KEY;
- key.offset = (u64)-1;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- /* FIXME: we should be able to handle this */
- if (ret == 0)
- goto out;
- ret = 0;
-
- if (path->slots[0] == 0) {
- inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
- }
-
- path->slots[0]--;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid != btrfs_ino(inode) ||
- found_key.type != BTRFS_DIR_INDEX_KEY) {
- inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
- }
-
- inode->index_cnt = found_key.offset + 1;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
* helper to find a free sequence number in a given directory. This current
* code is very simple, later versions will do smarter things in the btree
*/