* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
Btrfs: always pin metadata in discard mode
Btrfs: enable discard support
Btrfs: add -o discard option
Btrfs: properly wait log writers during log sync
Btrfs: fix possible ENOSPC problems with truncate
Btrfs: fix btrfs acl #ifdef checks
Btrfs: streamline tree-log btree block writeout
Btrfs: avoid tree log commit when there are no changes
Btrfs: only write one super copy during fsync
#include "btrfs_inode.h"
#include "xattr.h"
-#ifdef CONFIG_BTRFS_POSIX_ACL
+#ifdef CONFIG_BTRFS_FS_POSIX_ACL
static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
{
.set = btrfs_xattr_acl_access_set,
};
-#else /* CONFIG_BTRFS_POSIX_ACL */
+#else /* CONFIG_BTRFS_FS_POSIX_ACL */
int btrfs_acl_chmod(struct inode *inode)
{
return 0;
}
-#endif /* CONFIG_BTRFS_POSIX_ACL */
+#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
* transid of the trans_handle that last modified this inode
*/
u64 last_trans;
+
+ /*
+ * log transid when this inode was last modified
+ */
+ u64 last_sub_trans;
+
/*
* transid that last logged this inode
*/
atomic_t log_writers;
atomic_t log_commit[2];
unsigned long log_transid;
+ unsigned long last_log_commit;
unsigned long log_batch;
pid_t log_start_pid;
bool log_multiple_pids;
#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
#define BTRFS_MOUNT_NOSSD (1 << 9)
+#define BTRFS_MOUNT_DISCARD (1 << 10)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
int btrfs_sync_fs(struct super_block *sb, int wait);
/* acl.c */
-#ifdef CONFIG_BTRFS_POSIX_ACL
+#ifdef CONFIG_BTRFS_FS_POSIX_ACL
int btrfs_check_acl(struct inode *inode, int mask);
#else
#define btrfs_check_acl NULL
atomic_set(&root->log_writers, 0);
root->log_batch = 0;
root->log_transid = 0;
+ root->last_log_commit = 0;
extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping, GFP_NOFS);
WARN_ON(root->log_root);
root->log_root = log_root;
root->log_transid = 0;
+ root->last_log_commit = 0;
return 0;
}
return ret;
}
-#ifdef BIO_RW_DISCARD
static void btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
{
blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
DISCARD_FL_BARRIER);
}
-#endif
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
u64 num_bytes)
{
-#ifdef BIO_RW_DISCARD
int ret;
u64 map_length = num_bytes;
struct btrfs_multi_bio *multi = NULL;
+ if (!btrfs_test_opt(root, DISCARD))
+ return 0;
+
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
bytenr, &map_length, &multi, 0);
}
return ret;
-#else
- return 0;
-#endif
}
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
if (is_data)
goto pinit;
+ /*
+ * discard is sloooow, and so triggering discards on
+ * individual btree blocks isn't a good plan. Just
+ * pin everything in discard mode.
+ */
+ if (btrfs_test_opt(root, DISCARD))
+ goto pinit;
+
buf = btrfs_find_tree_block(root, bytenr, num_bytes);
if (!buf)
goto pinit;
btrfs_end_transaction(trans, root);
else
btrfs_commit_transaction(trans, root);
- } else {
+ } else if (ret != BTRFS_NO_LOG_SYNC) {
btrfs_commit_transaction(trans, root);
+ } else {
+ btrfs_end_transaction(trans, root);
}
}
if (file->f_flags & O_DIRECT) {
int ret = 0;
struct btrfs_trans_handle *trans;
+
+ /* we wait first, since the writeback may change the inode */
+ root->log_batch++;
+ /* the VFS called filemap_fdatawrite for us */
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ root->log_batch++;
+
/*
* check the transaction that last modified this inode
* and see if its already been committed
if (!BTRFS_I(inode)->last_trans)
goto out;
+ /*
+ * if the last transaction that changed this file was before
+ * the current transaction, we can bail out now without any
+ * syncing
+ */
mutex_lock(&root->fs_info->trans_mutex);
if (BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
}
mutex_unlock(&root->fs_info->trans_mutex);
- root->log_batch++;
- filemap_fdatawrite(inode->i_mapping);
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
- root->log_batch++;
-
- if (datasync && !(inode->i_state & I_DIRTY_PAGES))
- goto out;
/*
* ok we haven't committed the transaction yet, lets do a commit
*/
*/
mutex_unlock(&dentry->d_inode->i_mutex);
- if (ret > 0) {
- ret = btrfs_commit_transaction(trans, root);
- } else {
- ret = btrfs_sync_log(trans, root);
- if (ret == 0)
- ret = btrfs_end_transaction(trans, root);
- else
+ if (ret != BTRFS_NO_LOG_SYNC) {
+ if (ret > 0) {
ret = btrfs_commit_transaction(trans, root);
+ } else {
+ ret = btrfs_sync_log(trans, root);
+ if (ret == 0)
+ ret = btrfs_end_transaction(trans, root);
+ else
+ ret = btrfs_commit_transaction(trans, root);
+ }
+ } else {
+ ret = btrfs_end_transaction(trans, root);
}
mutex_lock(&dentry->d_inode->i_mutex);
out:
if ((offset & (blocksize - 1)) == 0)
goto out;
+ ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
+ if (ret)
+ goto out;
+
+ ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
+ if (ret)
+ goto out;
ret = -ENOMEM;
again:
page = grab_cache_page(mapping, index);
- if (!page)
+ if (!page) {
+ btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
+ btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
goto out;
+ }
page_start = page_offset(page);
page_end = page_start + PAGE_CACHE_SIZE - 1;
goto again;
}
+ clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
+ EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
+ GFP_NOFS);
+
ret = btrfs_set_extent_delalloc(inode, page_start, page_end);
if (ret) {
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock:
+ if (ret)
+ btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
+ btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
unlock_page(page);
page_cache_release(page);
out:
if (size <= hole_start)
return 0;
- btrfs_truncate_page(inode->i_mapping, inode->i_size);
+ err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
+ if (err)
+ return err;
while (1) {
struct btrfs_ordered_extent *ordered;
bi->generation = 0;
bi->sequence = 0;
bi->last_trans = 0;
+ bi->last_sub_trans = 0;
bi->logged_trans = 0;
bi->delalloc_bytes = 0;
bi->reserved_bytes = 0;
set_page_dirty(page);
SetPageUptodate(page);
- BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+ BTRFS_I(inode)->last_trans = root->fs_info->generation;
+ BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
+
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock:
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return;
- btrfs_truncate_page(inode->i_mapping, inode->i_size);
+ ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
+ if (ret)
+ return;
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
trans = btrfs_start_transaction(root, 1);
if (!ei)
return NULL;
ei->last_trans = 0;
+ ei->last_sub_trans = 0;
ei->logged_trans = 0;
ei->outstanding_extents = 0;
ei->reserved_extents = 0;
Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
- Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
+ Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
+ Opt_discard, Opt_err,
};
static match_table_t tokens = {
{Opt_notreelog, "notreelog"},
{Opt_flushoncommit, "flushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
+ {Opt_discard, "discard"},
{Opt_err, NULL},
};
info->metadata_ratio);
}
break;
+ case Opt_discard:
+ btrfs_set_opt(info->mount_opt, DISCARD);
+ break;
default:
break;
}
sb->s_export_op = &btrfs_export_ops;
sb->s_xattr = btrfs_xattr_handlers;
sb->s_time_gran = 1;
-#ifdef CONFIG_BTRFS_POSIX_ACL
+#ifdef CONFIG_BTRFS_FS_POSIX_ACL
sb->s_flags |= MS_POSIXACL;
#endif
/*
* when btree blocks are allocated, they have some corresponding bits set for
* them in one of two extent_io trees. This is used to make sure all of
- * those extents are on disk for transaction or log commit
+ * those extents are sent to disk but does not wait on them
*/
-int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages)
+int btrfs_write_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages)
{
int ret;
int err = 0;
page_cache_release(page);
}
}
+ if (err)
+ werr = err;
+ return werr;
+}
+
+/*
+ * when btree blocks are allocated, they have some corresponding bits set for
+ * them in one of two extent_io trees. This is used to make sure all of
+ * those extents are on disk for transaction or log commit. We wait
+ * on all the pages and clear them from the dirty pages state tree
+ */
+int btrfs_wait_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages)
+{
+ int ret;
+ int err = 0;
+ int werr = 0;
+ struct page *page;
+ struct inode *btree_inode = root->fs_info->btree_inode;
+ u64 start = 0;
+ u64 end;
+ unsigned long index;
+
while (1) {
ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
EXTENT_DIRTY);
return werr;
}
+/*
+ * when btree blocks are allocated, they have some corresponding bits set for
+ * them in one of two extent_io trees. This is used to make sure all of
+ * those extents are on disk for transaction or log commit
+ */
+int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages)
+{
+ int ret;
+ int ret2;
+
+ ret = btrfs_write_marked_extents(root, dirty_pages);
+ ret2 = btrfs_wait_marked_extents(root, dirty_pages);
+ return ret || ret2;
+}
+
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct inode *inode)
{
BTRFS_I(inode)->last_trans = trans->transaction->transid;
+ BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
}
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages);
+int btrfs_write_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages);
+int btrfs_wait_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
#endif
int ret;
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
+ u64 log_transid = 0;
mutex_lock(&root->log_mutex);
index1 = root->log_transid % 2;
if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
wait_log_commit(trans, root, root->log_transid - 1);
- while (root->log_multiple_pids) {
+ while (1) {
unsigned long batch = root->log_batch;
- mutex_unlock(&root->log_mutex);
- schedule_timeout_uninterruptible(1);
- mutex_lock(&root->log_mutex);
-
+ if (root->log_multiple_pids) {
+ mutex_unlock(&root->log_mutex);
+ schedule_timeout_uninterruptible(1);
+ mutex_lock(&root->log_mutex);
+ }
wait_for_writer(trans, root);
if (batch == root->log_batch)
break;
goto out;
}
- ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
+ /* we start IO on all the marked extents here, but we don't actually
+ * wait for them until later.
+ */
+ ret = btrfs_write_marked_extents(log, &log->dirty_log_pages);
BUG_ON(ret);
btrfs_set_root_node(&log->root_item, log->node);
root->log_batch = 0;
+ log_transid = root->log_transid;
root->log_transid++;
log->log_transid = root->log_transid;
root->log_start_pid = 0;
index2 = log_root_tree->log_transid % 2;
if (atomic_read(&log_root_tree->log_commit[index2])) {
+ btrfs_wait_marked_extents(log, &log->dirty_log_pages);
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
mutex_unlock(&log_root_tree->log_mutex);
* check the full commit flag again
*/
if (root->fs_info->last_trans_log_full_commit == trans->transid) {
+ btrfs_wait_marked_extents(log, &log->dirty_log_pages);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out_wake_log_root;
ret = btrfs_write_and_wait_marked_extents(log_root_tree,
&log_root_tree->dirty_log_pages);
BUG_ON(ret);
+ btrfs_wait_marked_extents(log, &log->dirty_log_pages);
btrfs_set_super_log_root(&root->fs_info->super_for_commit,
log_root_tree->node->start);
* the running transaction open, so a full commit can't hop
* in and cause problems either.
*/
- write_ctree_super(trans, root->fs_info->tree_root, 2);
+ write_ctree_super(trans, root->fs_info->tree_root, 1);
ret = 0;
+ mutex_lock(&root->log_mutex);
+ if (root->last_log_commit < log_transid)
+ root->last_log_commit = log_transid;
+ mutex_unlock(&root->log_mutex);
+
out_wake_log_root:
atomic_set(&log_root_tree->log_commit[index2], 0);
smp_mb();
return ret;
}
+static int inode_in_log(struct btrfs_trans_handle *trans,
+ struct inode *inode)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret = 0;
+
+ mutex_lock(&root->log_mutex);
+ if (BTRFS_I(inode)->logged_trans == trans->transid &&
+ BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
+ ret = 1;
+ mutex_unlock(&root->log_mutex);
+ return ret;
+}
+
+
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
if (ret)
goto end_no_trans;
+ if (inode_in_log(trans, inode)) {
+ ret = BTRFS_NO_LOG_SYNC;
+ goto end_no_trans;
+ }
+
start_log_trans(trans, root);
ret = btrfs_log_inode(trans, root, inode, inode_only);
#ifndef __TREE_LOG_
#define __TREE_LOG_
+/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
+#define BTRFS_NO_LOG_SYNC 256
+
int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
* attributes are handled directly.
*/
struct xattr_handler *btrfs_xattr_handlers[] = {
-#ifdef CONFIG_BTRFS_POSIX_ACL
+#ifdef CONFIG_BTRFS_FS_POSIX_ACL
&btrfs_xattr_acl_access_handler,
&btrfs_xattr_acl_default_handler,
#endif