*/
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
- struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *info;
u64 left;
lockdep_assert_held(&fs_info->chunk_mutex);
info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
-again:
spin_lock(&info->lock);
left = info->total_bytes - btrfs_space_info_used(info, true);
spin_unlock(&info->lock);
if (left < thresh) {
u64 flags = btrfs_system_alloc_profile(fs_info);
- u64 reserved = atomic64_read(&cur_trans->chunk_bytes_reserved);
-
- /*
- * If there's not available space for the chunk tree (system
- * space) and there are other tasks that reserved space for
- * creating a new system block group, wait for them to complete
- * the creation of their system block group and release excess
- * reserved space. We do this because:
- *
- * *) We can end up allocating more system chunks than necessary
- * when there are multiple tasks that are concurrently
- * allocating block groups, which can lead to exhaustion of
- * the system array in the superblock;
- *
- * *) If we allocate extra and unnecessary system block groups,
- * despite being empty for a long time, and possibly forever,
- * they end not being added to the list of unused block groups
- * because that typically happens only when deallocating the
- * last extent from a block group - which never happens since
- * we never allocate from them in the first place. The few
- * exceptions are when mounting a filesystem or running scrub,
- * which add unused block groups to the list of unused block
- * groups, to be deleted by the cleaner kthread.
- * And even when they are added to the list of unused block
- * groups, it can take a long time until they get deleted,
- * since the cleaner kthread might be sleeping or busy with
- * other work (deleting subvolumes, running delayed iputs,
- * defrag scheduling, etc);
- *
- * This is rare in practice, but can happen when too many tasks
- * are allocating blocks groups in parallel (via fallocate())
- * and before the one that reserved space for a new system block
- * group finishes the block group creation and releases the space
- * reserved in excess (at btrfs_create_pending_block_groups()),
- * other tasks end up here and see free system space temporarily
- * not enough for updating the chunk tree.
- *
- * We unlock the chunk mutex before waiting for such tasks and
- * lock it again after the wait, otherwise we would deadlock.
- * It is safe to do so because allocating a system chunk is the
- * first thing done while allocating a new block group.
- */
- if (reserved > trans->chunk_bytes_reserved) {
- const u64 min_needed = reserved - thresh;
-
- mutex_unlock(&fs_info->chunk_mutex);
- wait_event(cur_trans->chunk_reserve_wait,
- atomic64_read(&cur_trans->chunk_bytes_reserved) <=
- min_needed);
- mutex_lock(&fs_info->chunk_mutex);
- goto again;
- }
/*
* Ignore failure to create system chunk. We might end up not
ret = btrfs_block_rsv_add(fs_info->chunk_root,
&fs_info->chunk_block_rsv,
thresh, BTRFS_RESERVE_NO_FLUSH);
- if (!ret) {
- atomic64_add(thresh, &cur_trans->chunk_bytes_reserved);
+ if (!ret)
trans->chunk_bytes_reserved += thresh;
- }
}
}
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_transaction *cur_trans = trans->transaction;
if (!trans->chunk_bytes_reserved)
return;
btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv,
trans->chunk_bytes_reserved, NULL);
- atomic64_sub(trans->chunk_bytes_reserved, &cur_trans->chunk_bytes_reserved);
- cond_wake_up(&cur_trans->chunk_reserve_wait);
trans->chunk_bytes_reserved = 0;
}
spin_lock_init(&cur_trans->dropped_roots_lock);
INIT_LIST_HEAD(&cur_trans->releasing_ebs);
spin_lock_init(&cur_trans->releasing_ebs_lock);
- atomic64_set(&cur_trans->chunk_bytes_reserved, 0);
- init_waitqueue_head(&cur_trans->chunk_reserve_wait);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);