};
/*
+ * This describes the state of the block_group for async discard. The state is
+ * needed because async discard works in two passes, with extent discarding
+ * prioritized over bitmap discarding. BTRFS_DISCARD_RESET_CURSOR is set when
+ * we are resetting between lists to prevent contention for discard state
+ * variables (e.g. discard_cursor).
+ */
+enum btrfs_discard_state {
+ /*
+  * Extent pass: the discard work trims extents starting at discard_cursor.
+  * peek_discard_list() enters this state after rewinding the cursor.
+  */
+ BTRFS_DISCARD_EXTENTS,
+ /*
+  * Bitmap pass: entered by the discard work once the extent pass has moved
+  * discard_cursor to the end of the block_group.
+  */
+ BTRFS_DISCARD_BITMAPS,
+ /*
+  * Set when the block_group is newly queued on a discard list. The next
+  * peek_discard_list() resets discard_cursor to the block_group start and
+  * switches to BTRFS_DISCARD_EXTENTS.
+  */
+ BTRFS_DISCARD_RESET_CURSOR,
+};
+
+/*
* Control flags for do_chunk_alloc's force field CHUNK_ALLOC_NO_FORCE means to
* only allocate a chunk if we really need one.
*
struct list_head discard_list;
int discard_index;
u64 discard_eligible_time;
+ u64 discard_cursor;
+ enum btrfs_discard_state discard_state;
/* For dirty block groups */
struct list_head dirty_list;
return &discard_ctl->discard_list[block_group->discard_index];
}
-static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
- struct btrfs_block_group *block_group)
+static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
+ struct btrfs_block_group *block_group)
{
- spin_lock(&discard_ctl->lock);
-
- if (!btrfs_run_discard_work(discard_ctl)) {
- spin_unlock(&discard_ctl->lock);
+ if (!btrfs_run_discard_work(discard_ctl))
return;
- }
if (list_empty(&block_group->discard_list) ||
block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
block_group->discard_index = BTRFS_DISCARD_INDEX_START;
block_group->discard_eligible_time = (ktime_get_ns() +
BTRFS_DISCARD_DELAY);
+ block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
}
list_move_tail(&block_group->discard_list,
get_discard_list(discard_ctl, block_group));
+}
+static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
+ struct btrfs_block_group *block_group)
+{
+ spin_lock(&discard_ctl->lock);
+ __add_to_discard_list(discard_ctl, block_group);
spin_unlock(&discard_ctl->lock);
}
block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
block_group->discard_eligible_time = (ktime_get_ns() +
BTRFS_DISCARD_UNUSED_DELAY);
+ block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
list_add_tail(&block_group->discard_list,
&discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
/**
* peek_discard_list - wrap find_next_block_group()
* @discard_ctl: discard control
+ * @discard_state: the discard_state of the block_group after state management
*
* This wraps find_next_block_group() and sets the block_group to be in use.
+ * discard_state's control flow is managed here. Variables related to
+ * discard_state are reset here as needed (e.g. discard_cursor). @discard_state
+ * is remembered as it may change while we're discarding, but we want the
+ * discard to execute in the context determined here.
*/
static struct btrfs_block_group *peek_discard_list(
- struct btrfs_discard_ctl *discard_ctl)
+ struct btrfs_discard_ctl *discard_ctl,
+ enum btrfs_discard_state *discard_state)
{
struct btrfs_block_group *block_group;
const u64 now = ktime_get_ns();
spin_lock(&discard_ctl->lock);
-
+again:
block_group = find_next_block_group(discard_ctl, now);
- if (block_group && now < block_group->discard_eligible_time)
+ if (block_group && now > block_group->discard_eligible_time) {
+ if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
+ block_group->used != 0) {
+ __add_to_discard_list(discard_ctl, block_group);
+ goto again;
+ }
+ if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
+ block_group->discard_cursor = block_group->start;
+ block_group->discard_state = BTRFS_DISCARD_EXTENTS;
+ }
+ discard_ctl->block_group = block_group;
+ *discard_state = block_group->discard_state;
+ } else {
block_group = NULL;
-
- discard_ctl->block_group = block_group;
+ }
spin_unlock(&discard_ctl->lock);
* btrfs_discard_workfn - discard work function
* @work: work
*
- * This finds the next block_group to start discarding and then discards it.
+ * This finds the next block_group to start discarding and then discards a
+ * single region. It does this in a two-pass fashion: first extents and second
+ * bitmaps. Completely discarded block groups are sent to the unused_bgs path.
*/
static void btrfs_discard_workfn(struct work_struct *work)
{
struct btrfs_discard_ctl *discard_ctl;
struct btrfs_block_group *block_group;
+ enum btrfs_discard_state discard_state;
u64 trimmed = 0;
discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
- block_group = peek_discard_list(discard_ctl);
+ block_group = peek_discard_list(discard_ctl, &discard_state);
if (!block_group || !btrfs_run_discard_work(discard_ctl))
return;
- btrfs_trim_block_group(block_group, &trimmed, block_group->start,
- btrfs_block_group_end(block_group), 0);
+ /* Perform discarding */
+ if (discard_state == BTRFS_DISCARD_BITMAPS)
+ btrfs_trim_block_group_bitmaps(block_group, &trimmed,
+ block_group->discard_cursor,
+ btrfs_block_group_end(block_group),
+ 0, true);
+ else
+ btrfs_trim_block_group_extents(block_group, &trimmed,
+ block_group->discard_cursor,
+ btrfs_block_group_end(block_group),
+ 0, true);
+
+ /* Determine next steps for a block_group */
+ if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
+ if (discard_state == BTRFS_DISCARD_BITMAPS) {
+ btrfs_finish_discard_pass(discard_ctl, block_group);
+ } else {
+ block_group->discard_cursor = block_group->start;
+ spin_lock(&discard_ctl->lock);
+ if (block_group->discard_state !=
+ BTRFS_DISCARD_RESET_CURSOR)
+ block_group->discard_state =
+ BTRFS_DISCARD_BITMAPS;
+ spin_unlock(&discard_ctl->lock);
+ }
+ }
+
+ spin_lock(&discard_ctl->lock);
+ discard_ctl->block_group = NULL;
+ spin_unlock(&discard_ctl->lock);
- btrfs_finish_discard_pass(discard_ctl, block_group);
btrfs_discard_schedule_work(discard_ctl, false);
}
return ret;
}
+/*
+ * If @async is set, then we will trim 1 region and return.
+ */
static int trim_no_bitmap(struct btrfs_block_group *block_group,
- u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+ u64 *total_trimmed, u64 start, u64 end, u64 minlen,
+ bool async)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
mutex_lock(&ctl->cache_writeout_mutex);
spin_lock(&ctl->tree_lock);
- if (ctl->free_space < minlen) {
- spin_unlock(&ctl->tree_lock);
- mutex_unlock(&ctl->cache_writeout_mutex);
- break;
- }
+ if (ctl->free_space < minlen)
+ goto out_unlock;
entry = tree_search_offset(ctl, start, 0, 1);
- if (!entry) {
- spin_unlock(&ctl->tree_lock);
- mutex_unlock(&ctl->cache_writeout_mutex);
- break;
- }
+ if (!entry)
+ goto out_unlock;
- /* skip bitmaps */
- while (entry->bitmap) {
+ /* Skip bitmaps and, if async, already trimmed entries */
+ while (entry->bitmap ||
+ (async && btrfs_free_space_trimmed(entry))) {
node = rb_next(&entry->offset_index);
- if (!node) {
- spin_unlock(&ctl->tree_lock);
- mutex_unlock(&ctl->cache_writeout_mutex);
- goto out;
- }
+ if (!node)
+ goto out_unlock;
entry = rb_entry(node, struct btrfs_free_space,
offset_index);
}
- if (entry->offset >= end) {
- spin_unlock(&ctl->tree_lock);
- mutex_unlock(&ctl->cache_writeout_mutex);
- break;
- }
+ if (entry->offset >= end)
+ goto out_unlock;
extent_start = entry->offset;
extent_bytes = entry->bytes;
ret = do_trimming(block_group, total_trimmed, start, bytes,
extent_start, extent_bytes, extent_trim_state,
&trim_entry);
- if (ret)
+ if (ret) {
+ block_group->discard_cursor = start + bytes;
break;
+ }
next:
start += bytes;
+ block_group->discard_cursor = start;
+ if (async && *total_trimmed)
+ break;
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
cond_resched();
}
-out:
+
+ return ret;
+
+out_unlock:
+ block_group->discard_cursor = btrfs_block_group_end(block_group);
+ spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
+
return ret;
}
entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
}
+/*
+ * If @async is set, then we will trim 1 region and return.
+ */
static int trim_bitmaps(struct btrfs_block_group *block_group,
- u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+ u64 *total_trimmed, u64 start, u64 end, u64 minlen,
+ bool async)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
spin_lock(&ctl->tree_lock);
if (ctl->free_space < minlen) {
+ block_group->discard_cursor =
+ btrfs_block_group_end(block_group);
spin_unlock(&ctl->tree_lock);
mutex_unlock(&ctl->cache_writeout_mutex);
break;
}
entry = tree_search_offset(ctl, offset, 1, 0);
- if (!entry || btrfs_free_space_trimmed(entry)) {
+ if (!entry || (async && start == offset &&
+ btrfs_free_space_trimmed(entry))) {
spin_unlock(&ctl->tree_lock);
mutex_unlock(&ctl->cache_writeout_mutex);
next_bitmap = true;
goto next;
}
+ /*
+ * We already trimmed a region, but are using the locking above
+ * to reset the trim_state.
+ */
+ if (async && *total_trimmed) {
+ spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
+ goto out;
+ }
+
bytes = min(bytes, end - start);
if (bytes < minlen) {
entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
start, bytes, 0, &trim_entry);
if (ret) {
reset_trimming_bitmap(ctl, offset);
+ block_group->discard_cursor =
+ btrfs_block_group_end(block_group);
break;
}
next:
} else {
start += bytes;
}
+ block_group->discard_cursor = start;
if (fatal_signal_pending(current)) {
if (start != offset)
cond_resched();
}
+ if (offset >= end)
+ block_group->discard_cursor = end;
+
+out:
return ret;
}
btrfs_get_block_group_trimming(block_group);
spin_unlock(&block_group->lock);
- ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
+ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
if (ret)
goto out;
- ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
+ ret = trim_bitmaps(block_group, trimmed, start, end, minlen, false);
div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
/* If we ended in the middle of a bitmap, reset the trimming flag */
if (rem)
return ret;
}
+/*
+ * Trim the extent (non-bitmap) free space entries of @block_group between
+ * @start and @end by calling trim_no_bitmap().
+ *
+ * *@trimmed is reset to 0 on entry and then passed to trim_no_bitmap() as its
+ * running total. If the block group is already marked removed, nothing is
+ * trimmed and 0 is returned. Otherwise the call is bracketed by
+ * btrfs_get_block_group_trimming()/btrfs_put_block_group_trimming() and the
+ * return value of trim_no_bitmap() is handed back to the caller.
+ *
+ * @minlen and @async are forwarded unchanged; with @async set,
+ * trim_no_bitmap() trims 1 region and returns.
+ */
+int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
+ bool async)
+{
+ int ret;
+
+ *trimmed = 0;
+
+ /* ->removed is checked and the trimming ref taken under block_group->lock */
+ spin_lock(&block_group->lock);
+ if (block_group->removed) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ btrfs_get_block_group_trimming(block_group);
+ spin_unlock(&block_group->lock);
+
+ ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
+ btrfs_put_block_group_trimming(block_group);
+
+ return ret;
+}
+
+/*
+ * Trim the bitmap free space entries of @block_group between @start and @end
+ * by calling trim_bitmaps().
+ *
+ * *@trimmed is reset to 0 on entry and then passed to trim_bitmaps() as its
+ * running total. If the block group is already marked removed, nothing is
+ * trimmed and 0 is returned. Otherwise the call is bracketed by
+ * btrfs_get_block_group_trimming()/btrfs_put_block_group_trimming() and the
+ * return value of trim_bitmaps() is handed back to the caller.
+ *
+ * @minlen and @async are forwarded unchanged; with @async set,
+ * trim_bitmaps() trims 1 region and returns.
+ */
+int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
+ bool async)
+{
+ int ret;
+
+ *trimmed = 0;
+
+ /* ->removed is checked and the trimming ref taken under block_group->lock */
+ spin_lock(&block_group->lock);
+ if (block_group->removed) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ btrfs_get_block_group_trimming(block_group);
+ spin_unlock(&block_group->lock);
+
+ ret = trim_bitmaps(block_group, trimmed, start, end, minlen, async);
+ btrfs_put_block_group_trimming(block_group);
+
+ return ret;
+}
+
/*
* Find the left-most item in the cache tree, and then return the
* smallest inode number in the item.
struct btrfs_free_cluster *cluster);
int btrfs_trim_block_group(struct btrfs_block_group *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen);
+int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
+ bool async);
+int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
+ u64 *trimmed, u64 start, u64 end, u64 minlen,
+ bool async);
/* Support functions for running our sanity tests */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS