#include <linux/types.h>
#include <linux/workqueue.h>
+#include "bset.h"
#include "util.h"
#include "closure.h"
*/
mempool_t *fill_iter;
- mempool_t *sort_pool;
- unsigned sort_crit_factor;
+ struct bset_sort_state sort;
/* List of buckets we're currently writing data to */
struct list_head data_buckets;
unsigned congested_read_threshold_us;
unsigned congested_write_threshold_us;
- struct time_stats sort_time;
struct time_stats btree_gc_time;
struct time_stats btree_split_time;
struct time_stats btree_read_time;
/* Mergesort */
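+/* Frees the fallback buffer mempool created by bch_bset_sort_state_init() */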
+void bch_bset_sort_state_free(struct bset_sort_state *state)
+{
+ if (state->pool)
+ mempool_destroy(state->pool);
+}
+
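+/*
+ * page_order caps the fallback buffer size: __btree_sort() may ask the
+ * pool for anything up to (1 << page_order) pages. The cache set passes
+ * ilog2(c->btree_pages).
+ */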
+int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
+{
+ spin_lock_init(&state->time.lock);
+
+ state->page_order = page_order;
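+ /* Scales the lazy sort threshold with node size; see bch_btree_sort_lazy() */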
+ state->crit_factor = int_sqrt(1 << page_order);
+
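+ /* One reserved buffer guarantees sorts always make forward progress */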
+ state->pool = mempool_create_page_pool(1, page_order);
+ if (!state->pool)
+ return -ENOMEM;
+
+ return 0;
+}
+
static void sort_key_next(struct btree_iter *iter,
struct btree_iter_set *i)
{
}
static void __btree_sort(struct btree *b, struct btree_iter *iter,
- unsigned start, unsigned order, bool fixup)
+ unsigned start, unsigned order, bool fixup,
+ struct bset_sort_state *state)
{
uint64_t start_time;
- bool remove_stale = !b->written;
bool used_mempool = false;
struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
order);
if (!out) {
- out = page_address(mempool_alloc(b->c->sort_pool, GFP_NOIO));
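+ /* The fallback pool's buffers are only state->page_order big */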
+ BUG_ON(order > state->page_order);
+
+ out = page_address(mempool_alloc(state->pool, GFP_NOIO));
used_mempool = true;
- order = ilog2(bucket_pages(b->c));
+ order = state->page_order;
}
start_time = local_clock();
- btree_mergesort(b, out, iter, fixup, remove_stale);
+ btree_mergesort(b, out, iter, fixup, false);
b->nsets = start;
if (!start && order == b->page_order) {
}
if (used_mempool)
- mempool_free(virt_to_page(out), b->c->sort_pool);
+ mempool_free(virt_to_page(out), state->pool);
else
free_pages((unsigned long) out, order);
- if (b->written)
- bset_build_written_tree(b);
+ bset_build_written_tree(b);
if (!start)
- bch_time_stats_update(&b->c->sort_time, start_time);
+ bch_time_stats_update(&state->time, start_time);
}
-void bch_btree_sort_partial(struct btree *b, unsigned start)
+void bch_btree_sort_partial(struct btree *b, unsigned start,
+ struct bset_sort_state *state)
{
size_t order = b->page_order, keys = 0;
struct btree_iter iter;
order = ilog2(order);
}
- __btree_sort(b, &iter, start, order, false);
+ __btree_sort(b, &iter, start, order, false, state);
EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
}
-void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter)
+void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter,
+ struct bset_sort_state *state)
{
- BUG_ON(!b->written);
- __btree_sort(b, iter, 0, b->page_order, true);
+ __btree_sort(b, iter, 0, b->page_order, true, state);
}
-void bch_btree_sort_into(struct btree *b, struct btree *new)
+void bch_btree_sort_into(struct btree *b, struct btree *new,
+ struct bset_sort_state *state)
{
uint64_t start_time = local_clock();
btree_mergesort(b, new->sets->data, &iter, false, true);
- bch_time_stats_update(&b->c->sort_time, start_time);
+ bch_time_stats_update(&state->time, start_time);
- bkey_copy_key(&new->key, &b->key);
new->sets->size = 0;
}
#define SORT_CRIT (4096 / sizeof(uint64_t))
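+/* Base lazy-sort threshold: 4096 bytes worth of u64 keys */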
-void bch_btree_sort_lazy(struct btree *b)
+void bch_btree_sort_lazy(struct btree *b, struct bset_sort_state *state)
{
unsigned crit = SORT_CRIT;
int i;
if (!b->nsets)
goto out;
- /* If not a leaf node, always sort */
- if (b->level) {
- bch_btree_sort(b);
- return;
- }
-
for (i = b->nsets - 1; i >= 0; --i) {
- crit *= b->c->sort_crit_factor;
+ crit *= state->crit_factor;
if (b->sets[i].data->keys < crit) {
- bch_btree_sort_partial(b, i);
+ bch_btree_sort_partial(b, i, state);
return;
}
}
/* Sort if we'd overflow */
if (b->nsets + 1 == MAX_BSETS) {
- bch_btree_sort(b);
+ bch_btree_sort(b, state);
return;
}
#include <linux/slab.h>
+#include "util.h" /* for time_stats */
+
/*
* BKEYS:
*
struct bset *data;
};
+/* Sorting */
+
+struct bset_sort_state {
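+ /* Fallback page pool, used when a temporary sort buffer can't be allocated */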
+ mempool_t *pool;
+
+ unsigned page_order;
+ unsigned crit_factor;
+
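+ /* Time spent in btree_mergesort(); shows up in sysfs as btree_sort */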
+ struct time_stats time;
+};
+
+void bch_bset_sort_state_free(struct bset_sort_state *);
+int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
+void bch_btree_sort_lazy(struct btree *, struct bset_sort_state *);
+void bch_btree_sort_into(struct btree *, struct btree *,
+ struct bset_sort_state *);
+void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *,
+ struct bset_sort_state *);
+void bch_btree_sort_partial(struct btree *, unsigned,
+ struct bset_sort_state *);
+
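+/* Sort the whole node: a partial sort starting from the first bset */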
+static inline void bch_btree_sort(struct btree *b,
+ struct bset_sort_state *state)
+{
+ bch_btree_sort_partial(b, 0, state);
+}
+
/* Keylists */
struct keylist {
})
bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
-void bch_btree_sort_lazy(struct btree *);
-void bch_btree_sort_into(struct btree *, struct btree *);
-void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
-void bch_btree_sort_partial(struct btree *, unsigned);
-
-static inline void bch_btree_sort(struct btree *b)
-{
- bch_btree_sort_partial(b, 0);
-}
int bch_bset_print_stats(struct cache_set *, char *);
if (i->seq == b->sets[0].data->seq)
goto err;
- bch_btree_sort_and_fix_extents(b, iter);
+ bch_btree_sort_and_fix_extents(b, iter, &b->c->sort);
i = b->sets[0].data;
err = "short btree key";
atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
- bch_btree_sort_lazy(b);
+ /* If not a leaf node, always sort */
+ if (b->level && b->nsets)
+ bch_btree_sort(b, &b->c->sort);
+ else
+ bch_btree_sort_lazy(b, &b->c->sort);
/*
* do verify if there was more than one set initially (i.e. we did a
static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
{
struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
- if (!IS_ERR_OR_NULL(n))
- bch_btree_sort_into(b, n);
+ if (!IS_ERR_OR_NULL(n)) {
+ bch_btree_sort_into(b, n, &b->c->sort);
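+ /* The replacement must cover the same key range as the original node */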
+ bkey_copy_key(&n->key, &b->key);
+ }
return n;
}
if (ca)
kobject_put(&ca->kobj);
+ bch_bset_sort_state_free(&c->sort);
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
if (c->bio_split)
c->btree_pages = max_t(int, c->btree_pages / 4,
BTREE_MAX_PAGES);
- c->sort_crit_factor = int_sqrt(c->btree_pages);
-
sema_init(&c->sb_write_mutex, 1);
mutex_init(&c->bucket_lock);
init_waitqueue_head(&c->try_wait);
init_waitqueue_head(&c->bucket_wait);
sema_init(&c->uuid_write_mutex, 1);
- spin_lock_init(&c->sort_time.lock);
spin_lock_init(&c->btree_gc_time.lock);
spin_lock_init(&c->btree_split_time.lock);
spin_lock_init(&c->btree_read_time.lock);
bucket_pages(c))) ||
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
!(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
- !(c->sort_pool = mempool_create_page_pool(1,
- ilog2(bucket_pages(c)))) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
bch_journal_alloc(c) ||
bch_btree_cache_alloc(c) ||
- bch_open_buckets_alloc(c))
+ bch_open_buckets_alloc(c) ||
+ bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
goto err;
c->congested_read_threshold_us = 2000;
sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms);
sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
- sysfs_print_time_stats(&c->sort_time, btree_sort, ms, us);
+ sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);
sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us);