bcache: make bch_btree_check() to be multithreaded
authorColy Li <colyli@suse.de>
Sun, 22 Mar 2020 06:03:01 +0000 (14:03 +0800)
committerJens Axboe <axboe@kernel.dk>
Sun, 22 Mar 2020 16:06:57 +0000 (10:06 -0600)
When registering a cache device, bch_btree_check() is called to check
all btree nodes, to make sure the btree is consistent and not
corrupted.

bch_btree_check() is recursively executed in a single thread, when there
are a lot of data cached and the btree is huge, it may take very long
time to check all the btree nodes. In my testing, I observed it took
around 50 minutes to finish bch_btree_check().

When checking the bcache btree nodes, the cache set is not running yet,
and indeed the whole tree is in read-only state, it is safe to create
multiple threads to check the btree in parallel.

This patch tries to create multiple threads, and each thread tries to
one-by-one check the sub-tree indexed by a key from the btree root node.
The parallel thread number depends on how many keys in the btree root
node. At most BCH_BTR_CHKTHREAD_MAX (64) threads can be created, but in
practice is should be min(cpu-number/2, root-node-keys-number).

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/md/bcache/btree.c
drivers/md/bcache/btree.h

index faf1525..74d66b6 100644 (file)
@@ -1897,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
        return ret;
 }
 
+
+static int bch_btree_check_thread(void *arg)
+{
+       int ret;
+       struct btree_check_info *info = arg;
+       struct btree_check_state *check_state = info->state;
+       struct cache_set *c = check_state->c;
+       struct btree_iter iter;
+       struct bkey *k, *p;
+       int cur_idx, prev_idx, skip_nr;
+       int i, n;
+
+       k = p = NULL;
+       i = n = 0;
+       cur_idx = prev_idx = 0;
+       ret = 0;
+
+       /* root node keys are checked before thread created */
+       bch_btree_iter_init(&c->root->keys, &iter, NULL);
+       k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+       BUG_ON(!k);
+
+       p = k;
+       while (k) {
+               /*
+                * Fetch a root node key index, skip the keys which
+                * should be fetched by other threads, then check the
+                * sub-tree indexed by the fetched key.
+                */
+               spin_lock(&check_state->idx_lock);
+               cur_idx = check_state->key_idx;
+               check_state->key_idx++;
+               spin_unlock(&check_state->idx_lock);
+
+               skip_nr = cur_idx - prev_idx;
+
+               while (skip_nr) {
+                       k = bch_btree_iter_next_filter(&iter,
+                                                      &c->root->keys,
+                                                      bch_ptr_bad);
+                       if (k)
+                               p = k;
+                       else {
+                               /*
+                                * No more keys to check in root node,
+                                * current checking threads are enough,
+                                * stop creating more.
+                                */
+                               atomic_set(&check_state->enough, 1);
+                               /* Update check_state->enough earlier */
+                               smp_mb();
+                               goto out;
+                       }
+                       skip_nr--;
+                       cond_resched();
+               }
+
+               if (p) {
+                       struct btree_op op;
+
+                       btree_node_prefetch(c->root, p);
+                       c->gc_stats.nodes++;
+                       bch_btree_op_init(&op, 0);
+                       ret = bcache_btree(check_recurse, p, c->root, &op);
+                       if (ret)
+                               goto out;
+               }
+               p = NULL;
+               prev_idx = cur_idx;
+               cond_resched();
+       }
+
+out:
+       info->result = ret;
+       /* update check_state->started among all CPUs */
+       smp_mb();
+       if (atomic_dec_and_test(&check_state->started))
+               wake_up(&check_state->wait);
+
+       return ret;
+}
+
+
+
+static int bch_btree_chkthread_nr(void)
+{
+       int n = num_online_cpus()/2;
+
+       if (n == 0)
+               n = 1;
+       else if (n > BCH_BTR_CHKTHREAD_MAX)
+               n = BCH_BTR_CHKTHREAD_MAX;
+
+       return n;
+}
+
 int bch_btree_check(struct cache_set *c)
 {
-       struct btree_op op;
+       int ret = 0;
+       int i;
+       struct bkey *k = NULL;
+       struct btree_iter iter;
+       struct btree_check_state *check_state;
+       char name[32];
 
-       bch_btree_op_init(&op, SHRT_MAX);
+       /* check and mark root node keys */
+       for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
+               bch_initial_mark_key(c, c->root->level, k);
+
+       bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
+
+       if (c->root->level == 0)
+               return 0;
+
+       check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
+       if (!check_state)
+               return -ENOMEM;
+
+       check_state->c = c;
+       check_state->total_threads = bch_btree_chkthread_nr();
+       check_state->key_idx = 0;
+       spin_lock_init(&check_state->idx_lock);
+       atomic_set(&check_state->started, 0);
+       atomic_set(&check_state->enough, 0);
+       init_waitqueue_head(&check_state->wait);
 
-       return bcache_btree_root(check_recurse, c, &op);
+       /*
+        * Run multiple threads to check btree nodes in parallel,
+        * if check_state->enough is non-zero, it means current
+        * running check threads are enough, unncessary to create
+        * more.
+        */
+       for (i = 0; i < check_state->total_threads; i++) {
+               /* fetch latest check_state->enough earlier */
+               smp_mb();
+               if (atomic_read(&check_state->enough))
+                       break;
+
+               check_state->infos[i].result = 0;
+               check_state->infos[i].state = check_state;
+               snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
+               atomic_inc(&check_state->started);
+
+               check_state->infos[i].thread =
+                       kthread_run(bch_btree_check_thread,
+                                   &check_state->infos[i],
+                                   name);
+               if (IS_ERR(check_state->infos[i].thread)) {
+                       pr_err("fails to run thread bch_btrchk[%d]", i);
+                       for (--i; i >= 0; i--)
+                               kthread_stop(check_state->infos[i].thread);
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       }
+
+       wait_event_interruptible(check_state->wait,
+                                atomic_read(&check_state->started) == 0 ||
+                                 test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+
+       for (i = 0; i < check_state->total_threads; i++) {
+               if (check_state->infos[i].result) {
+                       ret = check_state->infos[i].result;
+                       goto out;
+               }
+       }
+
+out:
+       kfree(check_state);
+       return ret;
 }
 
 void bch_initial_gc_finish(struct cache_set *c)
index 19e3026..7c884f2 100644 (file)
@@ -145,6 +145,9 @@ struct btree {
        struct bio              *bio;
 };
 
+
+
+
 #define BTREE_FLAG(flag)                                               \
 static inline bool btree_node_ ## flag(struct btree *b)                        \
 {      return test_bit(BTREE_NODE_ ## flag, &b->flags); }              \
@@ -216,6 +219,25 @@ struct btree_op {
        unsigned int            insert_collision:1;
 };
 
+struct btree_check_state;
+struct btree_check_info {
+       struct btree_check_state        *state;
+       struct task_struct              *thread;
+       int                             result;
+};
+
+#define BCH_BTR_CHKTHREAD_MAX  64
+struct btree_check_state {
+       struct cache_set                *c;
+       int                             total_threads;
+       int                             key_idx;
+       spinlock_t                      idx_lock;
+       atomic_t                        started;
+       atomic_t                        enough;
+       wait_queue_head_t               wait;
+       struct btree_check_info         infos[BCH_BTR_CHKTHREAD_MAX];
+};
+
 static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
 {
        memset(op, 0, sizeof(struct btree_op));