bcache: Fix/revamp tracepoints
author Kent Overstreet <koverstreet@google.com>
Fri, 26 Apr 2013 22:39:55 +0000 (15:39 -0700)
committer Kent Overstreet <koverstreet@google.com>
Thu, 27 Jun 2013 00:09:15 +0000 (17:09 -0700)
The tracepoints were reworked to be more sensible; this also fixes a
null pointer deref in one of them.

Converted some of the pr_debug()s to tracepoints - this is partly a
performance optimization: pr_debug() used to compile away to an empty
macro when neither DEBUG nor CONFIG_DYNAMIC_DEBUG was defined, but at
some point it was changed to an empty inline function, whose arguments
are still evaluated.

Some of the pr_debug() statements had rather expensive function calls
as arguments, so that code was getting run unnecessarily even on
non-debug kernels - in some fast paths, too.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
15 files changed:
drivers/md/bcache/alloc.c
drivers/md/bcache/bcache.h
drivers/md/bcache/bset.h
drivers/md/bcache/btree.c
drivers/md/bcache/io.c
drivers/md/bcache/journal.c
drivers/md/bcache/movinggc.c
drivers/md/bcache/request.c
drivers/md/bcache/request.h
drivers/md/bcache/super.c
drivers/md/bcache/sysfs.c
drivers/md/bcache/trace.c
drivers/md/bcache/util.h
drivers/md/bcache/writeback.c
include/trace/events/bcache.h

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 38428f4..b54b73b 100644
@@ -65,6 +65,7 @@
 
 #include <linux/kthread.h>
 #include <linux/random.h>
+#include <trace/events/bcache.h>
 
 #define MAX_IN_FLIGHT_DISCARDS         8U
 
@@ -351,10 +352,7 @@ static void invalidate_buckets(struct cache *ca)
                break;
        }
 
-       pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
-                fifo_used(&ca->free), ca->free.size,
-                fifo_used(&ca->free_inc), ca->free_inc.size,
-                fifo_used(&ca->unused), ca->unused.size);
+       trace_bcache_alloc_invalidate(ca);
 }
 
 #define allocator_wait(ca, cond)                                       \
@@ -473,9 +471,7 @@ again:
                return r;
        }
 
-       pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu",
-                atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free),
-                fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+       trace_bcache_alloc_fail(ca);
 
        if (cl) {
                closure_wait(&ca->set->bucket_wait, cl);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index ad4957b..59c15e0 100644
 #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
 
 #include <linux/bio.h>
-#include <linux/blktrace_api.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -901,8 +900,6 @@ static inline unsigned local_clock_us(void)
        return local_clock() >> 10;
 }
 
-#define MAX_BSETS              4U
-
 #define BTREE_PRIO             USHRT_MAX
 #define INITIAL_PRIO           32768
 
@@ -1107,23 +1104,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k)
                atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
 }
 
-/* Blktrace macros */
-
-#define blktrace_msg(c, fmt, ...)                                      \
-do {                                                                   \
-       struct request_queue *q = bdev_get_queue(c->bdev);              \
-       if (q)                                                          \
-               blk_add_trace_msg(q, fmt, ##__VA_ARGS__);               \
-} while (0)
-
-#define blktrace_msg_all(s, fmt, ...)                                  \
-do {                                                                   \
-       struct cache *_c;                                               \
-       unsigned i;                                                     \
-       for_each_cache(_c, (s), i)                                      \
-               blktrace_msg(_c, fmt, ##__VA_ARGS__);                   \
-} while (0)
-
 static inline void cached_dev_put(struct cached_dev *dc)
 {
        if (atomic_dec_and_test(&dc->count))
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 57a9cff..ae115a2 100644
@@ -1,6 +1,8 @@
 #ifndef _BCACHE_BSET_H
 #define _BCACHE_BSET_H
 
+#include <linux/slab.h>
+
 /*
  * BKEYS:
  *
 
 /* Btree key comparison/iteration */
 
+#define MAX_BSETS              4U
+
 struct btree_iter {
        size_t size, used;
        struct btree_iter_set {
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index aaec186..218d486 100644
@@ -223,8 +223,9 @@ void bch_btree_node_read(struct btree *b)
        struct closure cl;
        struct bio *bio;
 
+       trace_bcache_btree_read(b);
+
        closure_init_stack(&cl);
-       pr_debug("%s", pbtree(b));
 
        bio = bch_bbio_alloc(b->c);
        bio->bi_rw      = REQ_META|READ_SYNC;
@@ -234,7 +235,6 @@ void bch_btree_node_read(struct btree *b)
 
        bch_bio_map(bio, b->sets[0].data);
 
-       trace_bcache_btree_read(bio);
        bch_submit_bbio(bio, b->c, &b->key, 0);
        closure_sync(&cl);
 
@@ -343,7 +343,6 @@ static void do_btree_node_write(struct btree *b)
                        memcpy(page_address(bv->bv_page),
                               base + j * PAGE_SIZE, PAGE_SIZE);
 
-               trace_bcache_btree_write(b->bio);
                bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
                continue_at(cl, btree_node_write_done, NULL);
@@ -351,7 +350,6 @@ static void do_btree_node_write(struct btree *b)
                b->bio->bi_vcnt = 0;
                bch_bio_map(b->bio, i);
 
-               trace_bcache_btree_write(b->bio);
                bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
                closure_sync(cl);
@@ -363,10 +361,13 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
        struct bset *i = b->sets[b->nsets].data;
 
+       trace_bcache_btree_write(b);
+
        BUG_ON(current->bio_list);
        BUG_ON(b->written >= btree_blocks(b));
        BUG_ON(b->written && !i->keys);
        BUG_ON(b->sets->data->seq != i->seq);
+       bch_check_key_order(b, i);
 
        cancel_delayed_work(&b->work);
 
@@ -376,12 +377,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
        clear_bit(BTREE_NODE_dirty,      &b->flags);
        change_bit(BTREE_NODE_write_idx, &b->flags);
 
-       bch_check_key_order(b, i);
-
        do_btree_node_write(b);
 
-       pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
-
        b->written += set_blocks(i, b->c);
        atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
                        &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
@@ -752,6 +749,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
        int ret = -ENOMEM;
        struct btree *i;
 
+       trace_bcache_btree_cache_cannibalize(c);
+
        if (!cl)
                return ERR_PTR(-ENOMEM);
 
@@ -770,7 +769,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
                return ERR_PTR(-EAGAIN);
        }
 
-       /* XXX: tracepoint */
        c->try_harder = cl;
        c->try_harder_start = local_clock();
 retry:
@@ -956,13 +954,14 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 {
        unsigned i;
 
+       trace_bcache_btree_node_free(b);
+
        /*
         * The BUG_ON() in btree_node_get() implies that we must have a write
         * lock on parent to free or even invalidate a node
         */
        BUG_ON(op->lock <= b->level);
        BUG_ON(b == b->c->root);
-       pr_debug("bucket %s", pbtree(b));
 
        if (btree_node_dirty(b))
                btree_complete_write(b, btree_current_write(b));
@@ -1012,12 +1011,16 @@ retry:
        bch_bset_init_next(b);
 
        mutex_unlock(&c->bucket_lock);
+
+       trace_bcache_btree_node_alloc(b);
        return b;
 err_free:
        bch_bucket_free(c, &k.key);
        __bkey_put(c, &k.key);
 err:
        mutex_unlock(&c->bucket_lock);
+
+       trace_bcache_btree_node_alloc_fail(b);
        return b;
 }
 
@@ -1254,7 +1257,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
        btree_node_free(r->b, op);
        up_write(&r->b->lock);
 
-       pr_debug("coalesced %u nodes", nodes);
+       trace_bcache_btree_gc_coalesce(nodes);
 
        gc->nodes--;
        nodes--;
@@ -1479,8 +1482,7 @@ static void bch_btree_gc(struct closure *cl)
        struct btree_op op;
        uint64_t start_time = local_clock();
 
-       trace_bcache_gc_start(c->sb.set_uuid);
-       blktrace_msg_all(c, "Starting gc");
+       trace_bcache_gc_start(c);
 
        memset(&stats, 0, sizeof(struct gc_stat));
        closure_init_stack(&writes);
@@ -1496,9 +1498,7 @@ static void bch_btree_gc(struct closure *cl)
        closure_sync(&writes);
 
        if (ret) {
-               blktrace_msg_all(c, "Stopped gc");
                pr_warn("gc failed!");
-
                continue_at(cl, bch_btree_gc, bch_gc_wq);
        }
 
@@ -1519,8 +1519,7 @@ static void bch_btree_gc(struct closure *cl)
        stats.in_use    = (c->nbuckets - available) * 100 / c->nbuckets;
        memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
 
-       blktrace_msg_all(c, "Finished gc");
-       trace_bcache_gc_end(c->sb.set_uuid);
+       trace_bcache_gc_end(c);
 
        continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
@@ -1901,12 +1900,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 
        split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
 
-       pr_debug("%ssplitting at %s keys %i", split ? "" : "not ",
-                pbtree(b), n1->sets[0].data->keys);
-
        if (split) {
                unsigned keys = 0;
 
+               trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
+
                n2 = bch_btree_node_alloc(b->c, b->level, &op->cl);
                if (IS_ERR(n2))
                        goto err_free1;
@@ -1941,8 +1939,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
                bch_keylist_add(&op->keys, &n2->key);
                bch_btree_node_write(n2, &op->cl);
                rw_unlock(true, n2);
-       } else
+       } else {
+               trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
+
                bch_btree_insert_keys(n1, op);
+       }
 
        bch_keylist_add(&op->keys, &n1->key);
        bch_btree_node_write(n1, &op->cl);
@@ -2117,6 +2118,8 @@ void bch_btree_set_root(struct btree *b)
 {
        unsigned i;
 
+       trace_bcache_btree_set_root(b);
+
        BUG_ON(!b->written);
 
        for (i = 0; i < KEY_PTRS(&b->key); i++)
@@ -2130,7 +2133,6 @@ void bch_btree_set_root(struct btree *b)
        __bkey_put(b->c, &b->key);
 
        bch_journal_meta(b->c, NULL);
-       pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0));
 }
 
 /* Cache lookup */
@@ -2216,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op,
                n->bi_end_io    = bch_cache_read_endio;
                n->bi_private   = &s->cl;
 
-               trace_bcache_cache_hit(n);
                __bch_submit_bbio(n, b->c);
        }
 
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index d285cd4..0f6d696 100644
@@ -9,6 +9,8 @@
 #include "bset.h"
 #include "debug.h"
 
+#include <linux/blkdev.h>
+
 static void bch_bi_idx_hack_endio(struct bio *bio, int error)
 {
        struct bio *p = bio->bi_private;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 970d819..5ca2214 100644
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 /*
  * Journal replay/recovery:
  *
@@ -300,7 +302,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list,
                for (k = i->j.start;
                     k < end(&i->j);
                     k = bkey_next(k)) {
-                       pr_debug("%s", pkey(k));
+                       trace_bcache_journal_replay_key(k);
+
                        bkey_copy(op->keys.top, k);
                        bch_keylist_push(&op->keys);
 
@@ -712,7 +715,8 @@ void bch_journal(struct closure *cl)
        spin_lock(&c->journal.lock);
 
        if (journal_full(&c->journal)) {
-               /* XXX: tracepoint */
+               trace_bcache_journal_full(c);
+
                closure_wait(&c->journal.wait, cl);
 
                journal_reclaim(c);
@@ -728,13 +732,15 @@ void bch_journal(struct closure *cl)
 
        if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS ||
            b > c->journal.blocks_free) {
-               /* XXX: If we were inserting so many keys that they won't fit in
+               trace_bcache_journal_entry_full(c);
+
+               /*
+                * XXX: If we were inserting so many keys that they won't fit in
                 * an _empty_ journal write, we'll deadlock. For now, handle
                 * this in bch_keylist_realloc() - but something to think about.
                 */
                BUG_ON(!w->data->keys);
 
-               /* XXX: tracepoint */
                BUG_ON(!closure_wait(&w->wait, cl));
 
                closure_flush(&c->journal.io);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 8589512..04f6b97 100644
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 struct moving_io {
        struct keybuf_key       *w;
        struct search           s;
@@ -49,9 +51,8 @@ static void write_moving_finish(struct closure *cl)
        while (bv-- != bio->bi_io_vec)
                __free_page(bv->bv_page);
 
-       pr_debug("%s %s", io->s.op.insert_collision
-                ? "collision moving" : "moved",
-                pkey(&io->w->key));
+       if (io->s.op.insert_collision)
+               trace_bcache_gc_copy_collision(&io->w->key);
 
        bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
 
@@ -94,8 +95,6 @@ static void write_moving(struct closure *cl)
        struct moving_io *io = container_of(s, struct moving_io, s);
 
        if (!s->error) {
-               trace_bcache_write_moving(&io->bio.bio);
-
                moving_init(io);
 
                io->bio.bio.bi_sector   = KEY_START(&io->w->key);
@@ -122,7 +121,6 @@ static void read_moving_submit(struct closure *cl)
        struct moving_io *io = container_of(s, struct moving_io, s);
        struct bio *bio = &io->bio.bio;
 
-       trace_bcache_read_moving(bio);
        bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
 
        continue_at(cl, write_moving, bch_gc_wq);
@@ -162,7 +160,7 @@ static void read_moving(struct closure *cl)
                if (bch_bio_alloc_pages(bio, GFP_KERNEL))
                        goto err;
 
-               pr_debug("%s", pkey(&w->key));
+               trace_bcache_gc_copy(&w->key);
 
                closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index e5ff12e..6954699 100644
@@ -530,10 +530,9 @@ static void bch_insert_data_loop(struct closure *cl)
                if (KEY_CSUM(k))
                        bio_csum(n, k);
 
-               pr_debug("%s", pkey(k));
+               trace_bcache_cache_insert(k);
                bch_keylist_push(&op->keys);
 
-               trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev);
                n->bi_rw |= REQ_WRITE;
                bch_submit_bbio(n, op->c, k, 0);
        } while (n != bio);
@@ -784,11 +783,8 @@ static void request_read_error(struct closure *cl)
        int i;
 
        if (s->recoverable) {
-               /* The cache read failed, but we can retry from the backing
-                * device.
-                */
-               pr_debug("recovering at sector %llu",
-                        (uint64_t) s->orig_bio->bi_sector);
+               /* Retry from the backing device: */
+               trace_bcache_read_retry(s->orig_bio);
 
                s->error = 0;
                bv = s->bio.bio.bi_io_vec;
@@ -806,7 +802,6 @@ static void request_read_error(struct closure *cl)
 
                /* XXX: invalidate cache */
 
-               trace_bcache_read_retry(&s->bio.bio);
                closure_bio_submit(&s->bio.bio, &s->cl, s->d);
        }
 
@@ -899,6 +894,7 @@ static void request_read_done_bh(struct closure *cl)
        struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
        bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);
+       trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip);
 
        if (s->error)
                continue_at_nobarrier(cl, request_read_error, bcache_wq);
@@ -969,7 +965,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
        s->cache_miss = miss;
        bio_get(s->op.cache_bio);
 
-       trace_bcache_cache_miss(s->orig_bio);
        closure_bio_submit(s->op.cache_bio, &s->cl, s->d);
 
        return ret;
@@ -1040,15 +1035,15 @@ static void request_write(struct cached_dev *dc, struct search *s)
        if (should_writeback(dc, s->orig_bio))
                s->writeback = true;
 
+       trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
+
        if (!s->writeback) {
                s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
                                                   dc->disk.bio_split);
 
-               trace_bcache_writethrough(s->orig_bio);
                closure_bio_submit(bio, cl, s->d);
        } else {
                s->op.cache_bio = bio;
-               trace_bcache_writeback(s->orig_bio);
                bch_writeback_add(dc, bio_sectors(bio));
        }
 out:
@@ -1058,7 +1053,6 @@ skip:
        s->op.skip = true;
        s->op.cache_bio = s->orig_bio;
        bio_get(s->op.cache_bio);
-       trace_bcache_write_skip(s->orig_bio);
 
        if ((bio->bi_rw & REQ_DISCARD) &&
            !blk_queue_discard(bdev_get_queue(dc->bdev)))
@@ -1088,9 +1082,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s)
 
 /* Cached devices - read & write stuff */
 
-int bch_get_congested(struct cache_set *c)
+unsigned bch_get_congested(struct cache_set *c)
 {
        int i;
+       long rand;
 
        if (!c->congested_read_threshold_us &&
            !c->congested_write_threshold_us)
@@ -1106,7 +1101,13 @@ int bch_get_congested(struct cache_set *c)
 
        i += CONGESTED_MAX;
 
-       return i <= 0 ? 1 : fract_exp_two(i, 6);
+       if (i > 0)
+               i = fract_exp_two(i, 6);
+
+       rand = get_random_int();
+       i -= bitmap_weight(&rand, BITS_PER_LONG);
+
+       return i > 0 ? i : 1;
 }
 
 static void add_sequential(struct task_struct *t)
@@ -1126,10 +1127,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 {
        struct cache_set *c = s->op.c;
        struct bio *bio = &s->bio.bio;
-
-       long rand;
-       int cutoff = bch_get_congested(c);
        unsigned mode = cache_mode(dc, bio);
+       unsigned sectors, congested = bch_get_congested(c);
 
        if (atomic_read(&dc->disk.detaching) ||
            c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -1147,17 +1146,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
                goto skip;
        }
 
-       if (!cutoff) {
-               cutoff = dc->sequential_cutoff >> 9;
+       if (!congested && !dc->sequential_cutoff)
+               goto rescale;
 
-               if (!cutoff)
-                       goto rescale;
-
-               if (mode == CACHE_MODE_WRITEBACK &&
-                   (bio->bi_rw & REQ_WRITE) &&
-                   (bio->bi_rw & REQ_SYNC))
-                       goto rescale;
-       }
+       if (!congested &&
+           mode == CACHE_MODE_WRITEBACK &&
+           (bio->bi_rw & REQ_WRITE) &&
+           (bio->bi_rw & REQ_SYNC))
+               goto rescale;
 
        if (dc->sequential_merge) {
                struct io *i;
@@ -1192,12 +1188,19 @@ found:
                add_sequential(s->task);
        }
 
-       rand = get_random_int();
-       cutoff -= bitmap_weight(&rand, BITS_PER_LONG);
+       sectors = max(s->task->sequential_io,
+                     s->task->sequential_io_avg) >> 9;
 
-       if (cutoff <= (int) (max(s->task->sequential_io,
-                                s->task->sequential_io_avg) >> 9))
+       if (dc->sequential_cutoff &&
+           sectors >= dc->sequential_cutoff >> 9) {
+               trace_bcache_bypass_sequential(s->orig_bio);
                goto skip;
+       }
+
+       if (congested && sectors >= congested) {
+               trace_bcache_bypass_congested(s->orig_bio);
+               goto skip;
+       }
 
 rescale:
        bch_rescale_priorities(c, bio_sectors(bio));
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 254d9ab..57dc478 100644
@@ -30,7 +30,7 @@ struct search {
 };
 
 void bch_cache_read_endio(struct bio *, int);
-int bch_get_congested(struct cache_set *);
+unsigned bch_get_congested(struct cache_set *);
 void bch_insert_data(struct closure *cl);
 void bch_btree_insert_async(struct closure *);
 void bch_cache_read_endio(struct bio *, int);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e53f899..47bc137 100644
@@ -11,6 +11,7 @@
 #include "debug.h"
 #include "request.h"
 
+#include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/debugfs.h>
 #include <linux/genhd.h>
@@ -543,7 +544,6 @@ void bch_prio_write(struct cache *ca)
 
        pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
                 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-       blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
 
        for (i = prio_buckets(ca) - 1; i >= 0; --i) {
                long bucket;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 29228b8..f5c2d86 100644
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blkdev.h>
 #include <linux/sort.h>
 
 static const char * const cache_replacement_policies[] = {
diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c
index 983f9bb..7f4f38a 100644
@@ -2,6 +2,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blktrace_api.h>
 #include <linux/module.h>
 
 #define CREATE_TRACE_POINTS
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision);
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 577393e..e027805 100644
@@ -15,8 +15,6 @@
 
 struct closure;
 
-#include <trace/events/bcache.h>
-
 #ifdef CONFIG_BCACHE_EDEBUG
 
 #define atomic_dec_bug(v)      BUG_ON(atomic_dec_return(v) < 0)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 2714ed3..82f6d45 100644
@@ -10,6 +10,8 @@
 #include "btree.h"
 #include "debug.h"
 
+#include <trace/events/bcache.h>
+
 static struct workqueue_struct *dirty_wq;
 
 static void read_dirty(struct closure *);
@@ -236,10 +238,12 @@ static void write_dirty_finish(struct closure *cl)
                for (i = 0; i < KEY_PTRS(&w->key); i++)
                        atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);
 
-               pr_debug("clearing %s", pkey(&w->key));
                bch_btree_insert(&op, dc->disk.c);
                closure_sync(&op.cl);
 
+               if (op.insert_collision)
+                       trace_bcache_writeback_collision(&w->key);
+
                atomic_long_inc(op.insert_collision
                                ? &dc->disk.c->writeback_keys_failed
                                : &dc->disk.c->writeback_keys_done);
@@ -275,7 +279,6 @@ static void write_dirty(struct closure *cl)
        io->bio.bi_bdev         = io->dc->bdev;
        io->bio.bi_end_io       = dirty_endio;
 
-       trace_bcache_write_dirty(&io->bio);
        closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
        continue_at(cl, write_dirty_finish, dirty_wq);
@@ -296,7 +299,6 @@ static void read_dirty_submit(struct closure *cl)
 {
        struct dirty_io *io = container_of(cl, struct dirty_io, cl);
 
-       trace_bcache_read_dirty(&io->bio);
        closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
        continue_at(cl, write_dirty, dirty_wq);
@@ -352,7 +354,7 @@ static void read_dirty(struct closure *cl)
                if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
                        goto err_free;
 
-               pr_debug("%s", pkey(&w->key));
+               trace_bcache_writeback(&w->key);
 
                closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
 
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 3cc5a0b..c9952b3 100644
@@ -9,9 +9,7 @@
 struct search;
 
 DECLARE_EVENT_CLASS(bcache_request,
-
        TP_PROTO(struct search *s, struct bio *bio),
-
        TP_ARGS(s, bio),
 
        TP_STRUCT__entry(
@@ -22,7 +20,6 @@ DECLARE_EVENT_CLASS(bcache_request,
                __field(dev_t,          orig_sector             )
                __field(unsigned int,   nr_sector               )
                __array(char,           rwbs,   6               )
-               __array(char,           comm,   TASK_COMM_LEN   )
        ),
 
        TP_fast_assign(
@@ -33,36 +30,66 @@ DECLARE_EVENT_CLASS(bcache_request,
                __entry->orig_sector    = bio->bi_sector - 16;
                __entry->nr_sector      = bio->bi_size >> 9;
                blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
 
-       TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)",
+       TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->rwbs,
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm,
-                 __entry->orig_major, __entry->orig_minor,
+                 __entry->rwbs, (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->orig_major, __entry->orig_minor,
                  (unsigned long long)__entry->orig_sector)
 );
 
-DEFINE_EVENT(bcache_request, bcache_request_start,
+DECLARE_EVENT_CLASS(bkey,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k),
 
-       TP_PROTO(struct search *s, struct bio *bio),
+       TP_STRUCT__entry(
+               __field(u32,    size                            )
+               __field(u32,    inode                           )
+               __field(u64,    offset                          )
+               __field(bool,   dirty                           )
+       ),
 
-       TP_ARGS(s, bio)
+       TP_fast_assign(
+               __entry->inode  = KEY_INODE(k);
+               __entry->offset = KEY_OFFSET(k);
+               __entry->size   = KEY_SIZE(k);
+               __entry->dirty  = KEY_DIRTY(k);
+       ),
+
+       TP_printk("%u:%llu len %u dirty %u", __entry->inode,
+                 __entry->offset, __entry->size, __entry->dirty)
 );
 
-DEFINE_EVENT(bcache_request, bcache_request_end,
+DECLARE_EVENT_CLASS(btree_node,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b),
+
+       TP_STRUCT__entry(
+               __field(size_t,         bucket                  )
+       ),
+
+       TP_fast_assign(
+               __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
+       ),
 
+       TP_printk("bucket %zu", __entry->bucket)
+);
+
+/* request.c */
+
+DEFINE_EVENT(bcache_request, bcache_request_start,
        TP_PROTO(struct search *s, struct bio *bio),
+       TP_ARGS(s, bio)
+);
 
+DEFINE_EVENT(bcache_request, bcache_request_end,
+       TP_PROTO(struct search *s, struct bio *bio),
        TP_ARGS(s, bio)
 );
 
 DECLARE_EVENT_CLASS(bcache_bio,
-
        TP_PROTO(struct bio *bio),
-
        TP_ARGS(bio),
 
        TP_STRUCT__entry(
@@ -70,7 +97,6 @@ DECLARE_EVENT_CLASS(bcache_bio,
                __field(sector_t,       sector                  )
                __field(unsigned int,   nr_sector               )
                __array(char,           rwbs,   6               )
-               __array(char,           comm,   TASK_COMM_LEN   )
        ),
 
        TP_fast_assign(
@@ -78,191 +104,295 @@ DECLARE_EVENT_CLASS(bcache_bio,
                __entry->sector         = bio->bi_sector;
                __entry->nr_sector      = bio->bi_size >> 9;
                blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
 
-       TP_printk("%d,%d  %s %llu + %u [%s]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->rwbs,
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm)
+       TP_printk("%d,%d  %s %llu + %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector, __entry->nr_sector)
 );
 
-
-DEFINE_EVENT(bcache_bio, bcache_passthrough,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_sequential,
        TP_PROTO(struct bio *bio),
-
        TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_hit,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
        TP_PROTO(struct bio *bio),
-
        TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_miss,
+TRACE_EVENT(bcache_read,
+       TP_PROTO(struct bio *bio, bool hit, bool bypass),
+       TP_ARGS(bio, hit, bypass),
 
-       TP_PROTO(struct bio *bio),
+       TP_STRUCT__entry(
+               __field(dev_t,          dev                     )
+               __field(sector_t,       sector                  )
+               __field(unsigned int,   nr_sector               )
+               __array(char,           rwbs,   6               )
+               __field(bool,           cache_hit               )
+               __field(bool,           bypass                  )
+       ),
 
-       TP_ARGS(bio)
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               __entry->cache_hit = hit;
+               __entry->bypass = bypass;
+       ),
+
+       TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->cache_hit, __entry->bypass)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_read_retry,
+TRACE_EVENT(bcache_write,
+       TP_PROTO(struct bio *bio, bool writeback, bool bypass),
+       TP_ARGS(bio, writeback, bypass),
 
-       TP_PROTO(struct bio *bio),
+       TP_STRUCT__entry(
+               __field(dev_t,          dev                     )
+               __field(sector_t,       sector                  )
+               __field(unsigned int,   nr_sector               )
+               __array(char,           rwbs,   6               )
+               __field(bool,           writeback               )
+               __field(bool,           bypass                  )
+       ),
 
-       TP_ARGS(bio)
-);
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               __entry->writeback = writeback;
+               __entry->bypass = bypass;
+       ),
 
-DEFINE_EVENT(bcache_bio, bcache_writethrough,
+       TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->writeback, __entry->bypass)
+);
 
+DEFINE_EVENT(bcache_bio, bcache_read_retry,
        TP_PROTO(struct bio *bio),
-
        TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_writeback,
+DEFINE_EVENT(bkey, bcache_cache_insert,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
+);
 
-       TP_PROTO(struct bio *bio),
+/* Journal */
 
-       TP_ARGS(bio)
-);
+DECLARE_EVENT_CLASS(cache_set,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c),
 
-DEFINE_EVENT(bcache_bio, bcache_write_skip,
+       TP_STRUCT__entry(
+               __array(char,           uuid,   16 )
+       ),
 
-       TP_PROTO(struct bio *bio),
+       TP_fast_assign(
+               memcpy(__entry->uuid, c->sb.set_uuid, 16);
+       ),
 
-       TP_ARGS(bio)
+       TP_printk("%pU", __entry->uuid)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_btree_read,
-
-       TP_PROTO(struct bio *bio),
+DEFINE_EVENT(bkey, bcache_journal_replay_key,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
+);
 
-       TP_ARGS(bio)
+DEFINE_EVENT(cache_set, bcache_journal_full,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_btree_write,
+DEFINE_EVENT(cache_set, bcache_journal_entry_full,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
+);
 
+DEFINE_EVENT(bcache_bio, bcache_journal_write,
        TP_PROTO(struct bio *bio),
-
        TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_write_dirty,
+/* Btree */
 
-       TP_PROTO(struct bio *bio),
+DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
+);
 
-       TP_ARGS(bio)
+DEFINE_EVENT(btree_node, bcache_btree_read,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_read_dirty,
+TRACE_EVENT(bcache_btree_write,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b),
 
-       TP_PROTO(struct bio *bio),
+       TP_STRUCT__entry(
+               __field(size_t,         bucket                  )
+               __field(unsigned,       block                   )
+               __field(unsigned,       keys                    )
+       ),
 
-       TP_ARGS(bio)
+       TP_fast_assign(
+               __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
+               __entry->block  = b->written;
+               __entry->keys   = b->sets[b->nsets].data->keys;
+       ),
+
+       TP_printk("bucket %zu", __entry->bucket)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_write_moving,
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
+);
 
-       TP_PROTO(struct bio *bio),
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
+);
 
-       TP_ARGS(bio)
+DEFINE_EVENT(btree_node, bcache_btree_node_free,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_read_moving,
+TRACE_EVENT(bcache_btree_gc_coalesce,
+       TP_PROTO(unsigned nodes),
+       TP_ARGS(nodes),
 
-       TP_PROTO(struct bio *bio),
+       TP_STRUCT__entry(
+               __field(unsigned,       nodes                   )
+       ),
 
-       TP_ARGS(bio)
-);
+       TP_fast_assign(
+               __entry->nodes  = nodes;
+       ),
 
-DEFINE_EVENT(bcache_bio, bcache_journal_write,
+       TP_printk("coalesced %u nodes", __entry->nodes)
+);
 
-       TP_PROTO(struct bio *bio),
+DEFINE_EVENT(cache_set, bcache_gc_start,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
+);
 
-       TP_ARGS(bio)
+DEFINE_EVENT(cache_set, bcache_gc_end,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
 );
 
-DECLARE_EVENT_CLASS(bcache_cache_bio,
+DEFINE_EVENT(bkey, bcache_gc_copy,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
+);
 
-       TP_PROTO(struct bio *bio,
-                sector_t orig_sector,
-                struct block_device* orig_bdev),
+DEFINE_EVENT(bkey, bcache_gc_copy_collision,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
+);
 
-       TP_ARGS(bio, orig_sector, orig_bdev),
+DECLARE_EVENT_CLASS(btree_split,
+       TP_PROTO(struct btree *b, unsigned keys),
+       TP_ARGS(b, keys),
 
        TP_STRUCT__entry(
-               __field(dev_t,          dev                     )
-               __field(dev_t,          orig_dev                )
-               __field(sector_t,       sector                  )
-               __field(sector_t,       orig_sector             )
-               __field(unsigned int,   nr_sector               )
-               __array(char,           rwbs,   6               )
-               __array(char,           comm,   TASK_COMM_LEN   )
+               __field(size_t,         bucket                  )
+               __field(unsigned,       keys                    )
        ),
 
        TP_fast_assign(
-               __entry->dev            = bio->bi_bdev->bd_dev;
-               __entry->orig_dev       = orig_bdev->bd_dev;
-               __entry->sector         = bio->bi_sector;
-               __entry->orig_sector    = orig_sector;
-               __entry->nr_sector      = bio->bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+               __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
+               __entry->keys   = keys;
        ),
 
-       TP_printk("%d,%d  %s %llu + %u [%s] (from %d,%d %llu)",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->rwbs,
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm,
-                 MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev),
-                 (unsigned long long)__entry->orig_sector)
+       TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys)
 );
 
-DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert,
-
-       TP_PROTO(struct bio *bio,
-                sector_t orig_sector,
-                struct block_device *orig_bdev),
+DEFINE_EVENT(btree_split, bcache_btree_node_split,
+       TP_PROTO(struct btree *b, unsigned keys),
+       TP_ARGS(b, keys)
+);
 
-       TP_ARGS(bio, orig_sector, orig_bdev)
+DEFINE_EVENT(btree_split, bcache_btree_node_compact,
+       TP_PROTO(struct btree *b, unsigned keys),
+       TP_ARGS(b, keys)
 );
 
-DECLARE_EVENT_CLASS(bcache_gc,
+DEFINE_EVENT(btree_node, bcache_btree_set_root,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
+);
 
-       TP_PROTO(uint8_t *uuid),
+/* Allocator */
 
-       TP_ARGS(uuid),
+TRACE_EVENT(bcache_alloc_invalidate,
+       TP_PROTO(struct cache *ca),
+       TP_ARGS(ca),
 
        TP_STRUCT__entry(
-               __field(uint8_t *,      uuid)
+               __field(unsigned,       free                    )
+               __field(unsigned,       free_inc                )
+               __field(unsigned,       free_inc_size           )
+               __field(unsigned,       unused                  )
        ),
 
        TP_fast_assign(
-               __entry->uuid           = uuid;
+               __entry->free           = fifo_used(&ca->free);
+               __entry->free_inc       = fifo_used(&ca->free_inc);
+               __entry->free_inc_size  = ca->free_inc.size;
+               __entry->unused         = fifo_used(&ca->unused);
        ),
 
-       TP_printk("%pU", __entry->uuid)
+       TP_printk("free %u free_inc %u/%u unused %u", __entry->free,
+                 __entry->free_inc, __entry->free_inc_size, __entry->unused)
 );
 
+TRACE_EVENT(bcache_alloc_fail,
+       TP_PROTO(struct cache *ca),
+       TP_ARGS(ca),
 
-DEFINE_EVENT(bcache_gc, bcache_gc_start,
+       TP_STRUCT__entry(
+               __field(unsigned,       free                    )
+               __field(unsigned,       free_inc                )
+               __field(unsigned,       unused                  )
+               __field(unsigned,       blocked                 )
+       ),
 
-            TP_PROTO(uint8_t *uuid),
+       TP_fast_assign(
+               __entry->free           = fifo_used(&ca->free);
+               __entry->free_inc       = fifo_used(&ca->free_inc);
+               __entry->unused         = fifo_used(&ca->unused);
+               __entry->blocked        = atomic_read(&ca->set->prio_blocked);
+       ),
 
-            TP_ARGS(uuid)
+       TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free,
+                 __entry->free_inc, __entry->unused, __entry->blocked)
 );
 
-DEFINE_EVENT(bcache_gc, bcache_gc_end,
+/* Background writeback */
 
-            TP_PROTO(uint8_t *uuid),
+DEFINE_EVENT(bkey, bcache_writeback,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
+);
 
-            TP_ARGS(uuid)
+DEFINE_EVENT(bkey, bcache_writeback_collision,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
 );
 
 #endif /* _TRACE_BCACHE_H */