md: account io_acct_set usage with active_io
author Xiao Ni <xni@redhat.com>
Fri, 3 Feb 2023 05:13:44 +0000 (13:13 +0800)
committer Song Liu <song@kernel.org>
Wed, 8 Feb 2023 23:46:57 +0000 (15:46 -0800)
io_acct_set was enabled for raid0/raid5 io accounting. bios that contain
md_io_acct are allocated in the i/o path. There isn't a good method to
monitor if these bios are all finished and freed. In the takeover process,
io_acct_set (which is used for bios with md_io_acct) needs to be freed.
However, if some bios finish after io_acct_set is freed, it may trigger
the following panic:

[ 6973.767999] RIP: 0010:mempool_free+0x52/0x80
[ 6973.786098] Call Trace:
[ 6973.786549]  md_end_io_acct+0x31/0x40
[ 6973.787227]  blk_update_request+0x224/0x380
[ 6973.787994]  blk_mq_end_request+0x1a/0x130
[ 6973.788739]  blk_complete_reqs+0x35/0x50
[ 6973.789456]  __do_softirq+0xd7/0x2c8
[ 6973.790114]  ? sort_range+0x20/0x20
[ 6973.790763]  run_ksoftirqd+0x2a/0x40
[ 6973.791400]  smpboot_thread_fn+0xb5/0x150
[ 6973.792114]  kthread+0x10b/0x130
[ 6973.792724]  ? set_kthread_struct+0x50/0x50
[ 6973.793491]  ret_from_fork+0x1f/0x40

Fix this by increasing and decreasing active_io for each bio with
md_io_acct so that mddev_suspend() will wait until all bios from
io_acct_set finish before freeing io_acct_set.

Reported-by: Fine Fan <ffan@redhat.com>
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Song Liu <song@kernel.org>
drivers/md/md.c
drivers/md/md.h

index 1961105..927a43d 100644 (file)
@@ -8628,12 +8628,15 @@ static void md_end_io_acct(struct bio *bio)
 {
        struct md_io_acct *md_io_acct = bio->bi_private;
        struct bio *orig_bio = md_io_acct->orig_bio;
+       struct mddev *mddev = md_io_acct->mddev;
 
        orig_bio->bi_status = bio->bi_status;
 
        bio_end_io_acct(orig_bio, md_io_acct->start_time);
        bio_put(bio);
        bio_endio(orig_bio);
+
+       percpu_ref_put(&mddev->active_io);
 }
 
 /*
@@ -8649,10 +8652,13 @@ void md_account_bio(struct mddev *mddev, struct bio **bio)
        if (!blk_queue_io_stat(bdev->bd_disk->queue))
                return;
 
+       percpu_ref_get(&mddev->active_io);
+
        clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
        md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
        md_io_acct->orig_bio = *bio;
        md_io_acct->start_time = bio_start_io_acct(*bio);
+       md_io_acct->mddev = mddev;
 
        clone->bi_end_io = md_end_io_acct;
        clone->bi_private = md_io_acct;
index 6335cb8..e148e3c 100644 (file)
@@ -710,9 +710,10 @@ struct md_thread {
 };
 
 struct md_io_acct {
-       struct bio *orig_bio;
-       unsigned long start_time;
-       struct bio bio_clone;
+       struct mddev    *mddev;
+       struct bio      *orig_bio;
+       unsigned long   start_time;
+       struct bio      bio_clone;
 };
 
 #define THREAD_WAKEUP  0