md/raid1: Avoid lock contention from wake_up()
authorJack Wang <jinpu.wang@ionos.com>
Wed, 5 Jul 2023 11:32:27 +0000 (13:32 +0200)
committerSong Liu <song@kernel.org>
Thu, 27 Jul 2023 07:13:30 +0000 (00:13 -0700)
wake_up is called unconditionally in a few paths such as make_request(),
which cause lock contention under high concurrency workload like below
    raid1_end_write_request
     wake_up
      __wake_up_common_lock
       spin_lock_irqsave

Improve performance by only call wake_up() if waitqueue is not empty

Fio test script:

[global]
name=random reads and writes
ioengine=libaio
direct=1
readwrite=randrw
rwmixread=70
iodepth=64
buffered=0
filename=/dev/md0
size=1G
runtime=30
time_based
randrepeat=0
norandommap
refill_buffers
ramp_time=10
bs=4k
numjobs=400
group_reporting=1
[job1]

Test result with 2 ramdisk in raid1 on a Intel Broadwell 56 cores server.

Before this patch       With this patch
READ BW=4621MB/s BW=7337MB/s
WRITE BW=1980MB/s BW=3144MB/s

The patch is inspired by Yu Kuai's change for raid10:
https://lore.kernel.org/r/20230621105728.1268542-1-yukuai1@huaweicloud.com

Cc: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20230705113227.148494-1-jinpu.wang@ionos.com
Signed-off-by: Song Liu <song@kernel.org>
drivers/md/raid1.c

index a68c9cc..23d2119 100644 (file)
@@ -789,11 +789,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
        return best_disk;
 }
 
+static void wake_up_barrier(struct r1conf *conf)
+{
+       if (wq_has_sleeper(&conf->wait_barrier))
+               wake_up(&conf->wait_barrier);
+}
+
 static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 {
        /* flush any pending bitmap writes to disk before proceeding w/ I/O */
        raid1_prepare_flush_writes(conf->mddev->bitmap);
-       wake_up(&conf->wait_barrier);
+       wake_up_barrier(conf);
 
        while (bio) { /* submit pending writes */
                struct bio *next = bio->bi_next;
@@ -970,7 +976,7 @@ static bool _wait_barrier(struct r1conf *conf, int idx, bool nowait)
         * In case freeze_array() is waiting for
         * get_unqueued_pending() == extra
         */
-       wake_up(&conf->wait_barrier);
+       wake_up_barrier(conf);
        /* Wait for the barrier in same barrier unit bucket to drop. */
 
        /* Return false when nowait flag is set */
@@ -1013,7 +1019,7 @@ static bool wait_read_barrier(struct r1conf *conf, sector_t sector_nr, bool nowa
         * In case freeze_array() is waiting for
         * get_unqueued_pending() == extra
         */
-       wake_up(&conf->wait_barrier);
+       wake_up_barrier(conf);
        /* Wait for array to be unfrozen */
 
        /* Return false when nowait flag is set */
@@ -1042,7 +1048,7 @@ static bool wait_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait)
 static void _allow_barrier(struct r1conf *conf, int idx)
 {
        atomic_dec(&conf->nr_pending[idx]);
-       wake_up(&conf->wait_barrier);
+       wake_up_barrier(conf);
 }
 
 static void allow_barrier(struct r1conf *conf, sector_t sector_nr)
@@ -1171,7 +1177,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
                spin_lock_irq(&conf->device_lock);
                bio_list_merge(&conf->pending_bio_list, &plug->pending);
                spin_unlock_irq(&conf->device_lock);
-               wake_up(&conf->wait_barrier);
+               wake_up_barrier(conf);
                md_wakeup_thread(mddev->thread);
                kfree(plug);
                return;
@@ -1574,7 +1580,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
        r1_bio_write_done(r1_bio);
 
        /* In case raid1d snuck in to freeze_array */
-       wake_up(&conf->wait_barrier);
+       wake_up_barrier(conf);
 }
 
 static bool raid1_make_request(struct mddev *mddev, struct bio *bio)