dm: use op specific max_sectors when splitting abnormal io
author Mike Snitzer <snitzer@kernel.org>
Thu, 15 Jun 2023 01:47:46 +0000 (21:47 -0400)
committer Mike Snitzer <snitzer@kernel.org>
Thu, 15 Jun 2023 16:47:16 +0000 (12:47 -0400)
Split abnormal IO in terms of the corresponding operation specific
max_sectors (max_discard_sectors, max_secure_erase_sectors or
max_write_zeroes_sectors).

This fixes a significant dm-thinp discard performance regression that
was introduced with commit e2dd8aca2d76 ("dm bio prison v1: improve
concurrent IO performance"). For discards, max_discard_sectors is now
used instead of max_sectors, which fixes excessive discard splitting
(e.g. max_sectors=128K vs max_discard_sectors=64M).

Tested by discarding a 1 Petabyte dm-thin device:
lvcreate -V 1125899906842624B -T test/pool -n thin
time blkdiscard /dev/test/thin

Before this fix (splitting discards every 128K): ~116m
 After this fix (splitting discards every 64M) : 0m33.460s

Reported-by: Zorro Lang <zlang@redhat.com>
Fixes: 06961c487a33 ("dm: split discards further if target sets max_discard_granularity")
Requires: 13f6facf3fae ("dm: allow targets to require splitting WRITE_ZEROES and SECURE_ERASE")
Fixes: e2dd8aca2d76 ("dm bio prison v1: improve concurrent IO performance")
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
drivers/md/dm.c

index 8488547fc00d36b7bd30f3fc74a4a06a3a0f457c..fffb0cbe2ac8a14fe9bc0c9dc442ce6e4a0a02aa 100644 (file)
@@ -1172,7 +1172,8 @@ static inline sector_t max_io_len_target_boundary(struct dm_target *ti,
 }
 
 static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
-                            unsigned int max_granularity)
+                            unsigned int max_granularity,
+                            unsigned int max_sectors)
 {
        sector_t target_offset = dm_target_offset(ti, sector);
        sector_t len = max_io_len_target_boundary(ti, target_offset);
@@ -1186,13 +1187,13 @@ static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
        if (!max_granularity)
                return len;
        return min_t(sector_t, len,
-               min(queue_max_sectors(ti->table->md->queue),
+               min(max_sectors ? : queue_max_sectors(ti->table->md->queue),
                    blk_chunk_sectors_left(target_offset, max_granularity)));
 }
 
 static inline sector_t max_io_len(struct dm_target *ti, sector_t sector)
 {
-       return __max_io_len(ti, sector, ti->max_io_len);
+       return __max_io_len(ti, sector, ti->max_io_len, 0);
 }
 
 int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
@@ -1581,12 +1582,13 @@ static void __send_empty_flush(struct clone_info *ci)
 
 static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
                                        unsigned int num_bios,
-                                       unsigned int max_granularity)
+                                       unsigned int max_granularity,
+                                       unsigned int max_sectors)
 {
        unsigned int len, bios;
 
        len = min_t(sector_t, ci->sector_count,
-                   __max_io_len(ti, ci->sector, max_granularity));
+                   __max_io_len(ti, ci->sector, max_granularity, max_sectors));
 
        atomic_add(num_bios, &ci->io->io_count);
        bios = __send_duplicate_bios(ci, ti, num_bios, &len);
@@ -1623,23 +1625,27 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
 {
        unsigned int num_bios = 0;
        unsigned int max_granularity = 0;
+       unsigned int max_sectors = 0;
        struct queue_limits *limits = dm_get_queue_limits(ti->table->md);
 
        switch (bio_op(ci->bio)) {
        case REQ_OP_DISCARD:
                num_bios = ti->num_discard_bios;
+               max_sectors = limits->max_discard_sectors;
                if (ti->max_discard_granularity)
-                       max_granularity = limits->max_discard_sectors;
+                       max_granularity = max_sectors;
                break;
        case REQ_OP_SECURE_ERASE:
                num_bios = ti->num_secure_erase_bios;
+               max_sectors = limits->max_secure_erase_sectors;
                if (ti->max_secure_erase_granularity)
-                       max_granularity = limits->max_secure_erase_sectors;
+                       max_granularity = max_sectors;
                break;
        case REQ_OP_WRITE_ZEROES:
                num_bios = ti->num_write_zeroes_bios;
+               max_sectors = limits->max_write_zeroes_sectors;
                if (ti->max_write_zeroes_granularity)
-                       max_granularity = limits->max_write_zeroes_sectors;
+                       max_granularity = max_sectors;
                break;
        default:
                break;
@@ -1654,7 +1660,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
        if (unlikely(!num_bios))
                return BLK_STS_NOTSUPP;
 
-       __send_changing_extent_only(ci, ti, num_bios, max_granularity);
+       __send_changing_extent_only(ci, ti, num_bios,
+                                   max_granularity, max_sectors);
        return BLK_STS_OK;
 }