From 5c0de3d72f8c05678ed769bea24e98128f7ab570 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 28 Jun 2021 09:59:37 -0400 Subject: [PATCH] dm writecache: make writeback pause configurable Commit 95b88f4d71cb953e02206be3c757083601391a0f ("dm writecache: pause writeback if cache full and origin being written directly") introduced code that pauses cache flushing if we are issuing writes directly to the origin. Improve that initial commit by making the timeout configurable (via the option "pause_writeback"). Also change the default from 1s to 3s because it performed better. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- .../admin-guide/device-mapper/writecache.rst | 5 ++- drivers/md/dm-io-tracker.h | 12 ++++++ drivers/md/dm-writecache.c | 48 ++++++++++++++++++---- 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst index 977f82b..65427d8 100644 --- a/Documentation/admin-guide/device-mapper/writecache.rst +++ b/Documentation/admin-guide/device-mapper/writecache.rst @@ -12,7 +12,6 @@ first sector should contain valid superblock from previous invocation. Constructor parameters: 1. type of the cache device - "p" or "s" - - p - persistent memory - s - SSD 2. the underlying device that will be cached @@ -21,7 +20,6 @@ Constructor parameters: size) 5. the number of optional parameters (the parameters with an argument count as two) - start_sector n (default: 0) offset from the start of cache device in 512-byte sectors high_watermark n (default: 50) @@ -71,6 +69,9 @@ Constructor parameters: metadata_only only metadata is promoted to the cache. This option improves performance for heavier REQ_META workloads. + pause_writeback n (default: 3000) + pause writeback if there was some write I/O redirected to + the origin volume in the last n milliseconds Status: 1. 
error indicator - 0 if there was no error, otherwise error number diff --git a/drivers/md/dm-io-tracker.h b/drivers/md/dm-io-tracker.h index 1dcf01f..bdcc627 100644 --- a/drivers/md/dm-io-tracker.h +++ b/drivers/md/dm-io-tracker.h @@ -45,6 +45,18 @@ static inline bool dm_iot_idle_for(struct dm_io_tracker *iot, unsigned long j) return r; } +static inline unsigned long dm_iot_idle_time(struct dm_io_tracker *iot) +{ + unsigned long r = 0; + + spin_lock_irq(&iot->lock); + if (!iot->in_flight) + r = jiffies - iot->idle_time; + spin_unlock_irq(&iot->lock); + + return r; +} + static inline void dm_iot_io_begin(struct dm_io_tracker *iot, sector_t len) { spin_lock_irq(&iot->lock); diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index d70342c..e21e29e 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -30,6 +30,7 @@ #define AUTOCOMMIT_MSEC 1000 #define MAX_AGE_DIV 16 #define MAX_AGE_UNSPECIFIED -1UL +#define PAUSE_WRITEBACK (HZ * 3) #define BITMAP_GRANULARITY 65536 #if BITMAP_GRANULARITY < PAGE_SIZE @@ -125,6 +126,7 @@ struct dm_writecache { size_t freelist_high_watermark; size_t freelist_low_watermark; unsigned long max_age; + unsigned long pause; unsigned uncommitted_blocks; unsigned autocommit_blocks; @@ -174,11 +176,13 @@ struct dm_writecache { bool cleaner:1; bool cleaner_set:1; bool metadata_only:1; + bool pause_set:1; unsigned high_wm_percent_value; unsigned low_wm_percent_value; unsigned autocommit_time_value; unsigned max_age_value; + unsigned pause_value; unsigned writeback_all; struct workqueue_struct *writeback_wq; @@ -1470,9 +1474,11 @@ bio_copy: } unlock_remap_origin: - if (bio_data_dir(bio) != READ) { - dm_iot_io_begin(&wc->iot, 1); - bio->bi_private = (void *)2; + if (likely(wc->pause != 0)) { + if (bio_op(bio) == REQ_OP_WRITE) { + dm_iot_io_begin(&wc->iot, 1); + bio->bi_private = (void *)2; + } } bio_set_dev(bio, wc->dev->bdev); wc_unlock(wc); @@ -1837,10 +1843,19 @@ static void writecache_writeback(struct 
work_struct *work) dm_kcopyd_client_flush(wc->dm_kcopyd); } - if (!wc->writeback_all && !dm_suspended(wc->ti)) { - while (!dm_iot_idle_for(&wc->iot, HZ)) { - cond_resched(); - msleep(1000); + if (likely(wc->pause != 0)) { + while (1) { + unsigned long idle; + if (unlikely(wc->cleaner) || unlikely(wc->writeback_all) || + unlikely(dm_suspended(wc->ti))) + break; + idle = dm_iot_idle_time(&wc->iot); + if (idle >= wc->pause) + break; + idle = wc->pause - idle; + if (idle > HZ) + idle = HZ; + schedule_timeout_idle(idle); } } @@ -2113,7 +2128,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) struct wc_memory_superblock s; static struct dm_arg _args[] = { - {0, 17, "Invalid number of feature args"}, + {0, 18, "Invalid number of feature args"}, }; as.argc = argc; @@ -2206,6 +2221,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } } else { + wc->pause = PAUSE_WRITEBACK; r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct)); if (r) { ti->error = "Could not allocate mempool"; @@ -2344,6 +2360,18 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) } else goto invalid_optional; } else if (!strcasecmp(string, "metadata_only")) { wc->metadata_only = true; + } else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) { + unsigned pause_msecs; + if (WC_MODE_PMEM(wc)) + goto invalid_optional; + string = dm_shift_arg(&as), opt_params--; + if (sscanf(string, "%u%c", &pause_msecs, &dummy) != 1) + goto invalid_optional; + if (pause_msecs > 60000) + goto invalid_optional; + wc->pause = msecs_to_jiffies(pause_msecs); + wc->pause_set = true; + wc->pause_value = pause_msecs; } else { invalid_optional: r = -EINVAL; @@ -2569,6 +2597,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type, extra_args++; if (wc->metadata_only) extra_args++; + if (wc->pause_set) + extra_args += 2; DMEMIT("%u", extra_args); if (wc->start_sector_set) @@ -2591,6 
+2621,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type, DMEMIT(" %sfua", wc->writeback_fua ? "" : "no"); if (wc->metadata_only) DMEMIT(" metadata_only"); + if (wc->pause_set) + DMEMIT(" pause_writeback %u", wc->pause_value); break; } } -- 2.7.4