dm kcopyd: introduce configurable throttling
[platform/adaptation/renesas_rcar/renesas_kernel.git] / drivers / md / dm-kcopyd.c
index 68c0267..d581fe5 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/delay.h>
 #include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>
 
@@ -51,6 +52,8 @@ struct dm_kcopyd_client {
        struct workqueue_struct *kcopyd_wq;
        struct work_struct kcopyd_work;
 
+       struct dm_kcopyd_throttle *throttle;
+
 /*
  * We maintain three lists of jobs:
  *
@@ -68,6 +71,117 @@ struct dm_kcopyd_client {
 
 static struct page_list zero_page_list;
 
+static DEFINE_SPINLOCK(throttle_spinlock);
+
+/*
+ * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
+ * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) both counters are shifted
+ * right by the same amount, i.e. divided by a power of two (at least 2).
+ */
+#define ACCOUNT_INTERVAL_SHIFT         SHIFT_HZ
+
+/*
+ * Sleep this number of milliseconds.
+ *
+ * The value was decided experimentally.
+ * Smaller values seem to cause an increased copy rate above the limit.
+ * The reason for this is unknown but possibly due to jiffies rounding errors
+ * or read/write cache inside the disk.
+ */
+#define SLEEP_MSEC                     100
+
+/*
+ * Maximum number of sleep events. There is a theoretical livelock if several
+ * kcopyd clients do work simultaneously; this limit avoids it.
+ */
+#define MAX_SLEEPS                     10
+/*
+ * Account the start of an I/O job against throttle state 't' and, if the
+ * configured limit is exceeded, delay the caller before the job is counted.
+ *
+ * A NULL 't' makes this a no-op.  t->throttle is used as a percentage: a
+ * value of 100 or more skips the limit check entirely, otherwise the time
+ * accounted with I/O in flight (io_period) is compared against throttle% of
+ * the total accounted time (total_period).  While io_period is too high the
+ * function drops the lock, sleeps SLEEP_MSEC milliseconds and re-evaluates,
+ * at most MAX_SLEEPS times; after that the job is let through regardless.
+ *
+ * May sleep (msleep()).  All accounting is done under throttle_spinlock.
+ */
+static void io_job_start(struct dm_kcopyd_throttle *t)
+{
+       unsigned throttle, now, difference;
+       int slept = 0, skew;    /* slept: sleeps taken so far in this call */
+
+       if (unlikely(!t))
+               return;
+
+try_again:
+       spin_lock_irq(&throttle_spinlock);
+
+       throttle = ACCESS_ONCE(t->throttle);
+
+       if (likely(throttle >= 100))    /* no limit: just count the job */
+               goto skip_limit;
+       /*
+        * Charge the jiffies elapsed since the previous update to
+        * total_period, and also to io_period if at least one job was in
+        * flight during that time.
+        */
+       now = jiffies;
+       difference = now - t->last_jiffies;
+       t->last_jiffies = now;
+       if (t->num_io_jobs)
+               t->io_period += difference;
+       t->total_period += difference;
+
+       /*
+        * Maintain sane values if we got a temporary overflow: io_period is
+        * clamped so that it never exceeds total_period.
+        */
+       if (unlikely(t->io_period > t->total_period))
+               t->io_period = t->total_period;
+       /*
+        * Decay: once total_period reaches (1 << ACCOUNT_INTERVAL_SHIFT),
+        * shift both counters right by the same amount so that total_period
+        * drops back below that bound while the io/total ratio is roughly
+        * preserved.
+        */
+       if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
+               int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
+               t->total_period >>= shift;
+               t->io_period >>= shift;
+       }
+       /*
+        * A positive skew means io_period exceeds the allowed percentage of
+        * total_period.
+        */
+       skew = t->io_period - throttle * t->total_period / 100;
+
+       if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
+               slept++;
+               spin_unlock_irq(&throttle_spinlock);    /* drop the lock before sleeping */
+               msleep(SLEEP_MSEC);
+               goto try_again;
+       }
+
+skip_limit:
+       t->num_io_jobs++;
+
+       spin_unlock_irq(&throttle_spinlock);
+}
+/*
+ * Account the completion of an I/O job against throttle state 't'.
+ *
+ * A NULL 't' makes this a no-op.  The in-flight counter (num_io_jobs) is
+ * always decremented.  The time accounting is only updated when throttling
+ * is active (t->throttle below 100) and this was the last job in flight; in
+ * that case the jiffies elapsed since the previous update are charged to
+ * both io_period and total_period.
+ *
+ * Takes throttle_spinlock with spin_lock_irqsave(), so the caller's
+ * interrupt state is saved and restored.
+ */
+static void io_job_finish(struct dm_kcopyd_throttle *t)
+{
+       unsigned long flags;
+
+       if (unlikely(!t))
+               return;
+
+       spin_lock_irqsave(&throttle_spinlock, flags);
+
+       t->num_io_jobs--;
+
+       if (likely(ACCESS_ONCE(t->throttle) >= 100))    /* no limit: skip accounting */
+               goto skip_limit;
+
+       if (!t->num_io_jobs) {  /* the last in-flight job just finished */
+               unsigned now, difference;
+
+               now = jiffies;
+               difference = now - t->last_jiffies;
+               t->last_jiffies = now;
+
+               t->io_period += difference;
+               t->total_period += difference;
+
+               /*
+                * Maintain sane values if we got a temporary overflow:
+                * io_period is clamped so that it never exceeds total_period.
+                */
+               if (unlikely(t->io_period > t->total_period))
+                       t->io_period = t->total_period;
+       }
+
+skip_limit:
+       spin_unlock_irqrestore(&throttle_spinlock, flags);
+}
+
+
 static void wake(struct dm_kcopyd_client *kc)
 {
        queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
@@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context)
        struct kcopyd_job *job = (struct kcopyd_job *) context;
        struct dm_kcopyd_client *kc = job->kc;
 
+       io_job_finish(kc->throttle);
+
        if (error) {
                if (job->rw & WRITE)
                        job->write_err |= error;
@@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job)
                .client = job->kc->io_client,
        };
 
+       io_job_start(job->kc->throttle);
+
        if (job->rw == READ)
                r = dm_io(&io_req, 1, &job->source, NULL);
        else
@@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
 /*-----------------------------------------------------------------
  * Client setup
  *---------------------------------------------------------------*/
-struct dm_kcopyd_client *dm_kcopyd_client_create(void)
+struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
 {
        int r = -ENOMEM;
        struct dm_kcopyd_client *kc;
@@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void)
        INIT_LIST_HEAD(&kc->complete_jobs);
        INIT_LIST_HEAD(&kc->io_jobs);
        INIT_LIST_HEAD(&kc->pages_jobs);
+       kc->throttle = throttle;
 
        kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
        if (!kc->job_pool)