raid1: Rewrite the implementation of iobarrier.
[platform/adaptation/renesas_rcar/renesas_kernel.git] / drivers / md / raid1.c
index 78da339..d9ee4ed 100644 (file)
@@ -66,7 +66,8 @@
  */
 static int max_queued_requests = 1024;
 
-static void allow_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector);
 static void lower_barrier(struct r1conf *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -227,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
        struct bio *bio = r1_bio->master_bio;
        int done;
        struct r1conf *conf = r1_bio->mddev->private;
+       sector_t start_next_window = r1_bio->start_next_window;
+       sector_t bi_sector = bio->bi_sector;
 
        if (bio->bi_phys_segments) {
                unsigned long flags;
@@ -234,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio)
                bio->bi_phys_segments--;
                done = (bio->bi_phys_segments == 0);
                spin_unlock_irqrestore(&conf->device_lock, flags);
+               /*
+                * make_request() might be waiting for
+                * bi_phys_segments to decrease
+                */
+               wake_up(&conf->wait_barrier);
        } else
                done = 1;
 
@@ -245,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
                 */
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bi_sector);
        }
 }
 
@@ -827,10 +835,19 @@ static void raise_barrier(struct r1conf *conf)
        /* block any new IO from starting */
        conf->barrier++;
 
-       /* Now wait for all pending IO to complete */
+       /* We must wait while any of these conditions holds:
+        * A: the array is in the frozen state.
+        * B: barrier >= RESYNC_DEPTH, meaning resync has reached
+        *    the maximum count allowed.
+        * C: next_resync + RESYNC_SECTORS > start_next_window, meaning
+        *    the next resync would reach into the window in which normal
+        *    bios are being handled.
+        */
        wait_event_lock_irq(conf->wait_barrier,
                            !conf->array_frozen &&
-                           !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                           conf->barrier < RESYNC_DEPTH &&
+                           (conf->start_next_window >=
+                            conf->next_resync + RESYNC_SECTORS),
                            conf->resync_lock);
 
        spin_unlock_irq(&conf->resync_lock);
@@ -846,10 +863,33 @@ static void lower_barrier(struct r1conf *conf)
        wake_up(&conf->wait_barrier);
 }
 
-static void wait_barrier(struct r1conf *conf)
+static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
+{
+       bool wait = false;
+
+       if (conf->array_frozen || !bio)
+               wait = true;
+       else if (conf->barrier && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync < RESYNC_WINDOW_SECTORS)
+                       wait = true;
+               else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
+                               >= bio_end_sector(bio)) ||
+                        (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                               <= bio->bi_sector))
+                       wait = false;
+               else
+                       wait = true;
+       }
+
+       return wait;
+}
+
+static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
 {
+       sector_t sector = 0;
+
        spin_lock_irq(&conf->resync_lock);
-       if (conf->barrier) {
+       if (need_to_wait_for_sync(conf, bio)) {
                conf->nr_waiting++;
                /* Wait for the barrier to drop.
                 * However if there are already pending
@@ -863,21 +903,65 @@ static void wait_barrier(struct r1conf *conf)
                wait_event_lock_irq(conf->wait_barrier,
                                    !conf->array_frozen &&
                                    (!conf->barrier ||
-                                   (conf->nr_pending &&
+                                   ((conf->start_next_window <
+                                     conf->next_resync + RESYNC_SECTORS) &&
                                     current->bio_list &&
                                     !bio_list_empty(current->bio_list))),
                                    conf->resync_lock);
                conf->nr_waiting--;
        }
+
+       if (bio && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                   <= bio->bi_sector) {
+                       if (conf->start_next_window == MaxSector)
+                               conf->start_next_window =
+                                       conf->next_resync +
+                                       NEXT_NORMALIO_DISTANCE;
+
+                       if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
+                           <= bio->bi_sector)
+                               conf->next_window_requests++;
+                       else
+                               conf->current_window_requests++;
+               }
+               if (bio->bi_sector >= conf->start_next_window)
+                       sector = conf->start_next_window;
+       }
+
        conf->nr_pending++;
        spin_unlock_irq(&conf->resync_lock);
+       return sector;
 }
 
-static void allow_barrier(struct r1conf *conf)
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector)
 {
        unsigned long flags;
+
        spin_lock_irqsave(&conf->resync_lock, flags);
        conf->nr_pending--;
+       if (start_next_window) {
+               if (start_next_window == conf->start_next_window) {
+                       if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+                           <= bi_sector)
+                               conf->next_window_requests--;
+                       else
+                               conf->current_window_requests--;
+               } else
+                       conf->current_window_requests--;
+
+               if (!conf->current_window_requests) {
+                       if (conf->next_window_requests) {
+                               conf->current_window_requests =
+                                       conf->next_window_requests;
+                               conf->next_window_requests = 0;
+                               conf->start_next_window +=
+                                       NEXT_NORMALIO_DISTANCE;
+                       } else
+                               conf->start_next_window = MaxSector;
+               }
+       }
        spin_unlock_irqrestore(&conf->resync_lock, flags);
        wake_up(&conf->wait_barrier);
 }
@@ -1012,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        int first_clone;
        int sectors_handled;
        int max_sectors;
+       sector_t start_next_window;
 
        /*
         * Register the new request and wait if the reconstruction
@@ -1041,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                finish_wait(&conf->wait_barrier, &w);
        }
 
-       wait_barrier(conf);
+       start_next_window = wait_barrier(conf, bio);
 
        bitmap = mddev->bitmap;
 
@@ -1162,6 +1247,7 @@ read_again:
 
        disks = conf->raid_disks * 2;
  retry_write:
+       r1_bio->start_next_window = start_next_window;
        blocked_rdev = NULL;
        rcu_read_lock();
        max_sectors = r1_bio->sectors;
@@ -1230,14 +1316,24 @@ read_again:
        if (unlikely(blocked_rdev)) {
                /* Wait for this device to become unblocked */
                int j;
+               sector_t old = start_next_window;
 
                for (j = 0; j < i; j++)
                        if (r1_bio->bios[j])
                                rdev_dec_pending(conf->mirrors[j].rdev, mddev);
                r1_bio->state = 0;
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bio->bi_sector);
                md_wait_for_blocked_rdev(blocked_rdev, mddev);
-               wait_barrier(conf);
+               start_next_window = wait_barrier(conf, bio);
+               /*
+                * We must make sure the multiple r1bios of this bio
+                * all use the same value of start_next_window
+                */
+               if (bio->bi_phys_segments && old &&
+                   old != start_next_window)
+                       /* Wait for the former r1bio(s) to complete */
+                       wait_event(conf->wait_barrier,
+                                  bio->bi_phys_segments == 1);
                goto retry_write;
        }
 
@@ -1437,11 +1533,14 @@ static void print_conf(struct r1conf *conf)
 
 static void close_sync(struct r1conf *conf)
 {
-       wait_barrier(conf);
-       allow_barrier(conf);
+       wait_barrier(conf, NULL);
+       allow_barrier(conf, 0, 0);
 
        mempool_destroy(conf->r1buf_pool);
        conf->r1buf_pool = NULL;
+
+       conf->next_resync = 0;
+       conf->start_next_window = MaxSector;
 }
 
 static int raid1_spare_active(struct mddev *mddev)
@@ -2713,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
        conf->pending_count = 0;
        conf->recovery_disabled = mddev->recovery_disabled - 1;
 
+       conf->start_next_window = MaxSector;
+       conf->current_window_requests = conf->next_window_requests = 0;
+
        err = -EIO;
        for (i = 0; i < conf->raid_disks * 2; i++) {