drbd: improve throttling decisions of background resynchronisation

author Lars Ellenberg <lars.ellenberg@linbit.com>

Fri, 20 Dec 2013 10:22:13 +0000 (11:22 +0100)

committer Philipp Reisner <philipp.reisner@linbit.com>

Thu, 10 Jul 2014 16:35:13 +0000 (18:35 +0200)
author Lars Ellenberg <lars.ellenberg@linbit.com>
Fri, 20 Dec 2013 10:22:13 +0000 (11:22 +0100)
committer Philipp Reisner <philipp.reisner@linbit.com>
Thu, 10 Jul 2014 16:35:13 +0000 (18:35 +0200)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c

index d7e8066..6ce5c76 100644 (file)
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -991,6 +991,15 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
         struct lc_element *e;
         struct bm_extent *bm_ext;
         int i;
+       bool throttle = drbd_rs_should_slow_down(device, sector, true);
+
+       /* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
+        * not yet BME_LOCKED) extent needs to be kicked out explicitly.
+        * There is at most one such half-locked extent,
+        * which is remembered in resync_wenr. */
+
+       if (throttle && device->resync_wenr != enr)
+               return -EAGAIN;
  
         spin_lock_irq(&device->al_lock);
         if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
@@ -1014,8 +1023,10 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
                         D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
                         clear_bit(BME_NO_WRITES, &bm_ext->flags);
                         device->resync_wenr = LC_FREE;
-                       if (lc_put(device->resync, &bm_ext->lce) == 0)
+                       if (lc_put(device->resync, &bm_ext->lce) == 0) {
+                               bm_ext->flags = 0;
                                 device->resync_locked--;
+                       }
                         wake_up(&device->al_wait);
                 } else {
                         drbd_alert(device, "LOGIC BUG\n");
@@ -1077,8 +1088,20 @@ proceed:
         return 0;
  
  try_again:
-       if (bm_ext)
-               device->resync_wenr = enr;
+       if (bm_ext) {
+               if (throttle) {
+                       D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
+                       D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
+                       clear_bit(BME_NO_WRITES, &bm_ext->flags);
+                       device->resync_wenr = LC_FREE;
+                       if (lc_put(device->resync, &bm_ext->lce) == 0) {
+                               bm_ext->flags = 0;
+                               device->resync_locked--;
+                       }
+                       wake_up(&device->al_wait);
+               } else
+                       device->resync_wenr = enr;
+       }
         spin_unlock_irq(&device->al_lock);
         return -EAGAIN;
  }
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h

index fa010ea..81f4af4 100644 (file)
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -797,6 +797,7 @@ struct drbd_device {
         unsigned int al_writ_cnt;
         unsigned int bm_writ_cnt;
         atomic_t ap_bio_cnt;     /* Requests we need to complete */
+       atomic_t ap_actlog_cnt;  /* Requests waiting for activity log */
         atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
         atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
         atomic_t unacked_cnt;    /* Need to send replies for */
@@ -1454,7 +1455,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
  extern int drbd_receiver(struct drbd_thread *thi);
  extern int drbd_asender(struct drbd_thread *thi);
  extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
-extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+               bool throttle_if_app_is_waiting);
  extern int drbd_submit_peer_request(struct drbd_device *,
                                     struct drbd_peer_request *, const unsigned,
                                     const int);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c

index 5886596..ad7c0e8 100644 (file)
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1909,6 +1909,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
         drbd_set_defaults(device);
  
         atomic_set(&device->ap_bio_cnt, 0);
+       atomic_set(&device->ap_actlog_cnt, 0);
         atomic_set(&device->ap_pending_cnt, 0);
         atomic_set(&device->rs_pending_cnt, 0);
         atomic_set(&device->unacked_cnt, 0);
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c

index 9059d7b..06e6147 100644 (file)
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -335,6 +335,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
                         lc_seq_printf_stats(seq, device->act_log);
                         put_ldev(device);
                 }
+
+               if (proc_details >= 2)
+                       seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
         }
         rcu_read_unlock();
  
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c

index 7a1078d..0d3cbd8 100644 (file)
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2417,13 +2417,14 @@ out_interrupted:
   * The current sync rate used here uses only the most recent two step marks,
   * to have a short time average so we can react faster.
   */
-bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+               bool throttle_if_app_is_waiting)
  {
         struct lc_element *tmp;
-       bool throttle = true;
+       bool throttle = drbd_rs_c_min_rate_throttle(device);
  
-       if (!drbd_rs_c_min_rate_throttle(device))
-               return false;
+       if (!throttle || throttle_if_app_is_waiting)
+               return throttle;
  
         spin_lock_irq(&device->al_lock);
         tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
@@ -2431,7 +2432,8 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
                 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
                 if (test_bit(BME_PRIORITY, &bm_ext->flags))
                         throttle = false;
-               /* Do not slow down if app IO is already waiting for this extent */
+               /* Do not slow down if app IO is already waiting for this extent,
+                * and our progress is necessary for application IO to complete. */
         }
         spin_unlock_irq(&device->al_lock);
  
@@ -2456,7 +2458,9 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
         curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
                       (int)part_stat_read(&disk->part0, sectors[1]) -
                         atomic_read(&device->rs_sect_ev);
-       if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
+
+       if (atomic_read(&device->ap_actlog_cnt)
+           || !device->rs_last_events || curr_events - device->rs_last_events > 64) {
                 unsigned long rs_left;
                 int i;
  
@@ -2646,7 +2650,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
          * we would also throttle its application reads.
          * In that case, throttling is done on the SyncTarget only.
          */
-       if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
+       if (device->state.peer != R_PRIMARY
+       && drbd_rs_should_slow_down(device, sector, false))
                 schedule_timeout_uninterruptible(HZ/10);
         if (drbd_rs_begin_io(device, sector))
                 goto out_free_e;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c

index 3f6a6ed..74ebef1 100644 (file)
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1218,6 +1218,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
         if (rw == WRITE && req->private_bio && req->i.size
         && !test_bit(AL_SUSPENDED, &device->flags)) {
                 if (!drbd_al_begin_io_fastpath(device, &req->i)) {
+                       atomic_inc(&device->ap_actlog_cnt);
                         drbd_queue_write(device, req);
                         return NULL;
                 }
@@ -1354,6 +1355,7 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
  
                         req->rq_state |= RQ_IN_ACT_LOG;
                         req->in_actlog_jif = jiffies;
+                       atomic_dec(&device->ap_actlog_cnt);
                 }
  
                 list_del_init(&req->tl_requests);
@@ -1439,6 +1441,7 @@ skip_fast_path:
                 list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
                         req->rq_state |= RQ_IN_ACT_LOG;
                         req->in_actlog_jif = jiffies;
+                       atomic_dec(&device->ap_actlog_cnt);
                         list_del_init(&req->tl_requests);
                         drbd_send_and_submit(device, req);
                 }
@@ -1454,6 +1457,7 @@ skip_fast_path:
                         if (!was_cold) {
                                 req->rq_state |= RQ_IN_ACT_LOG;
                                 req->in_actlog_jif = jiffies;
+                               atomic_dec(&device->ap_actlog_cnt);
                                 /* Corresponding extent was hot after all? */
                                 drbd_send_and_submit(device, req);
                         } else {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c

index 0ff8f46..48975a2 100644 (file)
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -395,9 +395,6 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
         if (!get_ldev(device))
                 return -EIO;
  
-       if (drbd_rs_should_slow_down(device, sector))
-               goto defer;
-
         /* GFP_TRY, because if there is no memory available right now, this may
          * be rescheduled for later. It is "only" background resync, after all. */
         peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
@@ -651,8 +648,7 @@ next_sector:
  
                 sector = BM_BIT_TO_SECT(bit);
  
-               if (drbd_rs_should_slow_down(device, sector) ||
-                   drbd_try_rs_begin_io(device, sector)) {
+               if (drbd_try_rs_begin_io(device, sector)) {
                         device->bm_resync_fo = bit;
                         goto requeue;
                 }
@@ -783,8 +779,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
  
                 size = BM_BLOCK_SIZE;
  
-               if (drbd_rs_should_slow_down(device, sector) ||
-                   drbd_try_rs_begin_io(device, sector)) {
+               if (drbd_try_rs_begin_io(device, sector)) {
                         device->ov_position = sector;
                         goto requeue;
                 }
author	Lars Ellenberg <lars.ellenberg@linbit.com>
	Fri, 20 Dec 2013 10:22:13 +0000 (11:22 +0100)
committer	Philipp Reisner <philipp.reisner@linbit.com>
	Thu, 10 Jul 2014 16:35:13 +0000 (18:35 +0200)
drivers/block/drbd/drbd_actlog.c		patch \| blob \| history
drivers/block/drbd/drbd_int.h		patch \| blob \| history
drivers/block/drbd/drbd_main.c		patch \| blob \| history
drivers/block/drbd/drbd_proc.c		patch \| blob \| history
drivers/block/drbd/drbd_receiver.c		patch \| blob \| history
drivers/block/drbd/drbd_req.c		patch \| blob \| history
drivers/block/drbd/drbd_worker.c		patch \| blob \| history