drbd: fix resend/resubmit of frozen IO

author Lars Ellenberg <lars.ellenberg@linbit.com>
Mon, 7 May 2012 09:53:08 +0000 (11:53 +0200)
committer Philipp Reisner <philipp.reisner@linbit.com>
Thu, 8 Nov 2012 15:58:27 +0000 (16:58 +0100)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h

index 4fad3f5..976e78c 100644 (file)
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -859,6 +859,7 @@ struct drbd_tconn {                 /* is a resource from the config file */
         unsigned int epochs;
         enum write_ordering_e write_ordering;
  
+       unsigned long last_reconnect_jif;
         struct drbd_thread receiver;
         struct drbd_thread worker;
         struct drbd_thread asender;
@@ -881,6 +882,7 @@ struct drbd_conf {
         struct block_device *this_bdev;
         struct gendisk      *vdisk;
  
+       unsigned long last_reattach_jif;
         struct drbd_work  resync_work,
                           unplug_work,
                           go_diskless,
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c

index c76402c..44a7d6b 100644 (file)
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1171,12 +1171,14 @@ void request_timer_fn(unsigned long data)
         struct list_head *le;
         struct net_conf *nc;
         unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
+       unsigned long now;
  
         rcu_read_lock();
         nc = rcu_dereference(tconn->net_conf);
-       ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
+       if (nc && mdev->state.conn >= C_WF_REPORT_PARAMS)
+               ent = nc->timeout * HZ/10 * nc->ko_count;
  
-       if (get_ldev(mdev)) {
+       if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
                 dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
                 put_ldev(mdev);
         }
@@ -1184,32 +1186,51 @@ void request_timer_fn(unsigned long data)
  
         et = min_not_zero(dt, ent);
  
-       if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
+       if (!et)
                 return; /* Recurring timer stopped */
  
+       now = jiffies;
+
         spin_lock_irq(&tconn->req_lock);
         le = &tconn->oldest_tle->requests;
         if (list_empty(le)) {
                 spin_unlock_irq(&tconn->req_lock);
-               mod_timer(&mdev->request_timer, jiffies + et);
+               mod_timer(&mdev->request_timer, now + et);
                 return;
         }
  
         le = le->prev;
         req = list_entry(le, struct drbd_request, tl_requests);
-       if (ent && req->rq_state & RQ_NET_PENDING) {
-               if (time_is_before_eq_jiffies(req->start_time + ent)) {
-                       dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
-                       _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
-               }
+
+       /* The request is considered timed out, if
+        * - we have some effective timeout from the configuration,
+        *   with above state restrictions applied,
+        * - the oldest request is waiting for a response from the network
+        *   resp. the local disk,
+        * - the oldest request is in fact older than the effective timeout,
+        * - the connection was established (resp. disk was attached)
+        *   for longer than the timeout already.
+        * Note that for 32bit jiffies and very stable connections/disks,
+        * we may have a wrap around, which is caught by
+        *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
+        *
+        * Side effect: once per 32bit wrap-around interval, which means every
+        * ~198 days with 250 HZ, we have a window where the timeout would need
+        * to expire twice (worst case) to become effective. Good enough.
+        */
+       if (ent && req->rq_state & RQ_NET_PENDING &&
+                time_after(now, req->start_time + ent) &&
+               !time_in_range(now, tconn->last_reconnect_jif, tconn->last_reconnect_jif + ent)) {
+               dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
+               _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
         }
-       if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev) {
-               if (time_is_before_eq_jiffies(req->start_time + dt)) {
-                       dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
-                       __drbd_chk_io_error(mdev, 1);
-               }
+       if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev &&
+                time_after(now, req->start_time + dt) &&
+               !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
+               dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
+               __drbd_chk_io_error(mdev, 1);
         }
-       nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et;
+       nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
         spin_unlock_irq(&tconn->req_lock);
         mod_timer(&mdev->request_timer, nt);
  }
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c

index c4554b1..2673049 100644 (file)
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1075,6 +1075,13 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
         if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
                 drbd_resume_al(mdev);
  
+       /* remember last attach time so request_timer_fn() won't
+        * kill newly established sessions while we are still trying to thaw
+        * previously frozen IO */
+       if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
+           ns.disk > D_NEGOTIATING)
+               mdev->last_reattach_jif = jiffies;
+
         ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
         if (ascw) {
                 ascw->os = os;
@@ -1609,8 +1616,15 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
         enum drbd_state_rv rv;
         int vnr, number_of_volumes = 0;
  
-       if (mask.conn == C_MASK)
+       if (mask.conn == C_MASK) {
+               /* remember last connect time so request_timer_fn() won't
+                * kill newly established sessions while we are still trying to thaw
+                * previously frozen IO */
+               if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS)
+                       tconn->last_reconnect_jif = jiffies;
+
                 tconn->cstate = val.conn;
+       }
  
         rcu_read_lock();
         idr_for_each_entry(&tconn->volumes, mdev, vnr) {
author	Lars Ellenberg <lars.ellenberg@linbit.com>
	Mon, 7 May 2012 09:53:08 +0000 (11:53 +0200)
committer	Philipp Reisner <philipp.reisner@linbit.com>
	Thu, 8 Nov 2012 15:58:27 +0000 (16:58 +0100)
drivers/block/drbd/drbd_int.h		patch \| blob \| history
drivers/block/drbd/drbd_req.c		patch \| blob \| history
drivers/block/drbd/drbd_state.c		patch \| blob \| history