From e80a52d14f868059e8ec790c9fae88cdb8a1df98 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 25 Feb 2010 12:40:45 -0800 Subject: [PATCH] ceph: fix connection fault STANDBY check Move any out_sent messages to out_queue _before_ checking if out_queue is empty and going to STANDBY, or else we may drop something that was never acked. And clean up the code a bit (less goto). Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 9ea7b76..0ddc2c7 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -1853,32 +1853,27 @@ static void ceph_fault(struct ceph_connection *con) con->in_msg = NULL; } + /* Requeue anything that hasn't been acked */ + list_splice_init(&con->out_sent, &con->out_queue); /* If there are no messages in the queue, place the connection * in a STANDBY state (i.e., don't try to reconnect just yet). */ if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { dout("fault setting STANDBY\n"); set_bit(STANDBY, &con->state); - mutex_unlock(&con->mutex); - goto out; + } else { + /* retry after a delay. */ + if (con->delay == 0) + con->delay = BASE_DELAY_INTERVAL; + else if (con->delay < MAX_DELAY_INTERVAL) + con->delay *= 2; + dout("fault queueing %p delay %lu\n", con, con->delay); + con->ops->get(con); + if (queue_delayed_work(ceph_msgr_wq, &con->work, + round_jiffies_relative(con->delay)) == 0) + con->ops->put(con); } - /* Requeue anything that hasn't been acked, and retry after a - * delay. */ - list_splice_init(&con->out_sent, &con->out_queue); - - if (con->delay == 0) - con->delay = BASE_DELAY_INTERVAL; - else if (con->delay < MAX_DELAY_INTERVAL) - con->delay *= 2; - - /* explicitly schedule work to try to reconnect again later. */ - dout("fault queueing %p delay %lu\n", con, con->delay); - con->ops->get(con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay)) == 0) - con->ops->put(con); - out_unlock: mutex_unlock(&con->mutex); out: -- 2.7.4