From 685f9a5d14194fc35db73e5e7370740ccc14b64a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 9 Nov 2009 12:05:48 -0800 Subject: [PATCH] ceph: do not confuse stale and dead (unreconnected) caps We were using the cap_gen to track both stale caps (caps that timed out due to temporarily losing touch with the mds) and dead caps that did not reconnect after an MDS failure. Introduce a recon_gen counter to track reconnections to restarted MDSs and kill dead caps based on that instead. Rename gen to cap_gen while we're at it to make it more clear which is which. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 20 +++++++++++++++----- fs/ceph/mds_client.c | 9 ++++++--- fs/ceph/mds_client.h | 2 ++ fs/ceph/super.h | 4 +++- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 8b863db..775e6f6 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -609,7 +609,8 @@ retry: cap->seq = seq; cap->issue_seq = seq; cap->mseq = mseq; - cap->gen = session->s_cap_gen; + cap->cap_gen = session->s_cap_gen; + cap->recon_gen = session->s_recon_gen; if (fmode >= 0) __ceph_get_fmode(ci, fmode); @@ -626,17 +627,25 @@ retry: static int __cap_is_valid(struct ceph_cap *cap) { unsigned long ttl; - u32 gen; + u32 gen, recon_gen; spin_lock(&cap->session->s_cap_lock); gen = cap->session->s_cap_gen; + recon_gen = cap->session->s_recon_gen; ttl = cap->session->s_cap_ttl; spin_unlock(&cap->session->s_cap_lock); - if (cap->gen < gen || time_after_eq(jiffies, ttl)) { + if (cap->recon_gen != recon_gen) { + dout("__cap_is_valid %p cap %p issued %s " + "but DEAD (recon_gen %u vs %u)\n", &cap->ci->vfs_inode, + cap, ceph_cap_string(cap->issued), cap->recon_gen, + recon_gen); + return 0; + } + if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { dout("__cap_is_valid %p cap %p issued %s " "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode, - cap, ceph_cap_string(cap->issued), cap->gen, gen); + cap, ceph_cap_string(cap->issued), cap->cap_gen, gen); return 0; } @@ -2203,7 +2212,8 @@ restart: issued = __ceph_caps_issued(ci, &implemented); issued |= implemented | __ceph_caps_dirty(ci); - cap->gen = session->s_cap_gen; + cap->cap_gen = session->s_cap_gen; + cap->recon_gen = session->s_recon_gen; __check_cap_issue(ci, cap, newcaps); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 210cb66..828417a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -329,6 +329,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); spin_lock_init(&s->s_cap_lock); + s->s_recon_gen = 0; s->s_cap_gen = 0; s->s_cap_ttl = 0; s->s_renew_requested = 0; @@ -738,10 +739,11 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_mds_session *session = arg; spin_lock(&inode->i_lock); - if (cap->gen != session->s_cap_gen) { + if (cap->recon_gen != session->s_recon_gen) { pr_err("failed reconnect %p %llx.%llx cap %p " - "(gen %d < session %d)\n", inode, ceph_vinop(inode), - cap, cap->gen, session->s_cap_gen); + "(recon_gen %d < session %d)\n", inode, + ceph_vinop(inode), cap, + cap->recon_gen, session->s_recon_gen); __ceph_remove_cap(cap, NULL); } wake_up(&ceph_inode(inode)->i_cap_wq); @@ -2050,6 +2052,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) session->s_state = CEPH_MDS_SESSION_RECONNECTING; session->s_seq = 0; + session->s_recon_gen++; ceph_con_open(&session->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index f566e9c..c0846b1 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -98,6 +98,8 @@ struct ceph_mds_session { u64 s_seq; /* incoming msg seq # */ struct mutex s_mutex; /* serialize session messages */ + int s_recon_gen; /* inc on reconnect to recovered mds */ + struct ceph_connection s_con; /* protected by s_cap_lock */ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 05947b9..2579355 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -169,7 +169,9 @@ struct ceph_cap { int issued; /* latest, from the mds */ int implemented; /* implemented superset of issued (for revocation) */ int mds_wanted; - u32 seq, issue_seq, mseq, gen; + u32 seq, issue_seq, mseq; + u32 cap_gen; /* active/stale cycle */ + u32 recon_gen; /* mds restart reconnect cycle */ unsigned long last_used; struct list_head caps_item; }; -- 2.7.4