ceph: update the auth cap when the async create req is forwarded
author Xiubo Li <xiubli@redhat.com>
Fri, 10 Jun 2022 01:53:21 +0000 (09:53 +0800)
committer Ilya Dryomov <idryomov@gmail.com>
Tue, 2 Aug 2022 22:54:12 +0000 (00:54 +0200)
For async create we always try to send the request to the auth MDS
of the parent directory's frag that the dentry belongs to, and
usually this works fine. But if the MDS migrates the directory to
another MDS before the request can be handled, the request will be
forwarded, and then the auth cap will be changed.

We need to update the auth cap in this case before the request is
forwarded.

Link: https://tracker.ceph.com/issues/55857
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/file.c
fs/ceph/mds_client.c
fs/ceph/super.h

index fefa6de..cec1111 100644 (file)
@@ -612,6 +612,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
        struct ceph_mds_reply_inode in = { };
        struct ceph_mds_reply_info_in iinfo = { .in = &in };
        struct ceph_inode_info *ci = ceph_inode(dir);
+       struct ceph_dentry_info *di = ceph_dentry(dentry);
        struct inode *inode;
        struct timespec64 now;
        struct ceph_string *pool_ns;
@@ -714,6 +715,12 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
                file->f_mode |= FMODE_CREATED;
                ret = finish_open(file, dentry, ceph_open);
        }
+
+       spin_lock(&dentry->d_lock);
+       di->flags &= ~CEPH_DENTRY_ASYNC_CREATE;
+       wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT);
+       spin_unlock(&dentry->d_lock);
+
        return ret;
 }
 
@@ -790,9 +797,16 @@ retry:
                    (req->r_dir_caps =
                      try_prep_async_create(dir, dentry, &lo,
                                            &req->r_deleg_ino))) {
+                       struct ceph_dentry_info *di = ceph_dentry(dentry);
+
                        set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
                        req->r_args.open.flags |= cpu_to_le32(CEPH_O_EXCL);
                        req->r_callback = ceph_async_create_cb;
+
+                       spin_lock(&dentry->d_lock);
+                       di->flags |= CEPH_DENTRY_ASYNC_CREATE;
+                       spin_unlock(&dentry->d_lock);
+
                        err = ceph_mdsc_submit_request(mdsc, dir, req);
                        if (!err) {
                                err = ceph_finish_async_create(dir, dentry,
index 9cfa7b7..80f8b9e 100644 (file)
@@ -2960,6 +2960,64 @@ static void __do_request(struct ceph_mds_client *mdsc,
        if (req->r_request_started == 0)   /* note request start time */
                req->r_request_started = jiffies;
 
+       /*
+        * For async create we choose the auth MDS of the frag in the parent
+        * directory to send the request to, and usually this works fine. But
+        * if the MDS migrated the directory to another MDS before it could
+        * handle it, the request will be forwarded.
+        *
+        * And then the auth cap will be changed.
+        */
+       if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags) && req->r_num_fwd) {
+               struct ceph_dentry_info *di = ceph_dentry(req->r_dentry);
+               struct ceph_inode_info *ci;
+               struct ceph_cap *cap;
+
+               /*
+                * The request may be handled very fast, before the new inode
+                * has been linked to the dentry. We need to wait for
+                * ceph_finish_async_create(), which in theory shouldn't be
+                * stuck for too long or fail, to finish when forwarding
+                * the request.
+                */
+               if (!d_inode(req->r_dentry)) {
+                       err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT,
+                                         TASK_KILLABLE);
+                       if (err) {
+                               mutex_lock(&req->r_fill_mutex);
+                               set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+                               mutex_unlock(&req->r_fill_mutex);
+                               goto out_session;
+                       }
+               }
+
+               ci = ceph_inode(d_inode(req->r_dentry));
+
+               spin_lock(&ci->i_ceph_lock);
+               cap = ci->i_auth_cap;
+               if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) {
+                       dout("do_request session changed for auth cap %d -> %d\n",
+                            cap->session->s_mds, session->s_mds);
+
+                       /* Remove the auth cap from old session */
+                       spin_lock(&cap->session->s_cap_lock);
+                       cap->session->s_nr_caps--;
+                       list_del_init(&cap->session_caps);
+                       spin_unlock(&cap->session->s_cap_lock);
+
+                       /* Add the auth cap to the new session */
+                       cap->mds = mds;
+                       cap->session = session;
+                       spin_lock(&session->s_cap_lock);
+                       session->s_nr_caps++;
+                       list_add_tail(&cap->session_caps, &session->s_caps);
+                       spin_unlock(&session->s_cap_lock);
+
+                       change_auth_cap_ses(ci, session);
+               }
+               spin_unlock(&ci->i_ceph_lock);
+       }
+
        err = __send_request(session, req, false);
 
 out_session:
index 3c940a8..3b146c0 100644 (file)
@@ -302,6 +302,8 @@ struct ceph_dentry_info {
 #define CEPH_DENTRY_PRIMARY_LINK       (1 << 3)
 #define CEPH_DENTRY_ASYNC_UNLINK_BIT   (4)
 #define CEPH_DENTRY_ASYNC_UNLINK       (1 << CEPH_DENTRY_ASYNC_UNLINK_BIT)
+#define CEPH_DENTRY_ASYNC_CREATE_BIT   (5)
+#define CEPH_DENTRY_ASYNC_CREATE       (1 << CEPH_DENTRY_ASYNC_CREATE_BIT)
 
 struct ceph_inode_xattrs_info {
        /*