fs: dlm: make buffer handling per msg
authorAlexander Aring <aahringo@redhat.com>
Fri, 21 May 2021 19:08:42 +0000 (15:08 -0400)
committerDavid Teigland <teigland@redhat.com>
Tue, 25 May 2021 14:22:20 +0000 (09:22 -0500)
This patch makes the void pointer handle for lowcomms functionality per
message and not per page allocation entry. A refcount handling for the
handle was added to keep the message alive until the user doesn't need
it anymore.

There exists now a per message callback which will be called when
allocating a new buffer. This callback will be guaranteed to be called
according the order of the sending buffer, which can be used that the
caller increments a sequence number for the dlm message handle.

For transition process we cast the dlm_mhandle to dlm_msg and vice versa
until the midcomms layer will implement a specific dlm_mhandle structure.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
fs/dlm/dlm_internal.h
fs/dlm/lowcomms.c
fs/dlm/lowcomms.h
fs/dlm/midcomms.c
fs/dlm/rcom.c

index ae3fdf6..e8dc5f4 100644 (file)
@@ -57,6 +57,7 @@ struct dlm_header;
 struct dlm_message;
 struct dlm_rcom;
 struct dlm_mhandle;
+struct dlm_msg;
 
 #define log_print(fmt, args...) \
        printk(KERN_ERR "dlm: "fmt"\n" , ##args)
index 14ca3ed..d222e60 100644 (file)
@@ -119,8 +119,19 @@ struct writequeue_entry {
        int len;
        int end;
        int users;
-       int idx; /* get()/commit() idx exchange */
        struct connection *con;
+       struct list_head msgs;
+       struct kref ref;
+};
+
+struct dlm_msg {
+       struct writequeue_entry *entry;
+       void *ppc;
+       int len;
+       int idx; /* new()/commit() idx exchange */
+
+       struct list_head list;
+       struct kref ref;
 };
 
 struct dlm_node_addr {
@@ -1022,12 +1033,37 @@ accept_err:
        return result;
 }
 
-static void free_entry(struct writequeue_entry *e)
+static void dlm_page_release(struct kref *kref)
 {
+       struct writequeue_entry *e = container_of(kref, struct writequeue_entry,
+                                                 ref);
+
        __free_page(e->page);
        kfree(e);
 }
 
+static void dlm_msg_release(struct kref *kref)
+{
+       struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);
+
+       kref_put(&msg->entry->ref, dlm_page_release);
+       kfree(msg);
+}
+
+static void free_entry(struct writequeue_entry *e)
+{
+       struct dlm_msg *msg, *tmp;
+
+       list_for_each_entry_safe(msg, tmp, &e->msgs, list) {
+               list_del(&msg->list);
+               kref_put(&msg->ref, dlm_msg_release);
+       }
+
+       list_del(&e->list);
+       atomic_dec(&e->con->writequeue_cnt);
+       kref_put(&e->ref, dlm_page_release);
+}
+
 /*
  * writequeue_entry_complete - try to delete and free write queue entry
  * @e: write queue entry to try to delete
@@ -1040,11 +1076,8 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
        e->offset += completed;
        e->len -= completed;
 
-       if (e->len == 0 && e->users == 0) {
-               list_del(&e->list);
-               atomic_dec(&e->con->writequeue_cnt);
+       if (e->len == 0 && e->users == 0)
                free_entry(e);
-       }
 }
 
 /*
@@ -1410,12 +1443,16 @@ static struct writequeue_entry *new_writequeue_entry(struct connection *con,
 
        entry->con = con;
        entry->users = 1;
+       kref_init(&entry->ref);
+       INIT_LIST_HEAD(&entry->msgs);
 
        return entry;
 }
 
 static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
-                                            gfp_t allocation, char **ppc)
+                                            gfp_t allocation, char **ppc,
+                                            void (*cb)(struct dlm_mhandle *mh),
+                                            struct dlm_mhandle *mh)
 {
        struct writequeue_entry *e;
 
@@ -1423,7 +1460,12 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
        if (!list_empty(&con->writequeue)) {
                e = list_last_entry(&con->writequeue, struct writequeue_entry, list);
                if (DLM_WQ_REMAIN_BYTES(e) >= len) {
+                       kref_get(&e->ref);
+
                        *ppc = page_address(e->page) + e->end;
+                       if (cb)
+                               cb(mh);
+
                        e->end += len;
                        e->users++;
                        spin_unlock(&con->writequeue_lock);
@@ -1437,21 +1479,28 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
        if (!e)
                return NULL;
 
+       kref_get(&e->ref);
        *ppc = page_address(e->page);
        e->end += len;
        atomic_inc(&con->writequeue_cnt);
 
        spin_lock(&con->writequeue_lock);
+       if (cb)
+               cb(mh);
+
        list_add_tail(&e->list, &con->writequeue);
        spin_unlock(&con->writequeue_lock);
 
        return e;
 };
 
-void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
+struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
+                                    char **ppc, void (*cb)(struct dlm_mhandle *mh),
+                                    struct dlm_mhandle *mh)
 {
        struct writequeue_entry *e;
        struct connection *con;
+       struct dlm_msg *msg;
        int idx;
 
        if (len > DEFAULT_BUFFER_SIZE ||
@@ -1469,25 +1518,41 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
                return NULL;
        }
 
-       e = new_wq_entry(con, len, allocation, ppc);
+       msg = kzalloc(sizeof(*msg), allocation);
+       if (!msg) {
+               srcu_read_unlock(&connections_srcu, idx);
+               return NULL;
+       }
+
+       kref_init(&msg->ref);
+
+       e = new_wq_entry(con, len, allocation, ppc, cb, mh);
        if (!e) {
                srcu_read_unlock(&connections_srcu, idx);
+               kfree(msg);
                return NULL;
        }
 
+       msg->ppc = *ppc;
+       msg->len = len;
+       msg->entry = e;
+
        /* we assume if successful commit must called */
-       e->idx = idx;
+       msg->idx = idx;
 
-       return e;
+       return msg;
 }
 
-void dlm_lowcomms_commit_buffer(void *mh)
+void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
 {
-       struct writequeue_entry *e = (struct writequeue_entry *)mh;
+       struct writequeue_entry *e = msg->entry;
        struct connection *con = e->con;
        int users;
 
        spin_lock(&con->writequeue_lock);
+       kref_get(&msg->ref);
+       list_add(&msg->list, &e->msgs);
+
        users = --e->users;
        if (users)
                goto out;
@@ -1496,15 +1561,20 @@ void dlm_lowcomms_commit_buffer(void *mh)
        spin_unlock(&con->writequeue_lock);
 
        queue_work(send_workqueue, &con->swork);
-       srcu_read_unlock(&connections_srcu, e->idx);
+       srcu_read_unlock(&connections_srcu, msg->idx);
        return;
 
 out:
        spin_unlock(&con->writequeue_lock);
-       srcu_read_unlock(&connections_srcu, e->idx);
+       srcu_read_unlock(&connections_srcu, msg->idx);
        return;
 }
 
+void dlm_lowcomms_put_msg(struct dlm_msg *msg)
+{
+       kref_put(&msg->ref, dlm_msg_release);
+}
+
 /* Send a message */
 static void send_to_sock(struct connection *con)
 {
@@ -1590,7 +1660,6 @@ static void clean_one_writequeue(struct connection *con)
 
        spin_lock(&con->writequeue_lock);
        list_for_each_entry_safe(e, safe, &con->writequeue, list) {
-               list_del(&e->list);
                free_entry(e);
        }
        spin_unlock(&con->writequeue_lock);
index 48bbc4e..cdb8f06 100644 (file)
@@ -22,8 +22,11 @@ void dlm_lowcomms_shutdown(void);
 void dlm_lowcomms_stop(void);
 void dlm_lowcomms_exit(void);
 int dlm_lowcomms_close(int nodeid);
-void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
-void dlm_lowcomms_commit_buffer(void *mh);
+struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
+                                    char **ppc, void (*cb)(struct dlm_mhandle *mh),
+                                    struct dlm_mhandle *mh);
+void dlm_lowcomms_commit_msg(struct dlm_msg *msg);
+void dlm_lowcomms_put_msg(struct dlm_msg *msg);
 int dlm_lowcomms_connect_node(int nodeid);
 int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
 int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
index 1a280dd..aadb378 100644 (file)
 struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
                                             gfp_t allocation, char **ppc)
 {
-       return dlm_lowcomms_get_buffer(nodeid, len, allocation, ppc);
+       return (struct dlm_mhandle *)dlm_lowcomms_new_msg(nodeid, len,
+                                                         allocation, ppc,
+                                                         NULL, NULL);
 }
 
 void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
 {
-       dlm_lowcomms_commit_buffer(mh);
+       dlm_lowcomms_commit_msg((struct dlm_msg *)mh);
+       dlm_lowcomms_put_msg((struct dlm_msg *)mh);
 }
 
 void dlm_midcomms_add_member(int nodeid) { }
index 2661674..6f653a3 100644 (file)
@@ -70,21 +70,22 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
 
 static int create_rcom_stateless(struct dlm_ls *ls, int to_nodeid, int type,
                                 int len, struct dlm_rcom **rc_ret,
-                                void **mh_ret)
+                                struct dlm_msg **msg_ret)
 {
        int mb_len = sizeof(struct dlm_rcom) + len;
-       void *mh;
+       struct dlm_msg *msg;
        char *mb;
 
-       mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
-       if (!mh) {
+       msg = dlm_lowcomms_new_msg(to_nodeid, mb_len, GFP_NOFS, &mb,
+                                  NULL, NULL);
+       if (!msg) {
                log_print("create_rcom to %d type %d len %d ENOBUFS",
                          to_nodeid, type, len);
                return -ENOBUFS;
        }
 
        _create_rcom(ls, to_nodeid, type, len, rc_ret, mb, mb_len);
-       *mh_ret = mh;
+       *msg_ret = msg;
        return 0;
 }
 
@@ -100,11 +101,12 @@ static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
        dlm_midcomms_commit_mhandle(mh);
 }
 
-static void send_rcom_stateless(struct dlm_ls *ls, void *mh,
+static void send_rcom_stateless(struct dlm_ls *ls, struct dlm_msg *msg,
                                struct dlm_rcom *rc)
 {
        _send_rcom(ls, rc);
-       dlm_lowcomms_commit_buffer(mh);
+       dlm_lowcomms_commit_msg(msg);
+       dlm_lowcomms_put_msg(msg);
 }
 
 static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs,
@@ -180,8 +182,8 @@ static void disallow_sync_reply(struct dlm_ls *ls)
 int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
 {
        struct dlm_rcom *rc;
+       struct dlm_msg *msg;
        int error = 0;
-       void *mh;
 
        ls->ls_recover_nodeid = nodeid;
 
@@ -193,7 +195,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
 
 retry:
        error = create_rcom_stateless(ls, nodeid, DLM_RCOM_STATUS,
-                                     sizeof(struct rcom_status), &rc, &mh);
+                                     sizeof(struct rcom_status), &rc, &msg);
        if (error)
                goto out;
 
@@ -202,7 +204,7 @@ retry:
        allow_sync_reply(ls, &rc->rc_id);
        memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
 
-       send_rcom_stateless(ls, mh, rc);
+       send_rcom_stateless(ls, msg, rc);
 
        error = dlm_wait_function(ls, &rcom_response);
        disallow_sync_reply(ls);
@@ -234,9 +236,9 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
        uint32_t status;
        int nodeid = rc_in->rc_header.h_nodeid;
        int len = sizeof(struct rcom_config);
+       struct dlm_msg *msg;
        int num_slots = 0;
        int error;
-       void *mh;
 
        if (!dlm_slots_version(&rc_in->rc_header)) {
                status = dlm_recover_status(ls);
@@ -258,7 +260,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
  do_create:
        error = create_rcom_stateless(ls, nodeid, DLM_RCOM_STATUS_REPLY,
-                                     len, &rc, &mh);
+                                     len, &rc, &msg);
        if (error)
                return;
 
@@ -285,7 +287,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
        spin_unlock(&ls->ls_recover_lock);
 
  do_send:
-       send_rcom_stateless(ls, mh, rc);
+       send_rcom_stateless(ls, msg, rc);
 }
 
 static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -310,14 +312,14 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 {
        struct dlm_rcom *rc;
+       struct dlm_msg *msg;
        int error = 0;
-       void *mh;
 
        ls->ls_recover_nodeid = nodeid;
 
 retry:
        error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES, last_len,
-                                     &rc, &mh);
+                                     &rc, &msg);
        if (error)
                goto out;
        memcpy(rc->rc_buf, last_name, last_len);
@@ -325,7 +327,7 @@ retry:
        allow_sync_reply(ls, &rc->rc_id);
        memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
 
-       send_rcom_stateless(ls, mh, rc);
+       send_rcom_stateless(ls, msg, rc);
 
        error = dlm_wait_function(ls, &rcom_response);
        disallow_sync_reply(ls);
@@ -339,14 +341,14 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 {
        struct dlm_rcom *rc;
        int error, inlen, outlen, nodeid;
-       void *mh;
+       struct dlm_msg *msg;
 
        nodeid = rc_in->rc_header.h_nodeid;
        inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
        outlen = LOWCOMMS_MAX_TX_BUFFER_LEN - sizeof(struct dlm_rcom);
 
        error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen,
-                                     &rc, &mh);
+                                     &rc, &msg);
        if (error)
                return;
        rc->rc_id = rc_in->rc_id;
@@ -354,7 +356,7 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
        dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
                              nodeid);
-       send_rcom_stateless(ls, mh, rc);
+       send_rcom_stateless(ls, msg, rc);
 }
 
 int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)