net/smc: periodic testlink support
author Karsten Graul <kgraul@linux.ibm.com>
Wed, 2 May 2018 14:56:44 +0000 (16:56 +0200)
committer David S. Miller <davem@davemloft.net>
Wed, 2 May 2018 17:29:12 +0000 (13:29 -0400)
Add periodic LLC testlink support to ensure the link is still active.
The interval time is initialized using the value of
sysctl_tcp_keepalive_time.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/smc/af_smc.c
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_llc.c
net/smc/smc_llc.h
net/smc/smc_wr.c

index 20aa417..961b8ef 100644 (file)
@@ -294,6 +294,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
 
 static int smc_clnt_conf_first_link(struct smc_sock *smc)
 {
+       struct net *net = sock_net(smc->clcsock->sk);
        struct smc_link_group *lgr = smc->conn.lgr;
        struct smc_link *link;
        int rest;
@@ -353,7 +354,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
        if (rc < 0)
                return SMC_CLC_DECL_TCL;
 
-       link->state = SMC_LNK_ACTIVE;
+       smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 
        return 0;
 }
@@ -715,6 +716,7 @@ void smc_close_non_accepted(struct sock *sk)
 
 static int smc_serv_conf_first_link(struct smc_sock *smc)
 {
+       struct net *net = sock_net(smc->clcsock->sk);
        struct smc_link_group *lgr = smc->conn.lgr;
        struct smc_link *link;
        int rest;
@@ -769,7 +771,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
                return rc;
        }
 
-       link->state = SMC_LNK_ACTIVE;
+       smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 
        return 0;
 }
index f44f680..d924776 100644 (file)
@@ -310,6 +310,7 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
 /* remove a link group */
 void smc_lgr_free(struct smc_link_group *lgr)
 {
+       smc_llc_link_flush(&lgr->lnk[SMC_SINGLE_LINK]);
        smc_lgr_free_bufs(lgr);
        smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
        kfree(lgr);
@@ -332,6 +333,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
        struct rb_node *node;
 
        smc_lgr_forget(lgr);
+       smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 
        write_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
index 07e2a39..97339f0 100644 (file)
@@ -79,6 +79,7 @@ struct smc_link {
        dma_addr_t              wr_rx_dma_addr; /* DMA address of wr_rx_bufs */
        u64                     wr_rx_id;       /* seq # of last recv WR */
        u32                     wr_rx_cnt;      /* number of WR recv buffers */
+       unsigned long           wr_rx_tstamp;   /* jiffies when last buf rx */
 
        struct ib_reg_wr        wr_reg;         /* WR register memory region */
        wait_queue_head_t       wr_reg_wait;    /* wait for wr_reg result */
@@ -101,6 +102,9 @@ struct smc_link {
        int                     llc_confirm_resp_rc; /* rc from conf_resp msg */
        struct completion       llc_add;        /* wait for rx of add link */
        struct completion       llc_add_resp;   /* wait for rx of add link rsp*/
+       struct delayed_work     llc_testlink_wrk; /* testlink worker */
+       struct completion       llc_testlink_resp; /* wait for rx of testlink */
+       int                     llc_testlink_time; /* testlink interval */
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
index ea4b219..33b4d85 100644 (file)
@@ -397,7 +397,8 @@ static void smc_llc_rx_test_link(struct smc_link *link,
                                 struct smc_llc_msg_test_link *llc)
 {
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-               /* unused as long as we don't send this type of msg */
+               if (link->state == SMC_LNK_ACTIVE)
+                       complete(&link->llc_testlink_resp);
        } else {
                smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
        }
@@ -502,6 +503,65 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
        }
 }
 
+/***************************** worker ****************************************/
+
+/* Delayed-work handler behind link->llc_testlink_wrk.
+ *
+ * Does nothing (and does not re-arm itself) unless the link is in state
+ * SMC_LNK_ACTIVE.  If the last receive timestamp (wr_rx_tstamp) plus the
+ * testlink interval still lies in the future, the worker only re-arms
+ * itself for the remaining time.  Otherwise it sends a TEST LINK request
+ * with zeroed user data and waits up to SMC_LLC_WAIT_TIME for
+ * llc_testlink_resp to be completed; when the wait does not succeed
+ * (rc <= 0) the link group is terminated and the worker is not re-armed.
+ */
+static void smc_llc_testlink_work(struct work_struct *work)
+{
+       struct smc_link *link = container_of(to_delayed_work(work),
+                                            struct smc_link, llc_testlink_wrk);
+       unsigned long next_interval;
+       struct smc_link_group *lgr;
+       unsigned long expire_time;
+       u8 user_data[16] = { 0 };
+       int rc;
+
+       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+       if (link->state != SMC_LNK_ACTIVE)
+               return;         /* don't reschedule worker */
+       /* link was heard from recently enough: sleep for the remainder only */
+       expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
+       if (time_is_after_jiffies(expire_time)) {
+               next_interval = expire_time - jiffies;
+               goto out;
+       }
+       /* re-arm the completion before sending so a response is not missed */
+       reinit_completion(&link->llc_testlink_resp);
+       /* NOTE(review): the send result is ignored; a failed send is only
+        * noticed through the wait below not succeeding
+        */
+       smc_llc_send_test_link(link, user_data, SMC_LLC_REQ);
+       /* receive TEST LINK response over RoCE fabric */
+       rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
+                                                      SMC_LLC_WAIT_TIME);
+       if (rc <= 0) {
+               smc_lgr_terminate(lgr);
+               return;
+       }
+       next_interval = link->llc_testlink_time;
+out:
+       schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
+}
+
+/* Put a link into state SMC_LNK_ACTIVE and start periodic TEST LINK checks.
+ *
+ * @link:          link to activate; its testlink completion and delayed
+ *                 work are (re)initialized here
+ * @testlink_time: testlink interval; it is multiplied by HZ before being
+ *                 stored in link->llc_testlink_time.  A value of 0 leaves
+ *                 the testlink worker unscheduled.
+ *
+ * NOTE(review): the callers visible in this change pass
+ * net->ipv4.sysctl_tcp_keepalive_time.  Confirm that this value is in
+ * seconds as the "* HZ" scaling assumes; if it is already in jiffies the
+ * interval is HZ times too long and the int multiplication may overflow.
+ */
+void smc_llc_link_active(struct smc_link *link, int testlink_time)
+{
+       init_completion(&link->llc_testlink_resp);
+       INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
+       link->state = SMC_LNK_ACTIVE;
+       if (testlink_time) {
+               link->llc_testlink_time = testlink_time * HZ;
+               schedule_delayed_work(&link->llc_testlink_wrk,
+                                     link->llc_testlink_time);
+       }
+}
+
+/* Mark the link SMC_LNK_INACTIVE and cancel a pending testlink worker.
+ *
+ * called in tasklet context
+ *
+ * The state is changed first, so a testlink worker that runs anyway sees
+ * the link as not active and returns without re-arming itself.
+ * NOTE(review): presumably the non-_sync cancel is used because this
+ * context must not sleep - confirm.
+ */
+void smc_llc_link_inactive(struct smc_link *link)
+{
+       link->state = SMC_LNK_INACTIVE;
+       cancel_delayed_work(&link->llc_testlink_wrk);
+}
+
+/* Synchronously cancel the testlink worker of a link.
+ *
+ * called in worker context
+ *
+ * Invoked from smc_lgr_free() before the link group's buffers and link are
+ * released, using the _sync variant of the cancel.
+ */
+void smc_llc_link_flush(struct smc_link *link)
+{
+       cancel_delayed_work_sync(&link->llc_testlink_wrk);
+}
+
 /***************************** init, exit, misc ******************************/
 
 static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
index e4a7d5e..d6e4211 100644 (file)
@@ -44,6 +44,9 @@ int smc_llc_send_delete_link(struct smc_link *link,
                             enum smc_llc_reqresp reqresp);
 int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
                           enum smc_llc_reqresp reqresp);
+void smc_llc_link_active(struct smc_link *link, int testlink_time);
+void smc_llc_link_inactive(struct smc_link *link);
+void smc_llc_link_flush(struct smc_link *link);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
index 1b8af23..cc7c1bb 100644 (file)
@@ -376,6 +376,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
        for (i = 0; i < num; i++) {
                link = wc[i].qp->qp_context;
                if (wc[i].status == IB_WC_SUCCESS) {
+                       link->wr_rx_tstamp = jiffies;
                        smc_wr_rx_demultiplex(&wc[i]);
                        smc_wr_rx_post(link); /* refill WR RX */
                } else {