net/smc: add listen processing for SMC-Rv2
authorKarsten Graul <kgraul@linux.ibm.com>
Sat, 16 Oct 2021 09:37:46 +0000 (11:37 +0200)
committerDavid S. Miller <davem@davemloft.net>
Sat, 16 Oct 2021 13:58:13 +0000 (14:58 +0100)
Implement the server side of the SMC-Rv2 processing. Process incoming
CLC messages, find eligible devices and check for a valid route to the
remote peer.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/smc/af_smc.c
net/smc/smc_clc.h
net/smc/smc_core.c
net/smc/smc_core.h

index d428026..ce5feb6 100644 (file)
@@ -1515,33 +1515,48 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,
 
        ini->smc_type_v1 = pclc->hdr.typev1;
        ini->smc_type_v2 = pclc->hdr.typev2;
-       ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0;
-       if (pclc->hdr.version > SMC_V1)
-               ini->smcd_version |=
-                               ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0;
-       if (!(ini->smcd_version & SMC_V2)) {
+       ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
+       ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
+       if (pclc->hdr.version > SMC_V1) {
+               if (smcd_indicated(ini->smc_type_v2))
+                       ini->smcd_version |= SMC_V2;
+               if (smcr_indicated(ini->smc_type_v2))
+                       ini->smcr_version |= SMC_V2;
+       }
+       if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
                rc = SMC_CLC_DECL_PEERNOSMC;
                goto out;
        }
-       if (!smc_ism_is_v2_capable()) {
-               ini->smcd_version &= ~SMC_V2;
-               rc = SMC_CLC_DECL_NOISM2SUPP;
-               goto out;
-       }
        pclc_v2_ext = smc_get_clc_v2_ext(pclc);
        if (!pclc_v2_ext) {
                ini->smcd_version &= ~SMC_V2;
+               ini->smcr_version &= ~SMC_V2;
                rc = SMC_CLC_DECL_NOV2EXT;
                goto out;
        }
        pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
-       if (!pclc_smcd_v2_ext) {
-               ini->smcd_version &= ~SMC_V2;
-               rc = SMC_CLC_DECL_NOV2DEXT;
+       if (ini->smcd_version & SMC_V2) {
+               if (!smc_ism_is_v2_capable()) {
+                       ini->smcd_version &= ~SMC_V2;
+                       rc = SMC_CLC_DECL_NOISM2SUPP;
+               } else if (!pclc_smcd_v2_ext) {
+                       ini->smcd_version &= ~SMC_V2;
+                       rc = SMC_CLC_DECL_NOV2DEXT;
+               } else if (!pclc_v2_ext->hdr.eid_cnt &&
+                          !pclc_v2_ext->hdr.flag.seid) {
+                       ini->smcd_version &= ~SMC_V2;
+                       rc = SMC_CLC_DECL_NOUEID;
+               }
+       }
+       if (ini->smcr_version & SMC_V2) {
+               if (!pclc_v2_ext->hdr.eid_cnt) {
+                       ini->smcr_version &= ~SMC_V2;
+                       rc = SMC_CLC_DECL_NOUEID;
+               }
        }
 
 out:
-       if (!ini->smcd_version)
+       if (!ini->smcd_version && !ini->smcr_version)
                return rc;
 
        return 0;
@@ -1661,10 +1676,6 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
        pclc_smcd = smc_get_clc_msg_smcd(pclc);
        smc_v2_ext = smc_get_clc_v2_ext(pclc);
        smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
-       if (!smcd_v2_ext) {
-               smc_find_ism_store_rc(SMC_CLC_DECL_NOV2DEXT, ini);
-               goto not_found;
-       }
 
        mutex_lock(&smcd_dev_list.mutex);
        if (pclc_smcd->ism.chid)
@@ -1682,8 +1693,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
        }
        mutex_unlock(&smcd_dev_list.mutex);
 
-       if (!ini->ism_dev[0])
+       if (!ini->ism_dev[0]) {
+               smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
                goto not_found;
+       }
 
        smc_ism_get_system_eid(&eid);
        if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
@@ -1736,6 +1749,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
 
 not_found:
        smc_find_ism_store_rc(rc, ini);
+       ini->smcd_version &= ~SMC_V1;
        ini->ism_dev[0] = NULL;
        ini->is_smcd = false;
 }
@@ -1754,24 +1768,69 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
        return 0;
 }
 
+static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
+                                        struct smc_clc_msg_proposal *pclc,
+                                        struct smc_init_info *ini)
+{
+       struct smc_clc_v2_extension *smc_v2_ext;
+       u8 smcr_version;
+       int rc;
+
+       if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
+               goto not_found;
+
+       smc_v2_ext = smc_get_clc_v2_ext(pclc);
+       if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
+               goto not_found;
+
+       /* prepare RDMA check */
+       memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
+       memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
+       memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
+       ini->check_smcrv2 = true;
+       ini->smcrv2.clc_sk = new_smc->clcsock->sk;
+       ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
+       ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
+       rc = smc_find_rdma_device(new_smc, ini);
+       if (rc) {
+               smc_find_ism_store_rc(rc, ini);
+               goto not_found;
+       }
+       if (!ini->smcrv2.uses_gateway)
+               memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);
+
+       smcr_version = ini->smcr_version;
+       ini->smcr_version = SMC_V2;
+       rc = smc_listen_rdma_init(new_smc, ini);
+       if (!rc)
+               rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
+       if (!rc)
+               return;
+       ini->smcr_version = smcr_version;
+       smc_find_ism_store_rc(rc, ini);
+
+not_found:
+       ini->smcr_version &= ~SMC_V2;
+       ini->check_smcrv2 = false;
+}
+
 static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
                                        struct smc_clc_msg_proposal *pclc,
                                        struct smc_init_info *ini)
 {
        int rc;
 
-       if (!smcr_indicated(ini->smc_type_v1))
+       if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
                return SMC_CLC_DECL_NOSMCDEV;
 
        /* prepare RDMA check */
-       ini->ib_lcl = &pclc->lcl;
+       memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
+       memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
+       memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
        rc = smc_find_rdma_device(new_smc, ini);
        if (rc) {
                /* no RDMA device found */
-               if (ini->smc_type_v1 == SMC_TYPE_B)
-                       /* neither ISM nor RDMA device found */
-                       rc = SMC_CLC_DECL_NOSMCDEV;
-               return rc;
+               return SMC_CLC_DECL_NOSMCDEV;
        }
        rc = smc_listen_rdma_init(new_smc, ini);
        if (rc)
@@ -1784,51 +1843,60 @@ static int smc_listen_find_device(struct smc_sock *new_smc,
                                  struct smc_clc_msg_proposal *pclc,
                                  struct smc_init_info *ini)
 {
-       int rc;
+       int prfx_rc;
 
        /* check for ISM device matching V2 proposed device */
        smc_find_ism_v2_device_serv(new_smc, pclc, ini);
        if (ini->ism_dev[0])
                return 0;
 
-       if (!(ini->smcd_version & SMC_V1))
-               return ini->rc ?: SMC_CLC_DECL_NOSMCD2DEV;
-
-       /* check for matching IP prefix and subnet length */
-       rc = smc_listen_prfx_check(new_smc, pclc);
-       if (rc)
-               return ini->rc ?: rc;
+       /* check for matching IP prefix and subnet length (V1) */
+       prfx_rc = smc_listen_prfx_check(new_smc, pclc);
+       if (prfx_rc)
+               smc_find_ism_store_rc(prfx_rc, ini);
 
        /* get vlan id from IP device */
        if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
                return ini->rc ?: SMC_CLC_DECL_GETVLANERR;
 
        /* check for ISM device matching V1 proposed device */
-       smc_find_ism_v1_device_serv(new_smc, pclc, ini);
+       if (!prfx_rc)
+               smc_find_ism_v1_device_serv(new_smc, pclc, ini);
        if (ini->ism_dev[0])
                return 0;
 
-       if (pclc->hdr.typev1 == SMC_TYPE_D)
+       if (!smcr_indicated(pclc->hdr.typev1) &&
+           !smcr_indicated(pclc->hdr.typev2))
                /* skip RDMA and decline */
                return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;
 
-       /* check if RDMA is available */
-       rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
-       smc_find_ism_store_rc(rc, ini);
+       /* check if RDMA V2 is available */
+       smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
+       if (ini->smcrv2.ib_dev_v2)
+               return 0;
 
-       return (!rc) ? 0 : ini->rc;
+       /* check if RDMA V1 is available */
+       if (!prfx_rc) {
+               int rc;
+
+               rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
+               smc_find_ism_store_rc(rc, ini);
+               return (!rc) ? 0 : ini->rc;
+       }
+       return SMC_CLC_DECL_NOSMCDEV;
 }
 
 /* listen worker: finish RDMA setup */
 static int smc_listen_rdma_finish(struct smc_sock *new_smc,
                                  struct smc_clc_msg_accept_confirm *cclc,
-                                 bool local_first)
+                                 bool local_first,
+                                 struct smc_init_info *ini)
 {
        struct smc_link *link = new_smc->conn.lnk;
        int reason_code = 0;
 
        if (local_first)
-               smc_link_save_peer_info(link, cclc, NULL);
+               smc_link_save_peer_info(link, cclc, ini);
 
        if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
                return SMC_CLC_DECL_ERR_RTOK;
@@ -1849,12 +1917,13 @@ static void smc_listen_work(struct work_struct *work)
 {
        struct smc_sock *new_smc = container_of(work, struct smc_sock,
                                                smc_listen_work);
-       u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
        struct socket *newclcsock = new_smc->clcsock;
        struct smc_clc_msg_accept_confirm *cclc;
        struct smc_clc_msg_proposal_area *buf;
        struct smc_clc_msg_proposal *pclc;
        struct smc_init_info *ini = NULL;
+       u8 proposal_version = SMC_V1;
+       u8 accept_version;
        int rc = 0;
 
        if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
@@ -1885,7 +1954,9 @@ static void smc_listen_work(struct work_struct *work)
                              SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
        if (rc)
                goto out_decl;
-       version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version;
+
+       if (pclc->hdr.version > SMC_V1)
+               proposal_version = SMC_V2;
 
        /* IPSec connections opt out of SMC optimizations */
        if (using_ipsec(new_smc)) {
@@ -1915,9 +1986,9 @@ static void smc_listen_work(struct work_struct *work)
                goto out_unlock;
 
        /* send SMC Accept CLC message */
+       accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
        rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
-                                ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1,
-                                ini->negotiated_eid);
+                                accept_version, ini->negotiated_eid);
        if (rc)
                goto out_unlock;
 
@@ -1939,7 +2010,7 @@ static void smc_listen_work(struct work_struct *work)
        /* finish worker */
        if (!ini->is_smcd) {
                rc = smc_listen_rdma_finish(new_smc, cclc,
-                                           ini->first_contact_local);
+                                           ini->first_contact_local, ini);
                if (rc)
                        goto out_unlock;
                mutex_unlock(&smc_server_lgr_pending);
@@ -1953,7 +2024,7 @@ out_unlock:
        mutex_unlock(&smc_server_lgr_pending);
 out_decl:
        smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
-                          version);
+                          proposal_version);
 out_free:
        kfree(ini);
        kfree(buf);
index 4f2fe27..22c079e 100644 (file)
@@ -44,6 +44,7 @@
 #define SMC_CLC_DECL_NOV2DEXT  0x03030005  /* peer sent no clc SMC-Dv2 ext.  */
 #define SMC_CLC_DECL_NOSEID    0x03030006  /* peer sent no SEID              */
 #define SMC_CLC_DECL_NOSMCD2DEV        0x03030007  /* no SMC-Dv2 device found        */
+#define SMC_CLC_DECL_NOUEID    0x03030008  /* peer sent no UEID              */
 #define SMC_CLC_DECL_MODEUNSUPP        0x03040000  /* smc modes do not match (R or D)*/
 #define SMC_CLC_DECL_RMBE_EC   0x03050000  /* peer has eyecatcher in RMBE    */
 #define SMC_CLC_DECL_OPTUNSUPP 0x03060000  /* fastopen sockopt not supported */
index 4d46370..a081582 100644 (file)
@@ -684,24 +684,30 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link)
 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
                   u8 link_idx, struct smc_init_info *ini)
 {
+       struct smc_ib_device *smcibdev;
        u8 rndvec[3];
        int rc;
 
-       get_device(&ini->ib_dev->ibdev->dev);
-       atomic_inc(&ini->ib_dev->lnk_cnt);
+       if (lgr->smc_version == SMC_V2) {
+               lnk->smcibdev = ini->smcrv2.ib_dev_v2;
+               lnk->ibport = ini->smcrv2.ib_port_v2;
+       } else {
+               lnk->smcibdev = ini->ib_dev;
+               lnk->ibport = ini->ib_port;
+       }
+       get_device(&lnk->smcibdev->ibdev->dev);
+       atomic_inc(&lnk->smcibdev->lnk_cnt);
+       lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
        lnk->link_id = smcr_next_link_id(lgr);
        lnk->lgr = lgr;
        lnk->link_idx = link_idx;
-       lnk->smcibdev = ini->ib_dev;
-       lnk->ibport = ini->ib_port;
        smc_ibdev_cnt_inc(lnk);
        smcr_copy_dev_info_to_link(lnk);
-       lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
        atomic_set(&lnk->conn_cnt, 0);
        smc_llc_link_set_uid(lnk);
        INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
-       if (!ini->ib_dev->initialized) {
-               rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
+       if (!lnk->smcibdev->initialized) {
+               rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
                if (rc)
                        goto out;
        }
@@ -740,11 +746,12 @@ clear_llc_lnk:
        smc_llc_link_clear(lnk, false);
 out:
        smc_ibdev_cnt_dec(lnk);
-       put_device(&ini->ib_dev->ibdev->dev);
+       put_device(&lnk->smcibdev->ibdev->dev);
+       smcibdev = lnk->smcibdev;
        memset(lnk, 0, sizeof(struct smc_link));
        lnk->state = SMC_LNK_UNUSED;
-       if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
-               wake_up(&ini->ib_dev->lnks_deleted);
+       if (!atomic_dec_return(&smcibdev->lnk_cnt))
+               wake_up(&smcibdev->lnks_deleted);
        return rc;
 }
 
@@ -808,10 +815,25 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
                atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
        } else {
                /* SMC-R specific settings */
+               struct smc_ib_device *ibdev;
+               int ibport;
+
                lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
-               memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
+               lgr->smc_version = ini->smcr_version;
+               memcpy(lgr->peer_systemid, ini->peer_systemid,
                       SMC_SYSTEMID_LEN);
-               memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
+               if (lgr->smc_version == SMC_V2) {
+                       ibdev = ini->smcrv2.ib_dev_v2;
+                       ibport = ini->smcrv2.ib_port_v2;
+                       lgr->saddr = ini->smcrv2.saddr;
+                       lgr->uses_gateway = ini->smcrv2.uses_gateway;
+                       memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
+                              ETH_ALEN);
+               } else {
+                       ibdev = ini->ib_dev;
+                       ibport = ini->ib_port;
+               }
+               memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
                       SMC_MAX_PNETID_LEN);
                smc_llc_lgr_init(lgr, smc);
 
@@ -1636,13 +1658,15 @@ out:
        return rc;
 }
 
-static bool smcr_lgr_match(struct smc_link_group *lgr,
-                          struct smc_clc_msg_local *lcl,
+static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
+                          u8 peer_systemid[],
+                          u8 peer_gid[],
+                          u8 peer_mac_v1[],
                           enum smc_lgr_role role, u32 clcqpn)
 {
        int i;
 
-       if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
+       if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
            lgr->role != role)
                return false;
 
@@ -1650,8 +1674,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
                if (!smc_link_active(&lgr->lnk[i]))
                        continue;
                if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
-                   !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
-                   !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
+                   !memcmp(lgr->lnk[i].peer_gid, peer_gid, SMC_GID_SIZE) &&
+                   (smcr_version == SMC_V2 ||
+                    !memcmp(lgr->lnk[i].peer_mac, peer_mac_v1, ETH_ALEN)))
                        return true;
        }
        return false;
@@ -1690,7 +1715,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
                if ((ini->is_smcd ?
                     smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
                                    ini->ism_peer_gid[ini->ism_selected]) :
-                    smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
+                    smcr_lgr_match(lgr, ini->smcr_version,
+                                   ini->peer_systemid,
+                                   ini->peer_gid, ini->peer_mac, role,
+                                   ini->ib_clcqpn)) &&
                    !lgr->sync_err &&
                    (ini->smcd_version == SMC_V2 ||
                     lgr->vlan_id == ini->vlan_id) &&
index 128474f..dd3198b 100644 (file)
@@ -340,7 +340,6 @@ struct smc_init_info {
        u32                     rc;
        u8                      negotiated_eid[SMC_MAX_EID_LEN];
        /* SMC-R */
-       struct smc_clc_msg_local *ib_lcl;
        u8                      smcr_version;
        u8                      check_smcrv2;
        u8                      peer_gid[SMC_GID_SIZE];