2 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 * CLC (connection layer control) handshake over initial TCP socket to
5 * prepare for RDMA traffic
7 * Copyright IBM Corp. 2016
9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
/* smc_clc_wait_msg() - receive and validate one CLC message on smc->clcsock
 *
 * Steps that are visible in this function:
 *  - set the clcsock receive timeout to CLC_WAIT_TIME and peek
 *    sizeof(struct smc_clc_msg_hdr) bytes with MSG_PEEK | MSG_WAITALL, so the
 *    header can be inspected without consuming it from the TCP stream;
 *  - if a signal is pending, store EINTR in both clc_sk->sk_err and
 *    smc->sk.sk_err;
 *  - copy clc_sk->sk_err into smc->sk.sk_err and its negation into
 *    reason_code;
 *  - a peek result of 0 is recorded as ECONNRESET / -ECONNRESET;
 *  - datlen is the header's length field converted with ntohs(); EPROTO is
 *    recorded when the peek was shorter than the header, datlen is smaller
 *    than struct smc_clc_msg_decline or larger than
 *    struct smc_clc_msg_accept_confirm, the eyecatcher differs from
 *    SMC_EYECATCHER, or the type is neither SMC_CLC_DECLINE nor expected_type;
 *  - read datlen bytes again with MSG_WAITALL only (no MSG_PEEK);
 *  - for an SMC_CLC_DECLINE message set reason_code to SMC_CLC_DECL_REPLY and
 *    set smc->conn.lgr->sync_err when the peer_diagnosis field equals
 *    SMC_CLC_DECL_SYNCERR.
 *
 * NOTE(review): parts of this function are not visible in the reviewed
 * listing (remaining declarations, the conditions guarding some of the error
 * assignments, the exit path). The summary covers only what is shown;
 * confirm against the complete file.
 */
21 /* Wait for data on the tcp-socket, analyze received data
23 * 0 if success and it was not a decline that we received.
24 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
25 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
27 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
30 struct sock *clc_sk = smc->clcsock->sk;
31 struct smc_clc_msg_hdr *clcm = buf;
32 struct msghdr msg = {NULL, 0};
38 /* peek the first few bytes to determine length of data to receive
39 * so we don't consume any subsequent CLC message or payload data
40 * in the TCP byte stream
44 krflags = MSG_PEEK | MSG_WAITALL;
45 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
46 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
47 sizeof(struct smc_clc_msg_hdr), krflags);
48 if (signal_pending(current)) {
50 clc_sk->sk_err = EINTR;
51 smc->sk.sk_err = EINTR;
55 reason_code = -clc_sk->sk_err;
56 smc->sk.sk_err = clc_sk->sk_err;
59 if (!len) { /* peer has performed orderly shutdown */
60 smc->sk.sk_err = ECONNRESET;
61 reason_code = -ECONNRESET;
/* NOTE(review): the condition guarding the next assignment is not shown;
 * presumably it handles a negative len (receive error) - confirm.
 */
65 smc->sk.sk_err = -len;
69 datlen = ntohs(clcm->length);
70 if ((len < sizeof(struct smc_clc_msg_hdr)) ||
71 (datlen < sizeof(struct smc_clc_msg_decline)) ||
72 (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
73 memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
74 ((clcm->type != SMC_CLC_DECLINE) &&
75 (clcm->type != expected_type))) {
76 smc->sk.sk_err = EPROTO;
77 reason_code = -EPROTO;
81 /* receive the complete CLC message */
84 memset(&msg, 0, sizeof(struct msghdr));
85 krflags = MSG_WAITALL;
86 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
87 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
/* NOTE(review): the test guarding this EPROTO assignment is not shown;
 * presumably a read shorter than datlen - confirm.
 */
89 smc->sk.sk_err = EPROTO;
90 reason_code = -EPROTO;
/* a decline is reported to the caller as SMC_CLC_DECL_REPLY */
93 if (clcm->type == SMC_CLC_DECLINE) {
94 reason_code = SMC_CLC_DECL_REPLY;
95 if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
96 == SMC_CLC_DECL_SYNCERR)
97 smc->conn.lgr->sync_err = true;
/* smc_clc_send_decline() - build a CLC DECLINE message and send it on
 * smc->clcsock
 *
 * The message is built in a zeroed struct smc_clc_msg_decline:
 *  - header: SMC_EYECATCHER, type SMC_CLC_DECLINE, length of the decline
 *    structure in network byte order, version SMC_CLC_V1, flag 1 when
 *    out_of_sync is non-zero and 0 otherwise;
 *  - id_for_peer: copy of local_systemid;
 *  - peer_diagnosis: peer_diag_info in network byte order;
 *  - trailer: SMC_EYECATCHER.
 * It is sent as a single kvec with kernel_sendmsg(). A result smaller than
 * the structure size sets smc->sk.sk_err to EPROTO.
 *
 * NOTE(review): the end of the parameter list, the local declarations and
 * the return statement are not visible in the reviewed listing.
 */
104 /* send CLC DECLINE message across internal TCP socket */
105 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
108 struct smc_clc_msg_decline dclc;
113 memset(&dclc, 0, sizeof(dclc));
114 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
115 dclc.hdr.type = SMC_CLC_DECLINE;
116 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
117 dclc.hdr.version = SMC_CLC_V1;
118 dclc.hdr.flag = out_of_sync ? 1 : 0;
119 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
120 dclc.peer_diagnosis = htonl(peer_diag_info);
121 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
123 memset(&msg, 0, sizeof(msg));
124 vec.iov_base = &dclc;
125 vec.iov_len = sizeof(struct smc_clc_msg_decline);
126 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
127 sizeof(struct smc_clc_msg_decline));
/* NOTE(review): len's declaration is not shown. If it is a signed int, a
 * negative result is converted to a large unsigned value for the comparison
 * with sizeof below, so this test only catches non-negative short sends.
 */
128 if (len < sizeof(struct smc_clc_msg_decline))
129 smc->sk.sk_err = EPROTO;
/* NOTE(review): the condition guarding the next assignment is not shown;
 * presumably len < 0 - confirm.
 */
131 smc->sk.sk_err = -len;
/* smc_clc_send_proposal() - build a CLC PROPOSAL message and send it on
 * smc->clcsock
 *
 * The message is built in a zeroed struct smc_clc_msg_proposal:
 *  - header: SMC_EYECATCHER, type SMC_CLC_PROPOSAL, sizeof(pclc) in network
 *    byte order, version SMC_CLC_V1;
 *  - lcl.id_for_peer: copy of local_systemid;
 *  - lcl.gid / lcl.mac: entries [ibport - 1] of smcibdev->gid and
 *    smcibdev->mac (ibport is used as a 1-based index here);
 *  - outgoing_subnet: filled by smc_netinfo_by_tcpsk() from the clcsock;
 *  - trailer: SMC_EYECATCHER.
 * SMC_CLC_DECL_CNFERR is returned on the configuration-error path. After a
 * short send, reason_code and smc->sk.sk_err are set either to ENETUNREACH
 * or from smc->clcsock->sk->sk_err.
 *
 * NOTE(review): the end of the parameter list, the remaining arguments of
 * smc_netinfo_by_tcpsk(), several conditions and the final return are not
 * visible in the reviewed listing.
 */
135 /* send CLC PROPOSAL message across internal TCP socket */
136 int smc_clc_send_proposal(struct smc_sock *smc,
137 struct smc_ib_device *smcibdev,
140 struct smc_clc_msg_proposal pclc;
146 /* send SMC Proposal CLC message */
147 memset(&pclc, 0, sizeof(pclc));
148 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
149 pclc.hdr.type = SMC_CLC_PROPOSAL;
150 pclc.hdr.length = htons(sizeof(pclc));
151 pclc.hdr.version = SMC_CLC_V1; /* SMC version */
152 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
153 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
154 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1],
155 sizeof(smcibdev->mac[ibport - 1]));
157 /* determine subnet and mask from internal TCP socket */
158 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
/* NOTE(review): the test in front of this return is not shown; presumably
 * it checks rc from smc_netinfo_by_tcpsk() - confirm.
 */
161 return SMC_CLC_DECL_CNFERR; /* configuration error */
162 memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
163 memset(&msg, 0, sizeof(msg));
164 vec.iov_base = &pclc;
165 vec.iov_len = sizeof(pclc);
166 /* due to the few bytes needed for clc-handshake this cannot block */
167 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
/* NOTE(review): len's declaration is not shown. If it is a signed int, a
 * negative result compares as a large unsigned value against sizeof(pclc),
 * so this block would be skipped for send errors - confirm and, if so, test
 * len < 0 separately.
 */
168 if (len < sizeof(pclc)) {
170 reason_code = -ENETUNREACH;
171 smc->sk.sk_err = -reason_code;
/* alternative path: take the error recorded on the clcsock */
173 smc->sk.sk_err = smc->clcsock->sk->sk_err;
174 reason_code = -smc->sk.sk_err;
/* smc_clc_send_confirm() - build a CLC CONFIRM message and send it on
 * smc->clcsock
 *
 * Uses link SMC_SINGLE_LINK of the connection's link group. The message is
 * built in a zeroed struct smc_clc_msg_accept_confirm:
 *  - header: SMC_EYECATCHER, type SMC_CLC_CONFIRM, sizeof(cclc) in network
 *    byte order, version SMC_CLC_V1;
 *  - lcl.id_for_peer: copy of local_systemid;
 *  - lcl.gid / lcl.mac: taken from entry [link->ibport - 1] of the link's
 *    smcibdev;
 *  - qpn and psn: stored with hton24() from link->roce_qp->qp_num and
 *    link->psn_initial;
 *  - conn_idx: fixed to 1;
 *  - rmbe_alert_token: conn->alert_token_local in network byte order;
 *  - qp_mtu: the smaller of link->path_mtu and link->peer_mtu;
 *  - trailer: SMC_EYECATCHER.
 * After a short send, reason_code and smc->sk.sk_err are set either to
 * ENETUNREACH or from smc->clcsock->sk->sk_err.
 *
 * NOTE(review): local declarations, some conditions and the final return
 * are not visible in the reviewed listing.
 */
181 /* send CLC CONFIRM message across internal TCP socket */
182 int smc_clc_send_confirm(struct smc_sock *smc)
184 struct smc_connection *conn = &smc->conn;
185 struct smc_clc_msg_accept_confirm cclc;
186 struct smc_link *link;
192 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
193 /* send SMC Confirm CLC msg */
194 memset(&cclc, 0, sizeof(cclc));
195 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
196 cclc.hdr.type = SMC_CLC_CONFIRM;
197 cclc.hdr.length = htons(sizeof(cclc));
198 cclc.hdr.version = SMC_CLC_V1; /* SMC version */
199 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
200 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
/* NOTE(review): the copy below is sized by sizeof(link->smcibdev->mac), the
 * whole mac member, while smc_clc_send_proposal() and smc_clc_send_accept()
 * size the same copy by a single mac[...] element. If mac holds more than
 * one entry, this writes past cclc.lcl.mac and may read past the array -
 * confirm against the struct definition and align with the other senders.
 */
202 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
203 sizeof(link->smcibdev->mac));
205 /* tbd in follow-on patch: fill in rmb-related values */
207 hton24(cclc.qpn, link->roce_qp->qp_num);
208 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
209 cclc.rmbe_alert_token = htonl(conn->alert_token_local);
210 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
211 hton24(cclc.psn, link->psn_initial);
213 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
215 memset(&msg, 0, sizeof(msg));
216 vec.iov_base = &cclc;
217 vec.iov_len = sizeof(cclc);
218 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
/* NOTE(review): if len is a signed int (declaration not shown), a negative
 * result compares as a large unsigned value against sizeof(cclc) and this
 * block is skipped for send errors - confirm.
 */
219 if (len < sizeof(cclc)) {
221 reason_code = -ENETUNREACH;
222 smc->sk.sk_err = -reason_code;
/* alternative path: take the error recorded on the clcsock */
224 smc->sk.sk_err = smc->clcsock->sk->sk_err;
225 reason_code = -smc->sk.sk_err;
/* smc_clc_send_accept() - build a CLC ACCEPT message and send it on
 * new_smc->clcsock
 *
 * Uses link SMC_SINGLE_LINK of the connection's link group. The message is
 * built in a zeroed struct smc_clc_msg_accept_confirm:
 *  - header: SMC_EYECATCHER, type SMC_CLC_ACCEPT, sizeof(aclc) in network
 *    byte order, version SMC_CLC_V1;
 *  - lcl.id_for_peer: copy of local_systemid;
 *  - lcl.gid / lcl.mac: taken from entry [link->ibport - 1] of the link's
 *    smcibdev;
 *  - qpn and psn: stored with hton24() from link->roce_qp->qp_num and
 *    link->psn_initial;
 *  - conn_idx: fixed to 1;
 *  - rmbe_alert_token: conn->alert_token_local in network byte order;
 *  - qp_mtu: link->path_mtu (unlike the CONFIRM message, no min() with
 *    peer_mtu is applied here);
 *  - trailer: SMC_EYECATCHER.
 * After a short send, new_smc->sk.sk_err is set either to EPROTO or from
 * new_smc->clcsock->sk->sk_err, and rc is taken from sock_error().
 *
 * NOTE(review): the statement controlled by "if (srv_first_contact)", the
 * local declarations, some conditions and the final return are not visible
 * in the reviewed listing.
 */
231 /* send CLC ACCEPT message across internal TCP socket */
232 int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
234 struct smc_connection *conn = &new_smc->conn;
235 struct smc_clc_msg_accept_confirm aclc;
236 struct smc_link *link;
242 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
243 memset(&aclc, 0, sizeof(aclc));
244 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
245 aclc.hdr.type = SMC_CLC_ACCEPT;
246 aclc.hdr.length = htons(sizeof(aclc));
247 aclc.hdr.version = SMC_CLC_V1; /* SMC version */
248 if (srv_first_contact)
250 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
251 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
253 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1],
254 sizeof(link->smcibdev->mac[link->ibport - 1]));
256 /* tbd in follow-on patch: fill in rmb-related values */
258 hton24(aclc.qpn, link->roce_qp->qp_num);
259 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
260 aclc.rmbe_alert_token = htonl(conn->alert_token_local);
261 aclc.qp_mtu = link->path_mtu;
262 hton24(aclc.psn, link->psn_initial);
263 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
265 memset(&msg, 0, sizeof(msg));
266 vec.iov_base = &aclc;
267 vec.iov_len = sizeof(aclc);
268 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
/* NOTE(review): if len is a signed int (declaration not shown), a negative
 * result compares as a large unsigned value against sizeof(aclc) and this
 * block is skipped for send errors - confirm.
 */
269 if (len < sizeof(aclc)) {
271 new_smc->sk.sk_err = EPROTO;
/* alternative path: take the error recorded on the clcsock */
273 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
/* sock_error() result becomes the return code candidate */
274 rc = sock_error(&new_smc->sk);