net/smc: non blocking recvmsg() return -EAGAIN when no data and signal_pending
[platform/kernel/linux-rpi.git] / net / smc / smc_clc.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  CLC (connection layer control) handshake over initial TCP socket to
6  *  prepare for RDMA traffic
7  *
8  *  Copyright IBM Corp. 2016, 2018
9  *
10  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
11  */
12
13 #include <linux/in.h>
14 #include <linux/inetdevice.h>
15 #include <linux/if_ether.h>
16 #include <linux/sched/signal.h>
17 #include <linux/utsname.h>
18 #include <linux/ctype.h>
19
20 #include <net/addrconf.h>
21 #include <net/sock.h>
22 #include <net/tcp.h>
23
24 #include "smc.h"
25 #include "smc_core.h"
26 #include "smc_clc.h"
27 #include "smc_ib.h"
28 #include "smc_ism.h"
29
30 #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
31 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
32 #define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78
33 #define SMC_CLC_RECV_BUF_LEN    100
34
35 /* eye catcher "SMCR" EBCDIC for CLC messages */
36 static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
37 /* eye catcher "SMCD" EBCDIC for CLC messages */
38 static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
39
40 static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN];
41
42 /* check arriving CLC proposal */
43 static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
44 {
45         struct smc_clc_msg_proposal_prefix *pclc_prfx;
46         struct smc_clc_smcd_v2_extension *smcd_v2_ext;
47         struct smc_clc_msg_hdr *hdr = &pclc->hdr;
48         struct smc_clc_v2_extension *v2_ext;
49
50         v2_ext = smc_get_clc_v2_ext(pclc);
51         pclc_prfx = smc_clc_proposal_get_prefix(pclc);
52         if (hdr->version == SMC_V1) {
53                 if (hdr->typev1 == SMC_TYPE_N)
54                         return false;
55                 if (ntohs(hdr->length) !=
56                         sizeof(*pclc) + ntohs(pclc->iparea_offset) +
57                         sizeof(*pclc_prfx) +
58                         pclc_prfx->ipv6_prefixes_cnt *
59                                 sizeof(struct smc_clc_ipv6_prefix) +
60                         sizeof(struct smc_clc_msg_trail))
61                         return false;
62         } else {
63                 if (ntohs(hdr->length) !=
64                         sizeof(*pclc) +
65                         sizeof(struct smc_clc_msg_smcd) +
66                         (hdr->typev1 != SMC_TYPE_N ?
67                                 sizeof(*pclc_prfx) +
68                                 pclc_prfx->ipv6_prefixes_cnt *
69                                 sizeof(struct smc_clc_ipv6_prefix) : 0) +
70                         (hdr->typev2 != SMC_TYPE_N ?
71                                 sizeof(*v2_ext) +
72                                 v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) +
73                         (smcd_indicated(hdr->typev2) ?
74                                 sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt *
75                                         sizeof(struct smc_clc_smcd_gid_chid) :
76                                 0) +
77                         sizeof(struct smc_clc_msg_trail))
78                         return false;
79         }
80         return true;
81 }
82
83 /* check arriving CLC accept or confirm */
84 static bool
85 smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
86 {
87         struct smc_clc_msg_hdr *hdr = &clc_v2->hdr;
88
89         if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
90                 return false;
91         if (hdr->version == SMC_V1) {
92                 if ((hdr->typev1 == SMC_TYPE_R &&
93                      ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
94                     (hdr->typev1 == SMC_TYPE_D &&
95                      ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
96                         return false;
97         } else {
98                 if (hdr->typev1 == SMC_TYPE_D &&
99                     ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 &&
100                     (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
101                                 sizeof(struct smc_clc_first_contact_ext)))
102                         return false;
103         }
104         return true;
105 }
106
107 static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len)
108 {
109         memset(fce, 0, sizeof(*fce));
110         fce->os_type = SMC_CLC_OS_LINUX;
111         fce->release = SMC_RELEASE;
112         memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname));
113         (*len) += sizeof(*fce);
114 }
115
116 /* check if received message has a correct header length and contains valid
117  * heading and trailing eyecatchers
118  */
119 static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
120 {
121         struct smc_clc_msg_accept_confirm_v2 *clc_v2;
122         struct smc_clc_msg_proposal *pclc;
123         struct smc_clc_msg_decline *dclc;
124         struct smc_clc_msg_trail *trl;
125
126         if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
127             memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
128                 return false;
129         switch (clcm->type) {
130         case SMC_CLC_PROPOSAL:
131                 pclc = (struct smc_clc_msg_proposal *)clcm;
132                 if (!smc_clc_msg_prop_valid(pclc))
133                         return false;
134                 trl = (struct smc_clc_msg_trail *)
135                         ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
136                 break;
137         case SMC_CLC_ACCEPT:
138         case SMC_CLC_CONFIRM:
139                 clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm;
140                 if (!smc_clc_msg_acc_conf_valid(clc_v2))
141                         return false;
142                 trl = (struct smc_clc_msg_trail *)
143                         ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) -
144                                                         sizeof(*trl));
145                 break;
146         case SMC_CLC_DECLINE:
147                 dclc = (struct smc_clc_msg_decline *)clcm;
148                 if (ntohs(dclc->hdr.length) != sizeof(*dclc))
149                         return false;
150                 trl = &dclc->trl;
151                 break;
152         default:
153                 return false;
154         }
155         if (check_trl &&
156             memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
157             memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
158                 return false;
159         return true;
160 }
161
162 /* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
163 static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
164                                  struct smc_clc_msg_proposal_prefix *prop)
165 {
166         struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
167         const struct in_ifaddr *ifa;
168
169         if (!in_dev)
170                 return -ENODEV;
171
172         in_dev_for_each_ifa_rcu(ifa, in_dev) {
173                 if (!inet_ifa_match(ipv4, ifa))
174                         continue;
175                 prop->prefix_len = inet_mask_len(ifa->ifa_mask);
176                 prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
177                 /* prop->ipv6_prefixes_cnt = 0; already done by memset before */
178                 return 0;
179         }
180         return -ENOENT;
181 }
182
183 /* fill CLC proposal msg with ipv6 prefixes from device */
184 static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
185                                  struct smc_clc_msg_proposal_prefix *prop,
186                                  struct smc_clc_ipv6_prefix *ipv6_prfx)
187 {
188 #if IS_ENABLED(CONFIG_IPV6)
189         struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
190         struct inet6_ifaddr *ifa;
191         int cnt = 0;
192
193         if (!in6_dev)
194                 return -ENODEV;
195         /* use a maximum of 8 IPv6 prefixes from device */
196         list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
197                 if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
198                         continue;
199                 ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
200                                  &ifa->addr, ifa->prefix_len);
201                 ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
202                 cnt++;
203                 if (cnt == SMC_CLC_MAX_V6_PREFIX)
204                         break;
205         }
206         prop->ipv6_prefixes_cnt = cnt;
207         if (cnt)
208                 return 0;
209 #endif
210         return -ENOENT;
211 }
212
213 /* retrieve and set prefixes in CLC proposal msg */
214 static int smc_clc_prfx_set(struct socket *clcsock,
215                             struct smc_clc_msg_proposal_prefix *prop,
216                             struct smc_clc_ipv6_prefix *ipv6_prfx)
217 {
218         struct dst_entry *dst = sk_dst_get(clcsock->sk);
219         struct sockaddr_storage addrs;
220         struct sockaddr_in6 *addr6;
221         struct sockaddr_in *addr;
222         int rc = -ENOENT;
223
224         if (!dst) {
225                 rc = -ENOTCONN;
226                 goto out;
227         }
228         if (!dst->dev) {
229                 rc = -ENODEV;
230                 goto out_rel;
231         }
232         /* get address to which the internal TCP socket is bound */
233         if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
234                 goto out_rel;
235         /* analyze IP specific data of net_device belonging to TCP socket */
236         addr6 = (struct sockaddr_in6 *)&addrs;
237         rcu_read_lock();
238         if (addrs.ss_family == PF_INET) {
239                 /* IPv4 */
240                 addr = (struct sockaddr_in *)&addrs;
241                 rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
242         } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
243                 /* mapped IPv4 address - peer is IPv4 only */
244                 rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
245                                            prop);
246         } else {
247                 /* IPv6 */
248                 rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
249         }
250         rcu_read_unlock();
251 out_rel:
252         dst_release(dst);
253 out:
254         return rc;
255 }
256
257 /* match ipv4 addrs of dev against addr in CLC proposal */
258 static int smc_clc_prfx_match4_rcu(struct net_device *dev,
259                                    struct smc_clc_msg_proposal_prefix *prop)
260 {
261         struct in_device *in_dev = __in_dev_get_rcu(dev);
262         const struct in_ifaddr *ifa;
263
264         if (!in_dev)
265                 return -ENODEV;
266         in_dev_for_each_ifa_rcu(ifa, in_dev) {
267                 if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
268                     inet_ifa_match(prop->outgoing_subnet, ifa))
269                         return 0;
270         }
271
272         return -ENOENT;
273 }
274
275 /* match ipv6 addrs of dev against addrs in CLC proposal */
276 static int smc_clc_prfx_match6_rcu(struct net_device *dev,
277                                    struct smc_clc_msg_proposal_prefix *prop)
278 {
279 #if IS_ENABLED(CONFIG_IPV6)
280         struct inet6_dev *in6_dev = __in6_dev_get(dev);
281         struct smc_clc_ipv6_prefix *ipv6_prfx;
282         struct inet6_ifaddr *ifa;
283         int i, max;
284
285         if (!in6_dev)
286                 return -ENODEV;
287         /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
288         ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
289         max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
290         list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
291                 if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
292                         continue;
293                 for (i = 0; i < max; i++) {
294                         if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
295                             ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
296                                               ifa->prefix_len))
297                                 return 0;
298                 }
299         }
300 #endif
301         return -ENOENT;
302 }
303
304 /* check if proposed prefixes match one of our device prefixes */
305 int smc_clc_prfx_match(struct socket *clcsock,
306                        struct smc_clc_msg_proposal_prefix *prop)
307 {
308         struct dst_entry *dst = sk_dst_get(clcsock->sk);
309         int rc;
310
311         if (!dst) {
312                 rc = -ENOTCONN;
313                 goto out;
314         }
315         if (!dst->dev) {
316                 rc = -ENODEV;
317                 goto out_rel;
318         }
319         rcu_read_lock();
320         if (!prop->ipv6_prefixes_cnt)
321                 rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
322         else
323                 rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
324         rcu_read_unlock();
325 out_rel:
326         dst_release(dst);
327 out:
328         return rc;
329 }
330
331 /* Wait for data on the tcp-socket, analyze received data
332  * Returns:
333  * 0 if success and it was not a decline that we received.
334  * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
335  * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
336  */
337 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
338                      u8 expected_type, unsigned long timeout)
339 {
340         long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
341         struct sock *clc_sk = smc->clcsock->sk;
342         struct smc_clc_msg_hdr *clcm = buf;
343         struct msghdr msg = {NULL, 0};
344         int reason_code = 0;
345         struct kvec vec = {buf, buflen};
346         int len, datlen, recvlen;
347         bool check_trl = true;
348         int krflags;
349
350         /* peek the first few bytes to determine length of data to receive
351          * so we don't consume any subsequent CLC message or payload data
352          * in the TCP byte stream
353          */
354         /*
355          * Caller must make sure that buflen is no less than
356          * sizeof(struct smc_clc_msg_hdr)
357          */
358         krflags = MSG_PEEK | MSG_WAITALL;
359         clc_sk->sk_rcvtimeo = timeout;
360         iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
361                         sizeof(struct smc_clc_msg_hdr));
362         len = sock_recvmsg(smc->clcsock, &msg, krflags);
363         if (signal_pending(current)) {
364                 reason_code = -EINTR;
365                 clc_sk->sk_err = EINTR;
366                 smc->sk.sk_err = EINTR;
367                 goto out;
368         }
369         if (clc_sk->sk_err) {
370                 reason_code = -clc_sk->sk_err;
371                 if (clc_sk->sk_err == EAGAIN &&
372                     expected_type == SMC_CLC_DECLINE)
373                         clc_sk->sk_err = 0; /* reset for fallback usage */
374                 else
375                         smc->sk.sk_err = clc_sk->sk_err;
376                 goto out;
377         }
378         if (!len) { /* peer has performed orderly shutdown */
379                 smc->sk.sk_err = ECONNRESET;
380                 reason_code = -ECONNRESET;
381                 goto out;
382         }
383         if (len < 0) {
384                 if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE)
385                         smc->sk.sk_err = -len;
386                 reason_code = len;
387                 goto out;
388         }
389         datlen = ntohs(clcm->length);
390         if ((len < sizeof(struct smc_clc_msg_hdr)) ||
391             (clcm->version < SMC_V1) ||
392             ((clcm->type != SMC_CLC_DECLINE) &&
393              (clcm->type != expected_type))) {
394                 smc->sk.sk_err = EPROTO;
395                 reason_code = -EPROTO;
396                 goto out;
397         }
398
399         /* receive the complete CLC message */
400         memset(&msg, 0, sizeof(struct msghdr));
401         if (datlen > buflen) {
402                 check_trl = false;
403                 recvlen = buflen;
404         } else {
405                 recvlen = datlen;
406         }
407         iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen);
408         krflags = MSG_WAITALL;
409         len = sock_recvmsg(smc->clcsock, &msg, krflags);
410         if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) {
411                 smc->sk.sk_err = EPROTO;
412                 reason_code = -EPROTO;
413                 goto out;
414         }
415         datlen -= len;
416         while (datlen) {
417                 u8 tmp[SMC_CLC_RECV_BUF_LEN];
418
419                 vec.iov_base = &tmp;
420                 vec.iov_len = SMC_CLC_RECV_BUF_LEN;
421                 /* receive remaining proposal message */
422                 recvlen = datlen > SMC_CLC_RECV_BUF_LEN ?
423                                                 SMC_CLC_RECV_BUF_LEN : datlen;
424                 iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen);
425                 len = sock_recvmsg(smc->clcsock, &msg, krflags);
426                 datlen -= len;
427         }
428         if (clcm->type == SMC_CLC_DECLINE) {
429                 struct smc_clc_msg_decline *dclc;
430
431                 dclc = (struct smc_clc_msg_decline *)clcm;
432                 reason_code = SMC_CLC_DECL_PEERDECL;
433                 smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
434                 if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 &
435                                                 SMC_FIRST_CONTACT_MASK) {
436                         smc->conn.lgr->sync_err = 1;
437                         smc_lgr_terminate_sched(smc->conn.lgr);
438                 }
439         }
440
441 out:
442         clc_sk->sk_rcvtimeo = rcvtimeo;
443         return reason_code;
444 }
445
446 /* send CLC DECLINE message across internal TCP socket */
447 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
448 {
449         struct smc_clc_msg_decline dclc;
450         struct msghdr msg;
451         struct kvec vec;
452         int len;
453
454         memset(&dclc, 0, sizeof(dclc));
455         memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
456         dclc.hdr.type = SMC_CLC_DECLINE;
457         dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
458         dclc.hdr.version = version;
459         dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
460         dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
461                                                 SMC_FIRST_CONTACT_MASK : 0;
462         if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) &&
463             smc_ib_is_valid_local_systemid())
464                 memcpy(dclc.id_for_peer, local_systemid,
465                        sizeof(local_systemid));
466         dclc.peer_diagnosis = htonl(peer_diag_info);
467         memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
468
469         memset(&msg, 0, sizeof(msg));
470         vec.iov_base = &dclc;
471         vec.iov_len = sizeof(struct smc_clc_msg_decline);
472         len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
473                              sizeof(struct smc_clc_msg_decline));
474         if (len < 0 || len < sizeof(struct smc_clc_msg_decline))
475                 len = -EPROTO;
476         return len > 0 ? 0 : len;
477 }
478
479 /* send CLC PROPOSAL message across internal TCP socket */
480 int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
481 {
482         struct smc_clc_smcd_v2_extension *smcd_v2_ext;
483         struct smc_clc_msg_proposal_prefix *pclc_prfx;
484         struct smc_clc_msg_proposal *pclc_base;
485         struct smc_clc_smcd_gid_chid *gidchids;
486         struct smc_clc_msg_proposal_area *pclc;
487         struct smc_clc_ipv6_prefix *ipv6_prfx;
488         struct smc_clc_v2_extension *v2_ext;
489         struct smc_clc_msg_smcd *pclc_smcd;
490         struct smc_clc_msg_trail *trl;
491         int len, i, plen, rc;
492         int reason_code = 0;
493         struct kvec vec[8];
494         struct msghdr msg;
495
496         pclc = kzalloc(sizeof(*pclc), GFP_KERNEL);
497         if (!pclc)
498                 return -ENOMEM;
499
500         pclc_base = &pclc->pclc_base;
501         pclc_smcd = &pclc->pclc_smcd;
502         pclc_prfx = &pclc->pclc_prfx;
503         ipv6_prfx = pclc->pclc_prfx_ipv6;
504         v2_ext = &pclc->pclc_v2_ext;
505         smcd_v2_ext = &pclc->pclc_smcd_v2_ext;
506         gidchids = pclc->pclc_gidchids;
507         trl = &pclc->pclc_trl;
508
509         pclc_base->hdr.version = SMC_V2;
510         pclc_base->hdr.typev1 = ini->smc_type_v1;
511         pclc_base->hdr.typev2 = ini->smc_type_v2;
512         plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl);
513
514         /* retrieve ip prefixes for CLC proposal msg */
515         if (ini->smc_type_v1 != SMC_TYPE_N) {
516                 rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx);
517                 if (rc) {
518                         if (ini->smc_type_v2 == SMC_TYPE_N) {
519                                 kfree(pclc);
520                                 return SMC_CLC_DECL_CNFERR;
521                         }
522                         pclc_base->hdr.typev1 = SMC_TYPE_N;
523                 } else {
524                         pclc_base->iparea_offset = htons(sizeof(*pclc_smcd));
525                         plen += sizeof(*pclc_prfx) +
526                                         pclc_prfx->ipv6_prefixes_cnt *
527                                         sizeof(ipv6_prfx[0]);
528                 }
529         }
530
531         /* build SMC Proposal CLC message */
532         memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER,
533                sizeof(SMC_EYECATCHER));
534         pclc_base->hdr.type = SMC_CLC_PROPOSAL;
535         if (smcr_indicated(ini->smc_type_v1)) {
536                 /* add SMC-R specifics */
537                 memcpy(pclc_base->lcl.id_for_peer, local_systemid,
538                        sizeof(local_systemid));
539                 memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE);
540                 memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
541                        ETH_ALEN);
542         }
543         if (smcd_indicated(ini->smc_type_v1)) {
544                 /* add SMC-D specifics */
545                 if (ini->ism_dev[0]) {
546                         pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid);
547                         pclc_smcd->ism.chid =
548                                 htons(smc_ism_get_chid(ini->ism_dev[0]));
549                 }
550         }
551         if (ini->smc_type_v2 == SMC_TYPE_N) {
552                 pclc_smcd->v2_ext_offset = 0;
553         } else {
554                 u16 v2_ext_offset;
555                 u8 *eid = NULL;
556
557                 v2_ext_offset = sizeof(*pclc_smcd) -
558                         offsetofend(struct smc_clc_msg_smcd, v2_ext_offset);
559                 if (ini->smc_type_v1 != SMC_TYPE_N)
560                         v2_ext_offset += sizeof(*pclc_prfx) +
561                                                 pclc_prfx->ipv6_prefixes_cnt *
562                                                 sizeof(ipv6_prfx[0]);
563                 pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
564                 v2_ext->hdr.eid_cnt = 0;
565                 v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
566                 v2_ext->hdr.flag.release = SMC_RELEASE;
567                 v2_ext->hdr.flag.seid = 1;
568                 v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
569                                 offsetofend(struct smc_clnt_opts_area_hdr,
570                                             smcd_v2_ext_offset) +
571                                 v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN);
572                 if (ini->ism_dev[0])
573                         smc_ism_get_system_eid(ini->ism_dev[0], &eid);
574                 else
575                         smc_ism_get_system_eid(ini->ism_dev[1], &eid);
576                 if (eid)
577                         memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN);
578                 plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext);
579                 if (ini->ism_offered_cnt) {
580                         for (i = 1; i <= ini->ism_offered_cnt; i++) {
581                                 gidchids[i - 1].gid =
582                                         htonll(ini->ism_dev[i]->local_gid);
583                                 gidchids[i - 1].chid =
584                                         htons(smc_ism_get_chid(ini->ism_dev[i]));
585                         }
586                         plen += ini->ism_offered_cnt *
587                                 sizeof(struct smc_clc_smcd_gid_chid);
588                 }
589         }
590         pclc_base->hdr.length = htons(plen);
591         memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
592
593         /* send SMC Proposal CLC message */
594         memset(&msg, 0, sizeof(msg));
595         i = 0;
596         vec[i].iov_base = pclc_base;
597         vec[i++].iov_len = sizeof(*pclc_base);
598         vec[i].iov_base = pclc_smcd;
599         vec[i++].iov_len = sizeof(*pclc_smcd);
600         if (ini->smc_type_v1 != SMC_TYPE_N) {
601                 vec[i].iov_base = pclc_prfx;
602                 vec[i++].iov_len = sizeof(*pclc_prfx);
603                 if (pclc_prfx->ipv6_prefixes_cnt > 0) {
604                         vec[i].iov_base = ipv6_prfx;
605                         vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt *
606                                            sizeof(ipv6_prfx[0]);
607                 }
608         }
609         if (ini->smc_type_v2 != SMC_TYPE_N) {
610                 vec[i].iov_base = v2_ext;
611                 vec[i++].iov_len = sizeof(*v2_ext);
612                 vec[i].iov_base = smcd_v2_ext;
613                 vec[i++].iov_len = sizeof(*smcd_v2_ext);
614                 if (ini->ism_offered_cnt) {
615                         vec[i].iov_base = gidchids;
616                         vec[i++].iov_len = ini->ism_offered_cnt *
617                                         sizeof(struct smc_clc_smcd_gid_chid);
618                 }
619         }
620         vec[i].iov_base = trl;
621         vec[i++].iov_len = sizeof(*trl);
622         /* due to the few bytes needed for clc-handshake this cannot block */
623         len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
624         if (len < 0) {
625                 smc->sk.sk_err = smc->clcsock->sk->sk_err;
626                 reason_code = -smc->sk.sk_err;
627         } else if (len < ntohs(pclc_base->hdr.length)) {
628                 reason_code = -ENETUNREACH;
629                 smc->sk.sk_err = -reason_code;
630         }
631
632         kfree(pclc);
633         return reason_code;
634 }
635
636 /* build and send CLC CONFIRM / ACCEPT message */
637 static int smc_clc_send_confirm_accept(struct smc_sock *smc,
638                                        struct smc_clc_msg_accept_confirm_v2 *clc_v2,
639                                        int first_contact, u8 version)
640 {
641         struct smc_connection *conn = &smc->conn;
642         struct smc_clc_msg_accept_confirm *clc;
643         struct smc_clc_first_contact_ext fce;
644         struct smc_clc_msg_trail trl;
645         struct kvec vec[3];
646         struct msghdr msg;
647         int i, len;
648
649         /* send SMC Confirm CLC msg */
650         clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
651         clc->hdr.version = version;     /* SMC version */
652         if (first_contact)
653                 clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK;
654         if (conn->lgr->is_smcd) {
655                 /* SMC-D specific settings */
656                 memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
657                        sizeof(SMCD_EYECATCHER));
658                 clc->hdr.typev1 = SMC_TYPE_D;
659                 clc->d0.gid = conn->lgr->smcd->local_gid;
660                 clc->d0.token = conn->rmb_desc->token;
661                 clc->d0.dmbe_size = conn->rmbe_size_short;
662                 clc->d0.dmbe_idx = 0;
663                 memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
664                 if (version == SMC_V1) {
665                         clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
666                 } else {
667                         u8 *eid = NULL;
668
669                         clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd));
670                         smc_ism_get_system_eid(conn->lgr->smcd, &eid);
671                         if (eid)
672                                 memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN);
673                         len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
674                         if (first_contact)
675                                 smc_clc_fill_fce(&fce, &len);
676                         clc_v2->hdr.length = htons(len);
677                 }
678                 memcpy(trl.eyecatcher, SMCD_EYECATCHER,
679                        sizeof(SMCD_EYECATCHER));
680         } else {
681                 struct smc_link *link = conn->lnk;
682
683                 /* SMC-R specific settings */
684                 link = conn->lnk;
685                 memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
686                        sizeof(SMC_EYECATCHER));
687                 clc->hdr.typev1 = SMC_TYPE_R;
688                 clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
689                 memcpy(clc->r0.lcl.id_for_peer, local_systemid,
690                        sizeof(local_systemid));
691                 memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE);
692                 memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
693                        ETH_ALEN);
694                 hton24(clc->r0.qpn, link->roce_qp->qp_num);
695                 clc->r0.rmb_rkey =
696                         htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
697                 clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
698                 clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
699                 switch (clc->hdr.type) {
700                 case SMC_CLC_ACCEPT:
701                         clc->r0.qp_mtu = link->path_mtu;
702                         break;
703                 case SMC_CLC_CONFIRM:
704                         clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
705                         break;
706                 }
707                 clc->r0.rmbe_size = conn->rmbe_size_short;
708                 clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
709                                 (conn->rmb_desc->sgt[link->link_idx].sgl));
710                 hton24(clc->r0.psn, link->psn_initial);
711                 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
712         }
713
714         memset(&msg, 0, sizeof(msg));
715         i = 0;
716         vec[i].iov_base = clc_v2;
717         if (version > SMC_V1)
718                 vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl);
719         else
720                 vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
721                                                 SMCD_CLC_ACCEPT_CONFIRM_LEN :
722                                                 SMCR_CLC_ACCEPT_CONFIRM_LEN) -
723                                    sizeof(trl);
724         if (version > SMC_V1 && first_contact) {
725                 vec[i].iov_base = &fce;
726                 vec[i++].iov_len = sizeof(fce);
727         }
728         vec[i].iov_base = &trl;
729         vec[i++].iov_len = sizeof(trl);
730         return kernel_sendmsg(smc->clcsock, &msg, vec, 1,
731                               ntohs(clc->hdr.length));
732 }
733
734 /* send CLC CONFIRM message across internal TCP socket */
735 int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
736                          u8 version)
737 {
738         struct smc_clc_msg_accept_confirm_v2 cclc_v2;
739         int reason_code = 0;
740         int len;
741
742         /* send SMC Confirm CLC msg */
743         memset(&cclc_v2, 0, sizeof(cclc_v2));
744         cclc_v2.hdr.type = SMC_CLC_CONFIRM;
745         len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
746                                           version);
747         if (len < ntohs(cclc_v2.hdr.length)) {
748                 if (len >= 0) {
749                         reason_code = -ENETUNREACH;
750                         smc->sk.sk_err = -reason_code;
751                 } else {
752                         smc->sk.sk_err = smc->clcsock->sk->sk_err;
753                         reason_code = -smc->sk.sk_err;
754                 }
755         }
756         return reason_code;
757 }
758
759 /* send CLC ACCEPT message across internal TCP socket */
760 int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
761                         u8 version)
762 {
763         struct smc_clc_msg_accept_confirm_v2 aclc_v2;
764         int len;
765
766         memset(&aclc_v2, 0, sizeof(aclc_v2));
767         aclc_v2.hdr.type = SMC_CLC_ACCEPT;
768         len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
769                                           version);
770         if (len < ntohs(aclc_v2.hdr.length))
771                 len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
772
773         return len > 0 ? 0 : len;
774 }
775
776 void smc_clc_get_hostname(u8 **host)
777 {
778         *host = &smc_hostname[0];
779 }
780
781 void __init smc_clc_init(void)
782 {
783         struct new_utsname *u;
784
785         memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */
786         u = utsname();
787         memcpy(smc_hostname, u->nodename,
788                min_t(size_t, strlen(u->nodename), sizeof(smc_hostname)));
789 }