// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/smc.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"
#include "smc_netlink.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"

#define SMC_LGR_NUM_INCR                256
#define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)

struct smc_lgr_list smc_lgr_list = {    /* established link groups */
        .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
        .list = LIST_HEAD_INIT(smc_lgr_list.list),
        .num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
                                                  spinlock_t **lgr_lock)
{
        if (lgr->is_smcd) {
                *lgr_lock = &lgr->smcd->lgr_lock;
                return &lgr->smcd->lgr_list;
        }

        *lgr_lock = &smc_lgr_list.lock;
        return &smc_lgr_list.list;
}

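/* track the number of links using each port of an ib device */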
static void smc_ibdev_cnt_inc(struct smc_link *lnk)
{
        atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}

static void smc_ibdev_cnt_dec(struct smc_link *lnk)
{
        atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
        /* client link group creation always follows the server link group
         * creation. For client use a somewhat higher removal delay time,
         * otherwise there is a risk of out-of-sync link groups.
         */
        if (!lgr->freeing) {
                mod_delayed_work(system_wq, &lgr->free_work,
                                 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
                                                SMC_LGR_FREE_DELAY_CLNT :
                                                SMC_LGR_FREE_DELAY_SERV);
        }
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn        connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
        struct rb_node **link, *parent = NULL;
        u32 token = conn->alert_token_local;

        link = &conn->lgr->conns_all.rb_node;
        while (*link) {
                struct smc_connection *cur = rb_entry(*link,
                                        struct smc_connection, alert_node);

                parent = *link;
                if (cur->alert_token_local > token)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }
        /* Put the new node there */
        rb_link_node(&conn->alert_node, parent, link);
        rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
        enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
                                       SMC_LNK_ACTIVE;
        int i, j;

        /* do link balancing */
        conn->lnk = NULL;       /* reset conn->lnk first */
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &conn->lgr->lnk[i];

                if (lnk->state != expected || lnk->link_is_asym)
                        continue;
                if (conn->lgr->role == SMC_CLNT) {
                        conn->lnk = lnk; /* temporary, SMC server assigns link*/
                        break;
                }
                if (conn->lgr->conns_num % 2) {
                        for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
                                struct smc_link *lnk2;

                                lnk2 = &conn->lgr->lnk[j];
                                if (lnk2->state == expected &&
                                    !lnk2->link_is_asym) {
                                        conn->lnk = lnk2;
                                        break;
                                }
                        }
                }
                if (!conn->lnk)
                        conn->lnk = lnk;
                break;
        }
        if (!conn->lnk)
                return SMC_CLC_DECL_NOACTLINK;
        atomic_inc(&conn->lnk->conn_cnt);
        return 0;
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        static atomic_t nexttoken = ATOMIC_INIT(0);
        int rc;

        if (!conn->lgr->is_smcd) {
                rc = smcr_lgr_conn_assign_link(conn, first);
                if (rc) {
                        conn->lgr = NULL;
                        return rc;
                }
        }
        /* find a new alert_token_local value not yet used by some connection
         * in this link group
         */
        sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
        while (!conn->alert_token_local) {
                conn->alert_token_local = atomic_inc_return(&nexttoken);
                if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
                        conn->alert_token_local = 0;
        }
        smc_lgr_add_alert_token(conn);
        conn->lgr->conns_num++;
        return 0;
}

/* Unregister connection and reset the alert token of the given connection */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        struct smc_link_group *lgr = conn->lgr;

        rb_erase(&conn->alert_node, &lgr->conns_all);
        if (conn->lnk)
                atomic_dec(&conn->lnk->conn_cnt);
        lgr->conns_num--;
        conn->alert_token_local = 0;
        sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!smc_conn_lgr_valid(conn))
                return;
        write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local) {
                __smc_lgr_unregister_conn(conn);
        }
        write_unlock_bh(&lgr->conns_lock);
}

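/* fill a netlink message with the global SMC system information */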
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        char hostname[SMC_MAX_HOSTNAME_LEN + 1];
        char smc_seid[SMC_MAX_EID_LEN + 1];
        struct nlattr *attrs;
        u8 *seid = NULL;
        u8 *host = NULL;
        void *nlh;

        nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_GET_SYS_INFO);
        if (!nlh)
                goto errmsg;
        if (cb_ctx->pos[0])
                goto errout;
        attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
        if (!attrs)
                goto errout;
        if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
                goto errattr;
        smc_clc_get_hostname(&host);
        if (host) {
                memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
                hostname[SMC_MAX_HOSTNAME_LEN] = 0;
                if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
                        goto errattr;
        }
        if (smc_ism_is_v2_capable()) {
                smc_ism_get_system_eid(&seid);
                memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
                smc_seid[SMC_MAX_EID_LEN] = 0;
                if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
                        goto errattr;
        }
        nla_nest_end(skb, attrs);
        genlmsg_end(skb, nlh);
        cb_ctx->pos[0] = 1;
        return skb->len;

errattr:
        nla_nest_cancel(skb, attrs);
errout:
        genlmsg_cancel(skb, nlh);
errmsg:
        return skb->len;
}

/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
                                     struct sk_buff *skb,
                                     struct netlink_callback *cb,
                                     struct nlattr *v2_attrs)
{
        char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
        char smc_eid[SMC_MAX_EID_LEN + 1];

        if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
                goto errv2attr;
        if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
                goto errv2attr;
        if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
                goto errv2attr;
        memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
        smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
                goto errv2attr;
        memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
        smc_eid[SMC_MAX_EID_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
                goto errv2attr;

        nla_nest_end(skb, v2_attrs);
        return 0;

errv2attr:
        nla_nest_cancel(skb, v2_attrs);
        return -EMSGSIZE;
}

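/* Fill SMC_NLA_LGR_R_V2 nested attributes */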
static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
                                   struct sk_buff *skb,
                                   struct netlink_callback *cb)
{
        struct nlattr *v2_attrs;

        v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
        if (!v2_attrs)
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
                goto errv2attr;

        nla_nest_end(skb, v2_attrs);
        return 0;

errv2attr:
        nla_nest_cancel(skb, v2_attrs);
errattr:
        return -EMSGSIZE;
}

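/* fill the attributes describing one SMC-R link group */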
static int smc_nl_fill_lgr(struct smc_link_group *lgr,
                           struct sk_buff *skb,
                           struct netlink_callback *cb)
{
        char smc_target[SMC_MAX_PNETID_LEN + 1];
        struct nlattr *attrs, *v2_attrs;

        attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
        if (!attrs)
                goto errout;

        if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_BUF_TYPE, lgr->buf_type))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
                goto errattr;
        if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
                              lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
                goto errattr;
        memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
        smc_target[SMC_MAX_PNETID_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
                goto errattr;
        if (lgr->smc_version > SMC_V1) {
                v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
                if (!v2_attrs)
                        goto errattr;
                if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
                        goto errattr;
                if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
                        goto errattr;
        }

        nla_nest_end(skb, attrs);
        return 0;
errattr:
        nla_nest_cancel(skb, attrs);
errout:
        return -EMSGSIZE;
}

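/* fill a netlink message with the attributes of one SMC-R link */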
static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
                                struct smc_link *link,
                                struct sk_buff *skb,
                                struct netlink_callback *cb)
{
        char smc_ibname[IB_DEVICE_NAME_MAX];
        u8 smc_gid_target[41];
        struct nlattr *attrs;
        u32 link_uid = 0;
        void *nlh;

        nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_GET_LINK_SMCR);
        if (!nlh)
                goto errmsg;

        attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
        if (!attrs)
                goto errout;

        if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
                        atomic_read(&link->conn_cnt)))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
                goto errattr;
        snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
        if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
                goto errattr;
        memcpy(&link_uid, link->link_uid, sizeof(link_uid));
        if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
                goto errattr;
        memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
        if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
                goto errattr;
        memset(smc_gid_target, 0, sizeof(smc_gid_target));
        smc_gid_be16_convert(smc_gid_target, link->gid);
        if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
                goto errattr;
        memset(smc_gid_target, 0, sizeof(smc_gid_target));
        smc_gid_be16_convert(smc_gid_target, link->peer_gid);
        if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
                goto errattr;

        nla_nest_end(skb, attrs);
        genlmsg_end(skb, nlh);
        return 0;
errattr:
        nla_nest_cancel(skb, attrs);
errout:
        genlmsg_cancel(skb, nlh);
errmsg:
        return -EMSGSIZE;
}

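/* dump one SMC-R link group and, if requested, all of its usable links */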
static int smc_nl_handle_lgr(struct smc_link_group *lgr,
                             struct sk_buff *skb,
                             struct netlink_callback *cb,
                             bool list_links)
{
        void *nlh;
        int i;

        nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_GET_LGR_SMCR);
        if (!nlh)
                goto errmsg;
        if (smc_nl_fill_lgr(lgr, skb, cb))
                goto errout;

        genlmsg_end(skb, nlh);
        if (!list_links)
                goto out;
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_usable(&lgr->lnk[i]))
                        continue;
                if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
                        goto errout;
        }
out:
        return 0;

errout:
        genlmsg_cancel(skb, nlh);
errmsg:
        return -EMSGSIZE;
}

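/* dump all SMC-R link groups, continuing at the position saved in the
 * netlink dump context
 */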
static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
                                 struct sk_buff *skb,
                                 struct netlink_callback *cb,
                                 bool list_links)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        struct smc_link_group *lgr;
        int snum = cb_ctx->pos[0];
        int num = 0;

        spin_lock_bh(&smc_lgr->lock);
        list_for_each_entry(lgr, &smc_lgr->list, list) {
                if (num < snum)
                        goto next;
                if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
                        goto errout;
next:
                num++;
        }
errout:
        spin_unlock_bh(&smc_lgr->lock);
        cb_ctx->pos[0] = num;
}

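/* fill a netlink message with the attributes of one SMC-D link group */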
static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
                                struct sk_buff *skb,
                                struct netlink_callback *cb)
{
        char smc_pnet[SMC_MAX_PNETID_LEN + 1];
        struct nlattr *attrs;
        void *nlh;

        nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_GET_LGR_SMCD);
        if (!nlh)
                goto errmsg;

        attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
        if (!attrs)
                goto errout;

        if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
                goto errattr;
        if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid,
                              SMC_NLA_LGR_D_PAD))
                goto errattr;
        if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
                              SMC_NLA_LGR_D_PAD))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
                goto errattr;
        memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
        smc_pnet[SMC_MAX_PNETID_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
                goto errattr;
        if (lgr->smc_version > SMC_V1) {
                struct nlattr *v2_attrs;

                v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
                if (!v2_attrs)
                        goto errattr;
                if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
                        goto errattr;
        }
        nla_nest_end(skb, attrs);
        genlmsg_end(skb, nlh);
        return 0;

errattr:
        nla_nest_cancel(skb, attrs);
errout:
        genlmsg_cancel(skb, nlh);
errmsg:
        return -EMSGSIZE;
}

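/* dump all link groups of one SMC-D device, continuing at pos[1] */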
static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
                                  struct sk_buff *skb,
                                  struct netlink_callback *cb)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        struct smc_link_group *lgr;
        int snum = cb_ctx->pos[1];
        int rc = 0, num = 0;

        spin_lock_bh(&dev->lgr_lock);
        list_for_each_entry(lgr, &dev->lgr_list, list) {
                if (!lgr->is_smcd)
                        continue;
                if (num < snum)
                        goto next;
                rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
                if (rc)
                        goto errout;
next:
                num++;
        }
errout:
        spin_unlock_bh(&dev->lgr_lock);
        cb_ctx->pos[1] = num;
        return rc;
}

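/* walk all SMC-D devices and dump the link groups of each of them */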
static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
                                struct sk_buff *skb,
                                struct netlink_callback *cb)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        struct smcd_dev *smcd_dev;
        int snum = cb_ctx->pos[0];
        int rc = 0, num = 0;

        mutex_lock(&dev_list->mutex);
        list_for_each_entry(smcd_dev, &dev_list->list, list) {
                if (list_empty(&smcd_dev->lgr_list))
                        continue;
                if (num < snum)
                        goto next;
                rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
                if (rc)
                        goto errout;
next:
                num++;
        }
errout:
        mutex_unlock(&dev_list->mutex);
        cb_ctx->pos[0] = num;
        return rc;
}

int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
        bool list_links = false;

        smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
        return skb->len;
}

int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
{
        bool list_links = true;

        smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
        return skb->len;
}

int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
        smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
        return skb->len;
}

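/* take a link group out of its list and terminate it early */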
void smc_lgr_cleanup_early(struct smc_link_group *lgr)
{
        spinlock_t *lgr_lock;

        if (!lgr)
                return;

        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        /* do not use this link group for new connections */
        if (!list_empty(&lgr->list))
                list_del_init(&lgr->list);
        spin_unlock_bh(lgr_lock);
        __smc_lgr_terminate(lgr, true);
}

static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
        int i;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &lgr->lnk[i];

                if (smc_link_sendable(lnk))
                        lnk->state = SMC_LNK_INACTIVE;
        }
        wake_up_all(&lgr->llc_msg_waiter);
        wake_up_all(&lgr->llc_flow_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);

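/* delayed work to free a link group that has no more connections */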
static void smc_lgr_free_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(to_delayed_work(work),
                                                  struct smc_link_group,
                                                  free_work);
        spinlock_t *lgr_lock;
        bool conns;

        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        if (lgr->freeing) {
                spin_unlock_bh(lgr_lock);
                return;
        }
        read_lock_bh(&lgr->conns_lock);
        conns = RB_EMPTY_ROOT(&lgr->conns_all);
        read_unlock_bh(&lgr->conns_lock);
        if (!conns) { /* number of lgr connections is no longer zero */
                spin_unlock_bh(lgr_lock);
                return;
        }
        list_del_init(&lgr->list); /* remove from smc_lgr_list */
        lgr->freeing = 1; /* this instance does the freeing, no new schedule */
        spin_unlock_bh(lgr_lock);
        cancel_delayed_work(&lgr->free_work);

        if (!lgr->is_smcd && !lgr->terminating)
                smc_llc_send_link_delete_all(lgr, true,
                                             SMC_LLC_DEL_PROG_INIT_TERM);
        if (lgr->is_smcd && !lgr->terminating)
                smc_ism_signal_shutdown(lgr);
        if (!lgr->is_smcd)
                smcr_lgr_link_deactivate_all(lgr);
        smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(work, struct smc_link_group,
                                                  terminate_work);

        __smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
        u8 link_id;
        int i;

        while (1) {
again:
                link_id = ++lgr->next_link_id;
                if (!link_id)   /* skip zero as link_id */
                        link_id = ++lgr->next_link_id;
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (smc_link_usable(&lgr->lnk[i]) &&
                            lgr->lnk[i].link_id == link_id)
                                goto again;
                }
                break;
        }
        return link_id;
}

static void smcr_copy_dev_info_to_link(struct smc_link *link)
{
        struct smc_ib_device *smcibdev = link->smcibdev;

        snprintf(link->ibname, sizeof(link->ibname), "%s",
                 smcibdev->ibdev->name);
        link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
}

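/* initialize a new SMC-R link: take references on the lgr and the ib
 * device and set up protection domain, queue pair and work request memory
 */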
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
                   u8 link_idx, struct smc_init_info *ini)
{
        struct smc_ib_device *smcibdev;
        u8 rndvec[3];
        int rc;

        if (lgr->smc_version == SMC_V2) {
                lnk->smcibdev = ini->smcrv2.ib_dev_v2;
                lnk->ibport = ini->smcrv2.ib_port_v2;
        } else {
                lnk->smcibdev = ini->ib_dev;
                lnk->ibport = ini->ib_port;
        }
        get_device(&lnk->smcibdev->ibdev->dev);
        atomic_inc(&lnk->smcibdev->lnk_cnt);
        refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
        lnk->clearing = 0;
        lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
        lnk->link_id = smcr_next_link_id(lgr);
        lnk->lgr = lgr;
        smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
        lnk->link_idx = link_idx;
        lnk->wr_rx_id_compl = 0;
        smc_ibdev_cnt_inc(lnk);
        smcr_copy_dev_info_to_link(lnk);
        atomic_set(&lnk->conn_cnt, 0);
        smc_llc_link_set_uid(lnk);
        INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
        if (!lnk->smcibdev->initialized) {
                rc = smc_ib_setup_per_ibdev(lnk->smcibdev);
                if (rc)
                        goto out;
        }
        get_random_bytes(rndvec, sizeof(rndvec));
        lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
                (rndvec[2] << 16);
        rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
                                  ini->vlan_id, lnk->gid, &lnk->sgid_index,
                                  lgr->smc_version == SMC_V2 ?
                                                  &ini->smcrv2 : NULL);
        if (rc)
                goto out;
        rc = smc_llc_link_init(lnk);
        if (rc)
                goto out;
        rc = smc_wr_alloc_link_mem(lnk);
        if (rc)
                goto clear_llc_lnk;
        rc = smc_ib_create_protection_domain(lnk);
        if (rc)
                goto free_link_mem;
        rc = smc_ib_create_queue_pair(lnk);
        if (rc)
                goto dealloc_pd;
        rc = smc_wr_create_link(lnk);
        if (rc)
                goto destroy_qp;
        lnk->state = SMC_LNK_ACTIVATING;
        return 0;

destroy_qp:
        smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
        smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
        smc_wr_free_link_mem(lnk);
clear_llc_lnk:
        smc_llc_link_clear(lnk, false);
out:
        smc_ibdev_cnt_dec(lnk);
        put_device(&lnk->smcibdev->ibdev->dev);
        smcibdev = lnk->smcibdev;
        memset(lnk, 0, sizeof(struct smc_link));
        lnk->state = SMC_LNK_UNUSED;
        if (!atomic_dec_return(&smcibdev->lnk_cnt))
                wake_up(&smcibdev->lnks_deleted);
        smc_lgr_put(lgr); /* lgr_hold above */
        return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
        struct smc_link_group *lgr;
        struct list_head *lgr_list;
        struct smc_link *lnk;
        spinlock_t *lgr_lock;
        u8 link_idx;
        int rc = 0;
        int i;

        if (ini->is_smcd && ini->vlan_id) {
                if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
                                     ini->vlan_id)) {
                        rc = SMC_CLC_DECL_ISMVLANERR;
                        goto out;
                }
        }

        lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
        if (!lgr) {
                rc = SMC_CLC_DECL_MEM;
                goto ism_put_vlan;
        }
        lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
                                     SMC_LGR_ID_SIZE, &lgr->id);
        if (!lgr->tx_wq) {
                rc = -ENOMEM;
                goto free_lgr;
        }
        lgr->is_smcd = ini->is_smcd;
        lgr->sync_err = 0;
        lgr->terminating = 0;
        lgr->freeing = 0;
        lgr->vlan_id = ini->vlan_id;
        refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
        mutex_init(&lgr->sndbufs_lock);
        mutex_init(&lgr->rmbs_lock);
        rwlock_init(&lgr->conns_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
        }
        lgr->next_link_id = 0;
        smc_lgr_list.num += SMC_LGR_NUM_INCR;
        memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
        INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
        lgr->conns_all = RB_ROOT;
        if (ini->is_smcd) {
                /* SMC-D specific settings */
                get_device(&ini->ism_dev[ini->ism_selected]->dev);
                lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
                lgr->smcd = ini->ism_dev[ini->ism_selected];
                lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
                lgr_lock = &lgr->smcd->lgr_lock;
                lgr->smc_version = ini->smcd_version;
                lgr->peer_shutdown = 0;
                atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
        } else {
                /* SMC-R specific settings */
                struct smc_ib_device *ibdev;
                int ibport;

                lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
                lgr->smc_version = ini->smcr_version;
                memcpy(lgr->peer_systemid, ini->peer_systemid,
                       SMC_SYSTEMID_LEN);
                if (lgr->smc_version == SMC_V2) {
                        ibdev = ini->smcrv2.ib_dev_v2;
                        ibport = ini->smcrv2.ib_port_v2;
                        lgr->saddr = ini->smcrv2.saddr;
                        lgr->uses_gateway = ini->smcrv2.uses_gateway;
                        memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
                               ETH_ALEN);
                } else {
                        ibdev = ini->ib_dev;
                        ibport = ini->ib_port;
                }
                memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
                       SMC_MAX_PNETID_LEN);
                rc = smc_wr_alloc_lgr_mem(lgr);
                if (rc)
                        goto free_wq;
                smc_llc_lgr_init(lgr, smc);

                link_idx = SMC_SINGLE_LINK;
                lnk = &lgr->lnk[link_idx];
                rc = smcr_link_init(lgr, lnk, link_idx, ini);
                if (rc) {
                        smc_wr_free_lgr_mem(lgr);
                        goto free_wq;
                }
                lgr->net = smc_ib_net(lnk->smcibdev);
                lgr_list = &smc_lgr_list.list;
                lgr_lock = &smc_lgr_list.lock;
                lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type;
                atomic_inc(&lgr_cnt);
        }
        smc->conn.lgr = lgr;
        spin_lock_bh(lgr_lock);
        list_add_tail(&lgr->list, lgr_list);
        spin_unlock_bh(lgr_lock);
        return 0;

free_wq:
        destroy_workqueue(lgr->tx_wq);
free_lgr:
        kfree(lgr);
ism_put_vlan:
        if (ini->is_smcd && ini->vlan_id)
                smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
out:
        if (rc < 0) {
                if (rc == -ENOMEM)
                        rc = SMC_CLC_DECL_MEM;
                else
                        rc = SMC_CLC_DECL_INTERR;
        }
        return rc;
}

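/* determine the free space in the peer's receive buffer from the local
 * prod and cons cursors
 */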
static int smc_write_space(struct smc_connection *conn)
{
        int buffer_len = conn->peer_rmbe_size;
        union smc_host_cursor prod;
        union smc_host_cursor cons;
        int space;

        smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
        smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
        /* determine rx_buf space */
        space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
        return space;
}

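/* rewind the tx cursors of a connection that moved to another link, so
 * that not yet confirmed data is sent again over the new link
 */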
static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
                             struct smc_wr_buf *wr_buf)
{
        struct smc_connection *conn = &smc->conn;
        union smc_host_cursor cons, fin;
        int rc = 0;
        int diff;

        smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
        smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
        /* set prod cursor to old state, enforce tx_rdma_writes() */
        smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
        smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

        if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
                /* cons cursor advanced more than fin, and prod was set
                 * equal fin above, so now prod is smaller than cons.
                 * Fix that.
                 */
                diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
                smc_curs_add(conn->sndbuf_desc->len,
                             &conn->tx_curs_sent, diff);
                smc_curs_add(conn->sndbuf_desc->len,
                             &conn->tx_curs_fin, diff);

                smp_mb__before_atomic();
                atomic_add(diff, &conn->sndbuf_space);
                smp_mb__after_atomic();

                smc_curs_add(conn->peer_rmbe_size,
                             &conn->local_tx_ctrl.prod, diff);
                smc_curs_add(conn->peer_rmbe_size,
                             &conn->local_tx_ctrl_fin, diff);
        }
        /* recalculate, value is used by tx_rdma_writes() */
        atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

        if (smc->sk.sk_state != SMC_INIT &&
            smc->sk.sk_state != SMC_CLOSED) {
                rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
                if (!rc) {
                        queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
                        smc->sk.sk_data_ready(&smc->sk);
                }
        } else {
                smc_wr_tx_put_slot(conn->lnk,
                                   (struct smc_wr_tx_pend_priv *)pend);
        }
        return rc;
}

void smc_switch_link_and_count(struct smc_connection *conn,
                               struct smc_link *to_lnk)
{
        atomic_dec(&conn->lnk->conn_cnt);
        /* link_hold in smc_conn_create() */
        smcr_link_put(conn->lnk);
        conn->lnk = to_lnk;
        atomic_inc(&conn->lnk->conn_cnt);
        /* link_put in smc_conn_free() */
        smcr_link_hold(conn->lnk);
}

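/* move all connections from from_lnk to another active link of the lgr */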
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
                                  struct smc_link *from_lnk, bool is_dev_err)
{
        struct smc_link *to_lnk = NULL;
        struct smc_cdc_tx_pend *pend;
        struct smc_connection *conn;
        struct smc_wr_buf *wr_buf;
        struct smc_sock *smc;
        struct rb_node *node;
        int i, rc = 0;

        /* link is inactive, wake up tx waiters */
        smc_wr_wakeup_tx_wait(from_lnk);

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
                        continue;
                if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
                    from_lnk->ibport == lgr->lnk[i].ibport) {
                        continue;
                }
                to_lnk = &lgr->lnk[i];
                break;
        }
        if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
                smc_lgr_terminate_sched(lgr);
                return NULL;
        }
again:
        read_lock_bh(&lgr->conns_lock);
        for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
                conn = rb_entry(node, struct smc_connection, alert_node);
                if (conn->lnk != from_lnk)
                        continue;
                smc = container_of(conn, struct smc_sock, conn);
                /* conn->lnk not yet set in SMC_INIT state */
                if (smc->sk.sk_state == SMC_INIT)
                        continue;
                if (smc->sk.sk_state == SMC_CLOSED ||
                    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
                    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
                    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
                    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
                    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
                    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
                    smc->sk.sk_state == SMC_PEERABORTWAIT ||
                    smc->sk.sk_state == SMC_PROCESSABORT) {
                        spin_lock_bh(&conn->send_lock);
                        smc_switch_link_and_count(conn, to_lnk);
                        spin_unlock_bh(&conn->send_lock);
                        continue;
                }
                sock_hold(&smc->sk);
                read_unlock_bh(&lgr->conns_lock);
                /* pre-fetch buffer outside of send_lock, might sleep */
                rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
                if (rc)
                        goto err_out;
                /* avoid race with smcr_tx_sndbuf_nonempty() */
                spin_lock_bh(&conn->send_lock);
                smc_switch_link_and_count(conn, to_lnk);
                rc = smc_switch_cursor(smc, pend, wr_buf);
                spin_unlock_bh(&conn->send_lock);
                sock_put(&smc->sk);
                if (rc)
                        goto err_out;
                goto again;
        }
        read_unlock_bh(&lgr->conns_lock);
        smc_wr_tx_link_put(to_lnk);
        return to_lnk;

err_out:
        smcr_link_down_cond_sched(to_lnk);
        smc_wr_tx_link_put(to_lnk);
        return NULL;
}

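/* give up use of an SMC-R buffer, freeing it if it cannot be reused */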
static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
                           struct smc_link_group *lgr)
{
        struct mutex *lock;     /* lock buffer list */
        int rc;

        if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
                /* unregister rmb with peer */
                rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
                if (!rc) {
                        /* protect against smc_llc_cli_rkey_exchange() */
                        mutex_lock(&lgr->llc_conf_mutex);
                        smc_llc_do_delete_rkey(lgr, buf_desc);
                        buf_desc->is_conf_rkey = false;
                        mutex_unlock(&lgr->llc_conf_mutex);
                        smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
                }
        }

        if (buf_desc->is_reg_err) {
                /* buf registration failed, reuse not possible */
                lock = is_rmb ? &lgr->rmbs_lock :
                                &lgr->sndbufs_lock;
                mutex_lock(lock);
                list_del(&buf_desc->list);
                mutex_unlock(lock);

                smc_buf_free(lgr, is_rmb, buf_desc);
        } else {
                /* memzero_explicit provides potential memory barrier semantics */
                memzero_explicit(buf_desc->cpu_addr, buf_desc->len);
                WRITE_ONCE(buf_desc->used, 0);
        }
}

static void smc_buf_unuse(struct smc_connection *conn,
                          struct smc_link_group *lgr)
{
        if (conn->sndbuf_desc) {
                if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
                        smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
                } else {
                        memzero_explicit(conn->sndbuf_desc->cpu_addr, conn->sndbuf_desc->len);
                        WRITE_ONCE(conn->sndbuf_desc->used, 0);
                }
        }
        if (conn->rmb_desc) {
                if (!lgr->is_smcd) {
                        smcr_buf_unuse(conn->rmb_desc, true, lgr);
                } else {
                        memzero_explicit(conn->rmb_desc->cpu_addr,
                                         conn->rmb_desc->len + sizeof(struct smcd_cdc_msg));
                        WRITE_ONCE(conn->rmb_desc->used, 0);
                }
        }
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!lgr || conn->freed)
                /* Connection has never been registered in a
                 * link group, or has already been freed.
                 */
                return;

        conn->freed = 1;
        if (!smc_conn_lgr_valid(conn))
                /* Connection has already unregistered from
                 * link group.
                 */
                goto lgr_put;

        if (lgr->is_smcd) {
                if (!list_empty(&lgr->list))
                        smc_ism_unset_conn(conn);
                tasklet_kill(&conn->rx_tsklet);
        } else {
                smc_cdc_wait_pend_tx_wr(conn);
                if (current_work() != &conn->abort_work)
                        cancel_work_sync(&conn->abort_work);
        }
        if (!list_empty(&lgr->list)) {
                smc_buf_unuse(conn, lgr); /* allow buffer reuse */
                smc_lgr_unregister_conn(conn);
        }

        if (!lgr->conns_num)
                smc_lgr_schedule_free_work(lgr);
lgr_put:
        if (!lgr->is_smcd)
                smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
        smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
                                struct smc_link *lnk)
{
        if (is_rmb || buf_desc->is_vm)
                buf_desc->is_reg_mr[lnk->link_idx] = false;
        if (!buf_desc->is_map_ib[lnk->link_idx])
                return;

        if ((is_rmb || buf_desc->is_vm) &&
            buf_desc->mr[lnk->link_idx]) {
                smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]);
                buf_desc->mr[lnk->link_idx] = NULL;
        }
        if (is_rmb)
                smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
        else
                smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);

        sg_free_table(&buf_desc->sgt[lnk->link_idx]);
        buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        struct smc_buf_desc *buf_desc, *bf;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                mutex_lock(&lgr->rmbs_lock);
                list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
                        smcr_buf_unmap_link(buf_desc, true, lnk);
                mutex_unlock(&lgr->rmbs_lock);
                mutex_lock(&lgr->sndbufs_lock);
                list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
                                         list)
                        smcr_buf_unmap_link(buf_desc, false, lnk);
                mutex_unlock(&lgr->sndbufs_lock);
        }
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                lgr->rtokens[i][lnk->link_idx].rkey = 0;
                lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
        }
}

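/* release all resources of a link; called when its refcount drops to zero */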
static void __smcr_link_clear(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        struct smc_ib_device *smcibdev;

        smc_wr_free_link_mem(lnk);
        smc_ibdev_cnt_dec(lnk);
        put_device(&lnk->smcibdev->ibdev->dev);
        smcibdev = lnk->smcibdev;
        memset(lnk, 0, sizeof(struct smc_link));
        lnk->state = SMC_LNK_UNUSED;
        if (!atomic_dec_return(&smcibdev->lnk_cnt))
                wake_up(&smcibdev->lnks_deleted);
        smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
        if (!lnk->lgr || lnk->clearing ||
            lnk->state == SMC_LNK_UNUSED)
                return;
        lnk->clearing = 1;
        lnk->peer_qpn = 0;
        smc_llc_link_clear(lnk, log);
        smcr_buf_unmap_lgr(lnk);
        smcr_rtoken_clear_link(lnk);
        smc_ib_modify_qp_error(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
        smcr_link_put(lnk); /* theoretically last link_put */
}

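/* link reference handling: the initial reference is set in smcr_link_init(),
 * the final smcr_link_put() frees the link via __smcr_link_clear()
 */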
void smcr_link_hold(struct smc_link *lnk)
{
        refcount_inc(&lnk->refcnt);
}

void smcr_link_put(struct smc_link *lnk)
{
        if (refcount_dec_and_test(&lnk->refcnt))
                __smcr_link_clear(lnk);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
                          struct smc_buf_desc *buf_desc)
{
        int i;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
                smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

        if (!buf_desc->is_vm && buf_desc->pages)
                __free_pages(buf_desc->pages, buf_desc->order);
        else if (buf_desc->is_vm && buf_desc->cpu_addr)
                vfree(buf_desc->cpu_addr);
        kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
                          struct smc_buf_desc *buf_desc)
{
        if (is_dmb) {
                /* restore original buf len */
                buf_desc->len += sizeof(struct smcd_cdc_msg);
                smc_ism_unregister_dmb(lgr->smcd, buf_desc);
        } else {
                kfree(buf_desc->cpu_addr);
        }
        kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc)
{
        if (lgr->is_smcd)
                smcd_buf_free(lgr, is_rmb, buf_desc);
        else
                smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
        struct smc_buf_desc *buf_desc, *bf_desc;
        struct list_head *buf_list;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                if (is_rmb)
                        buf_list = &lgr->rmbs[i];
                else
                        buf_list = &lgr->sndbufs[i];
                list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
                                         list) {
                        list_del(&buf_desc->list);
                        smc_buf_free(lgr, is_rmb, buf_desc);
                }
        }
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
        /* free send buffers */
        __smc_lgr_free_bufs(lgr, false);
        /* free rmbs */
        __smc_lgr_free_bufs(lgr, true);
}

/* won't be freed until no one accesses the lgr anymore */
static void __smc_lgr_free(struct smc_link_group *lgr)
{
        smc_lgr_free_bufs(lgr);
        if (lgr->is_smcd) {
                if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
                        wake_up(&lgr->smcd->lgrs_deleted);
        } else {
                smc_wr_free_lgr_mem(lgr);
                if (!atomic_dec_return(&lgr_cnt))
                        wake_up(&lgrs_deleted);
        }
        kfree(lgr);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
        int i;

        if (!lgr->is_smcd) {
                mutex_lock(&lgr->llc_conf_mutex);
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (lgr->lnk[i].state != SMC_LNK_UNUSED)
                                smcr_link_clear(&lgr->lnk[i], false);
                }
                mutex_unlock(&lgr->llc_conf_mutex);
                smc_llc_lgr_clear(lgr);
        }

        destroy_workqueue(lgr->tx_wq);
        if (lgr->is_smcd) {
                smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
                put_device(&lgr->smcd->dev);
        }
        smc_lgr_put(lgr); /* theoretically last lgr_put */
}

void smc_lgr_hold(struct smc_link_group *lgr)
{
        refcount_inc(&lgr->refcnt);
}

void smc_lgr_put(struct smc_link_group *lgr)
{
        if (refcount_dec_and_test(&lgr->refcnt))
                __smc_lgr_free(lgr);
}

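/* wake up all waiters of an smc socket */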
static void smc_sk_wake_ups(struct smc_sock *smc)
{
        smc->sk.sk_write_space(&smc->sk);
        smc->sk.sk_data_ready(&smc->sk);
        smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

        if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
        else
                smc_close_abort(conn);
        conn->killed = 1;
        smc->sk.sk_err = ECONNABORTED;
        smc_sk_wake_ups(smc);
        if (conn->lgr->is_smcd) {
                smc_ism_unset_conn(conn);
                if (soft)
                        tasklet_kill(&conn->rx_tsklet);
                else
                        tasklet_unlock_wait(&conn->rx_tsklet);
        } else {
                smc_cdc_wait_pend_tx_wr(conn);
        }
        smc_lgr_unregister_conn(conn);
        smc_close_active_abort(smc);
}

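/* signal the termination to the peer and deactivate the lgr's links */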
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
        if (lgr->is_smcd) {
                smc_ism_signal_shutdown(lgr);
        } else {
                u32 rsn = lgr->llc_termination_rsn;

                if (!rsn)
                        rsn = SMC_LLC_DEL_PROG_INIT_TERM;
                smc_llc_send_link_delete_all(lgr, false, rsn);
                smcr_lgr_link_deactivate_all(lgr);
        }
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *        false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
        struct smc_connection *conn;
        struct smc_sock *smc;
        struct rb_node *node;

        if (lgr->terminating)
                return; /* lgr already terminating */
1462         /* cancel free_work sync, will terminate when lgr->freeing is set */
1463         cancel_delayed_work_sync(&lgr->free_work);
1464         lgr->terminating = 1;
1465
1466         /* kill remaining link group connections */
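        /* note: conns_lock is dropped for each iteration since lock_sock()
         * and smc_conn_kill() can sleep; the rbtree is re-read via
         * rb_first() after every kill
         */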
1467         read_lock_bh(&lgr->conns_lock);
1468         node = rb_first(&lgr->conns_all);
1469         while (node) {
1470                 read_unlock_bh(&lgr->conns_lock);
1471                 conn = rb_entry(node, struct smc_connection, alert_node);
1472                 smc = container_of(conn, struct smc_sock, conn);
1473                 sock_hold(&smc->sk); /* sock_put below */
1474                 lock_sock(&smc->sk);
1475                 smc_conn_kill(conn, soft);
1476                 release_sock(&smc->sk);
1477                 sock_put(&smc->sk); /* sock_hold above */
1478                 read_lock_bh(&lgr->conns_lock);
1479                 node = rb_first(&lgr->conns_all);
1480         }
1481         read_unlock_bh(&lgr->conns_lock);
1482         smc_lgr_cleanup(lgr);
1483         smc_lgr_free(lgr);
1484 }
1485
1486 /* unlink link group and schedule termination */
1487 void smc_lgr_terminate_sched(struct smc_link_group *lgr)
1488 {
1489         spinlock_t *lgr_lock;
1490
1491         smc_lgr_list_head(lgr, &lgr_lock);
1492         spin_lock_bh(lgr_lock);
1493         if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
1494                 spin_unlock_bh(lgr_lock);
1495                 return; /* lgr already terminating */
1496         }
1497         list_del_init(&lgr->list);
1498         lgr->freeing = 1;
1499         spin_unlock_bh(lgr_lock);
1500         schedule_work(&lgr->terminate_work);
1501 }
1502
1503 /* Called when a peer lgr shutdown (normal or abnormal) is received */
1504 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
1505 {
1506         struct smc_link_group *lgr, *l;
1507         LIST_HEAD(lgr_free_list);
1508
1509         /* run common cleanup function and build free list */
1510         spin_lock_bh(&dev->lgr_lock);
1511         list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
1512                 if ((!peer_gid || lgr->peer_gid == peer_gid) &&
1513                     (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
1514                         if (peer_gid) /* peer triggered termination */
1515                                 lgr->peer_shutdown = 1;
1516                         list_move(&lgr->list, &lgr_free_list);
1517                         lgr->freeing = 1;
1518                 }
1519         }
1520         spin_unlock_bh(&dev->lgr_lock);
1521
1522         /* schedule the terminate worker, which cancels free_work and frees the lgrs */
1523         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
1524                 list_del_init(&lgr->list);
1525                 schedule_work(&lgr->terminate_work);
1526         }
1527 }
1528
1529 /* Called when an SMCD device is removed or the smc module is unloaded */
1530 void smc_smcd_terminate_all(struct smcd_dev *smcd)
1531 {
1532         struct smc_link_group *lgr, *lg;
1533         LIST_HEAD(lgr_free_list);
1534
1535         spin_lock_bh(&smcd->lgr_lock);
1536         list_splice_init(&smcd->lgr_list, &lgr_free_list);
1537         list_for_each_entry(lgr, &lgr_free_list, list)
1538                 lgr->freeing = 1;
1539         spin_unlock_bh(&smcd->lgr_lock);
1540
1541         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1542                 list_del_init(&lgr->list);
1543                 __smc_lgr_terminate(lgr, false);
1544         }
1545
1546         if (atomic_read(&smcd->lgr_cnt))
1547                 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
1548 }
1549
1550 /* Called when an SMCR device is removed or the smc module is unloaded.
1551  * If smcibdev is given, all SMCR link groups using this device are terminated.
1552  * If smcibdev is NULL, all SMCR link groups are terminated.
1553  */
1554 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
1555 {
1556         struct smc_link_group *lgr, *lg;
1557         LIST_HEAD(lgr_free_list);
1558         int i;
1559
1560         spin_lock_bh(&smc_lgr_list.lock);
1561         if (!smcibdev) {
1562                 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
1563                 list_for_each_entry(lgr, &lgr_free_list, list)
1564                         lgr->freeing = 1;
1565         } else {
1566                 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
1567                         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1568                                 if (lgr->lnk[i].smcibdev == smcibdev)
1569                                         smcr_link_down_cond_sched(&lgr->lnk[i]);
1570                         }
1571                 }
1572         }
1573         spin_unlock_bh(&smc_lgr_list.lock);
1574
1575         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1576                 list_del_init(&lgr->list);
1577                 smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
1578                 __smc_lgr_terminate(lgr, false);
1579         }
1580
1581         if (smcibdev) {
1582                 if (atomic_read(&smcibdev->lnk_cnt))
1583                         wait_event(smcibdev->lnks_deleted,
1584                                    !atomic_read(&smcibdev->lnk_cnt));
1585         } else {
1586                 if (atomic_read(&lgr_cnt))
1587                         wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
1588         }
1589 }
1590
1591 /* set new lgr type and clear all asymmetric link tagging */
1592 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
1593 {
1594         char *lgr_type = "";
1595         int i;
1596
1597         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1598                 if (smc_link_usable(&lgr->lnk[i]))
1599                         lgr->lnk[i].link_is_asym = false;
1600         if (lgr->type == new_type)
1601                 return;
1602         lgr->type = new_type;
1603
1604         switch (lgr->type) {
1605         case SMC_LGR_NONE:
1606                 lgr_type = "NONE";
1607                 break;
1608         case SMC_LGR_SINGLE:
1609                 lgr_type = "SINGLE";
1610                 break;
1611         case SMC_LGR_SYMMETRIC:
1612                 lgr_type = "SYMMETRIC";
1613                 break;
1614         case SMC_LGR_ASYMMETRIC_PEER:
1615                 lgr_type = "ASYMMETRIC_PEER";
1616                 break;
1617         case SMC_LGR_ASYMMETRIC_LOCAL:
1618                 lgr_type = "ASYMMETRIC_LOCAL";
1619                 break;
1620         }
1621         pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
1622                             "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
1623                             lgr->net->net_cookie, lgr_type, lgr->pnet_id);
1624 }
1625
1626 /* set new lgr type and tag a link as asymmetric */
1627 void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
1628                             enum smc_lgr_type new_type, int asym_lnk_idx)
1629 {
1630         smcr_lgr_set_type(lgr, new_type);
1631         lgr->lnk[asym_lnk_idx].link_is_asym = true;
1632 }
1633
1634 /* abort connection, abort_work scheduled from tasklet context */
1635 static void smc_conn_abort_work(struct work_struct *work)
1636 {
1637         struct smc_connection *conn = container_of(work,
1638                                                    struct smc_connection,
1639                                                    abort_work);
1640         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1641
1642         lock_sock(&smc->sk);
1643         smc_conn_kill(conn, true);
1644         release_sock(&smc->sk);
1645         sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
1646 }
1647
1648 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
1649 {
1650         struct smc_link_group *lgr, *n;
1651
1652         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1653                 struct smc_link *link;
1654
1655                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1656                             SMC_MAX_PNETID_LEN) ||
1657                     lgr->type == SMC_LGR_SYMMETRIC ||
1658                     lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
1659                     !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
1660                         continue;
1661
1662                 /* trigger local add link processing */
1663                 link = smc_llc_usable_link(lgr);
1664                 if (link)
1665                         smc_llc_add_link_local(link);
1666         }
1667 }
1668
1669 /* link is down - switch connections to alternate link,
1670  * must be called under lgr->llc_conf_mutex lock
1671  */
1672 static void smcr_link_down(struct smc_link *lnk)
1673 {
1674         struct smc_link_group *lgr = lnk->lgr;
1675         struct smc_link *to_lnk;
1676         int del_link_id;
1677
1678         if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
1679                 return;
1680
1681         to_lnk = smc_switch_conns(lgr, lnk, true);
1682         if (!to_lnk) { /* no backup link available */
1683                 smcr_link_clear(lnk, true);
1684                 return;
1685         }
1686         smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
1687         del_link_id = lnk->link_id;
1688
1689         if (lgr->role == SMC_SERV) {
1690                 /* trigger local delete link processing */
1691                 smc_llc_srv_delete_link_local(to_lnk, del_link_id);
1692         } else {
1693                 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1694                         /* another llc task is ongoing */
1695                         mutex_unlock(&lgr->llc_conf_mutex);
1696                         wait_event_timeout(lgr->llc_flow_waiter,
1697                                 (list_empty(&lgr->list) ||
1698                                  lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1699                                 SMC_LLC_WAIT_TIME);
1700                         mutex_lock(&lgr->llc_conf_mutex);
1701                 }
1702                 if (!list_empty(&lgr->list)) {
1703                         smc_llc_send_delete_link(to_lnk, del_link_id,
1704                                                  SMC_LLC_REQ, true,
1705                                                  SMC_LLC_DEL_LOST_PATH);
1706                         smcr_link_clear(lnk, true);
1707                 }
1708                 wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
1709         }
1710 }
1711
1712 /* must be called under lgr->llc_conf_mutex lock */
1713 void smcr_link_down_cond(struct smc_link *lnk)
1714 {
1715         if (smc_link_downing(&lnk->state)) {
1716                 trace_smcr_link_down(lnk, __builtin_return_address(0));
1717                 smcr_link_down(lnk);
1718         }
1719 }
1720
1721 /* will get the lgr->llc_conf_mutex lock */
1722 void smcr_link_down_cond_sched(struct smc_link *lnk)
1723 {
1724         if (smc_link_downing(&lnk->state)) {
1725                 trace_smcr_link_down(lnk, __builtin_return_address(0));
1726                 schedule_work(&lnk->link_down_wrk);
1727         }
1728 }
1729
1730 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
1731 {
1732         struct smc_link_group *lgr, *n;
1733         int i;
1734
1735         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1736                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1737                             SMC_MAX_PNETID_LEN))
1738                         continue; /* lgr is not affected */
1739                 if (list_empty(&lgr->list))
1740                         continue;
1741                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1742                         struct smc_link *lnk = &lgr->lnk[i];
1743
1744                         if (smc_link_usable(lnk) &&
1745                             lnk->smcibdev == smcibdev && lnk->ibport == ibport)
1746                                 smcr_link_down_cond_sched(lnk);
1747                 }
1748         }
1749 }
1750
1751 static void smc_link_down_work(struct work_struct *work)
1752 {
1753         struct smc_link *link = container_of(work, struct smc_link,
1754                                              link_down_wrk);
1755         struct smc_link_group *lgr = link->lgr;
1756
1757         if (list_empty(&lgr->list))
1758                 return;
1759         wake_up_all(&lgr->llc_msg_waiter);
1760         mutex_lock(&lgr->llc_conf_mutex);
1761         smcr_link_down(link);
1762         mutex_unlock(&lgr->llc_conf_mutex);
1763 }
1764
1765 static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
1766                                   struct netdev_nested_priv *priv)
1767 {
1768         unsigned short *vlan_id = (unsigned short *)priv->data;
1769
1770         if (is_vlan_dev(lower_dev)) {
1771                 *vlan_id = vlan_dev_vlan_id(lower_dev);
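                /* a non-zero return stops netdev_walk_all_lower_dev() */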
1772                 return 1;
1773         }
1774
1775         return 0;
1776 }
1777
1778 /* Determine vlan of internal TCP socket. */
1779 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
1780 {
1781         struct dst_entry *dst = sk_dst_get(clcsock->sk);
1782         struct netdev_nested_priv priv;
1783         struct net_device *ndev;
1784         int rc = 0;
1785
1786         ini->vlan_id = 0;
1787         if (!dst) {
1788                 rc = -ENOTCONN;
1789                 goto out;
1790         }
1791         if (!dst->dev) {
1792                 rc = -ENODEV;
1793                 goto out_rel;
1794         }
1795
1796         ndev = dst->dev;
1797         if (is_vlan_dev(ndev)) {
1798                 ini->vlan_id = vlan_dev_vlan_id(ndev);
1799                 goto out_rel;
1800         }
1801
1802         priv.data = (void *)&ini->vlan_id;
1803         rtnl_lock();
1804         netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
1805         rtnl_unlock();
1806
1807 out_rel:
1808         dst_release(dst);
1809 out:
1810         return rc;
1811 }
1812
1813 static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
1814                            u8 peer_systemid[],
1815                            u8 peer_gid[],
1816                            u8 peer_mac_v1[],
1817                            enum smc_lgr_role role, u32 clcqpn,
1818                            struct net *net)
1819 {
1820         struct smc_link *lnk;
1821         int i;
1822
1823         if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
1824             lgr->role != role)
1825                 return false;
1826
1827         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1828                 lnk = &lgr->lnk[i];
1829
1830                 if (!smc_link_active(lnk))
1831                         continue;
1832                 /* use verbs API to check netns, instead of lgr->net */
1833                 if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
1834                         return false;
1835                 if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
1836                     !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
1837                     (smcr_version == SMC_V2 ||
1838                      !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
1839                         return true;
1840         }
1841         return false;
1842 }
1843
1844 static bool smcd_lgr_match(struct smc_link_group *lgr,
1845                            struct smcd_dev *smcismdev, u64 peer_gid)
1846 {
1847         return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
1848 }
1849
1850 /* create a new SMC connection (and a new link group if necessary) */
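/*
 * Rough decision flow of smc_conn_create() (summary, not normative):
 * - client whose peer signalled first contact: always create a new lgr
 * - otherwise: scan the lgr list for a matching, reusable link group
 * - client expecting reuse while none was found: decline with sync error
 * - no reusable lgr found: create one and register the conn as first contact
 */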
1851 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
1852 {
1853         struct smc_connection *conn = &smc->conn;
1854         struct net *net = sock_net(&smc->sk);
1855         struct list_head *lgr_list;
1856         struct smc_link_group *lgr;
1857         enum smc_lgr_role role;
1858         spinlock_t *lgr_lock;
1859         int rc = 0;
1860
1861         lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
1862                                   &smc_lgr_list.list;
1863         lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
1864                                   &smc_lgr_list.lock;
1865         ini->first_contact_local = 1;
1866         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
1867         if (role == SMC_CLNT && ini->first_contact_peer)
1868                 /* create new link group as well */
1869                 goto create;
1870
1871         /* determine if an existing link group can be reused */
1872         spin_lock_bh(lgr_lock);
1873         list_for_each_entry(lgr, lgr_list, list) {
1874                 write_lock_bh(&lgr->conns_lock);
1875                 if ((ini->is_smcd ?
1876                      smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
1877                                     ini->ism_peer_gid[ini->ism_selected]) :
1878                      smcr_lgr_match(lgr, ini->smcr_version,
1879                                     ini->peer_systemid,
1880                                     ini->peer_gid, ini->peer_mac, role,
1881                                     ini->ib_clcqpn, net)) &&
1882                     !lgr->sync_err &&
1883                     (ini->smcd_version == SMC_V2 ||
1884                      lgr->vlan_id == ini->vlan_id) &&
1885                     (role == SMC_CLNT || ini->is_smcd ||
1886                     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
1887                       !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
1888                         /* link group found */
1889                         ini->first_contact_local = 0;
1890                         conn->lgr = lgr;
1891                         rc = smc_lgr_register_conn(conn, false);
1892                         write_unlock_bh(&lgr->conns_lock);
1893                         if (!rc && delayed_work_pending(&lgr->free_work))
1894                                 cancel_delayed_work(&lgr->free_work);
1895                         break;
1896                 }
1897                 write_unlock_bh(&lgr->conns_lock);
1898         }
1899         spin_unlock_bh(lgr_lock);
1900         if (rc)
1901                 return rc;
1902
1903         if (role == SMC_CLNT && !ini->first_contact_peer &&
1904             ini->first_contact_local) {
1905         /* Server reuses a link group, but Client wants to start
1906          * a new one; send an out_of_sync decline with reason
1907          * "synchronization error"
1908          */
1909                 return SMC_CLC_DECL_SYNCERR;
1910         }
1911
1912 create:
1913         if (ini->first_contact_local) {
1914                 rc = smc_lgr_create(smc, ini);
1915                 if (rc)
1916                         goto out;
1917                 lgr = conn->lgr;
1918                 write_lock_bh(&lgr->conns_lock);
1919                 rc = smc_lgr_register_conn(conn, true);
1920                 write_unlock_bh(&lgr->conns_lock);
1921                 if (rc) {
1922                         smc_lgr_cleanup_early(lgr);
1923                         goto out;
1924                 }
1925         }
1926         smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
1927         if (!conn->lgr->is_smcd)
1928                 smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
1929         conn->freed = 0;
1930         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1931         conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1932         conn->urg_state = SMC_URG_READ;
1933         init_waitqueue_head(&conn->cdc_pend_tx_wq);
1934         INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
1935         if (ini->is_smcd) {
1936                 conn->rx_off = sizeof(struct smcd_cdc_msg);
1937                 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1938         } else {
1939                 conn->rx_off = 0;
1940         }
1941 #ifndef KERNEL_HAS_ATOMIC64
1942         spin_lock_init(&conn->acurs_lock);
1943 #endif
1944
1945 out:
1946         return rc;
1947 }
1948
1949 #define SMCD_DMBE_SIZES         6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1950 #define SMCR_RMBE_SIZES         5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
1951
1952 /* convert the RMB size into the compressed notation (minimum 16K, see
1953  * SMCD/R_DMBE_SIZES).
1954  * In contrast to plain ilog2, this rounds towards the next power of 2,
1955  * so the socket application gets at least its desired sndbuf / rcvbuf size.
1956  */
1957 static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
1958 {
1959         const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
1960         u8 compressed;
1961
1962         if (size <= SMC_BUF_MIN_SIZE)
1963                 return 0;
1964
1965         size = (size - 1) >> 14;  /* convert to 16K multiple */
1966         compressed = min_t(u8, ilog2(size) + 1,
1967                            is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
1968
1969         if (!is_smcd && is_rmb)
1970                 /* RMBs are backed by & limited to max size of scatterlists */
1971                 compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
1972
1973         return compressed;
1974 }
1975
1976 /* convert the RMB size from compressed notation into integer */
1977 int smc_uncompress_bufsize(u8 compressed)
1978 {
1979         u32 size;
1980
1981         size = 0x00000001 << (((int)compressed) + 14);
1982         return (int)size;
1983 }
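
/*
 * Worked example of the compressed notation (illustrative numbers only):
 *
 *	smc_compress_bufsize(100000, false, false)
 *		size = (100000 - 1) >> 14 = 6	(16K units, rounded down)
 *		ilog2(6) + 1 = 3		(round up to next power of 2)
 *	smc_uncompress_bufsize(3) = 1 << (3 + 14) = 131072
 *
 * i.e. a requested 100000 byte buffer is backed by a 128KB slot, so the
 * application gets at least its desired size.
 */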
1984
1985 /* try to reuse a sndbuf or rmb description slot for a certain
1986  * buffer size; if not available, return NULL
1987  */
1988 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1989                                              struct mutex *lock,
1990                                              struct list_head *buf_list)
1991 {
1992         struct smc_buf_desc *buf_slot;
1993
1994         mutex_lock(lock);
1995         list_for_each_entry(buf_slot, buf_list, list) {
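                /* cmpxchg atomically claims a free slot: only the caller
                 * that sees the 0 -> 1 transition owns this buffer
                 */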
1996                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1997                         mutex_unlock(lock);
1998                         return buf_slot;
1999                 }
2000         }
2001         mutex_unlock(lock);
2002         return NULL;
2003 }
2004
2005 /* one of the conditions for announcing a receiver's current window size is
2006  * that it "results in a minimum increase in the window size of 10% of the
2007  * receive buffer space" [RFC7609]
2008  */
2009 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
2010 {
2011         return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
2012 }
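
/*
 * Example (illustrative): for a 64KB RMB the limit is
 * max(65536 / 10, SOCK_MIN_SNDBUF / 2) = 6553 bytes (assuming
 * SOCK_MIN_SNDBUF / 2 is smaller), i.e. a window update is only
 * announced once at least ~10% of the receive buffer space was freed.
 */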
2013
2014 /* map a buf to a link */
2015 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
2016                              struct smc_link *lnk)
2017 {
2018         int rc, i, nents, offset, buf_size, size, access_flags;
2019         struct scatterlist *sg;
2020         void *buf;
2021
2022         if (buf_desc->is_map_ib[lnk->link_idx])
2023                 return 0;
2024
2025         if (buf_desc->is_vm) {
2026                 buf = buf_desc->cpu_addr;
2027                 buf_size = buf_desc->len;
2028                 offset = offset_in_page(buf_desc->cpu_addr);
2029                 nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE;
2030         } else {
2031                 nents = 1;
2032         }
2033
2034         rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL);
2035         if (rc)
2036                 return rc;
2037
2038         if (buf_desc->is_vm) {
2039                 /* virtually contiguous buffer */
2040                 for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
2041                         size = min_t(int, PAGE_SIZE - offset, buf_size);
2042                         sg_set_page(sg, vmalloc_to_page(buf), size, offset);
2043                         buf += size / sizeof(*buf);
2044                         buf_size -= size;
2045                         offset = 0;
2046                 }
2047         } else {
2048                 /* physically contiguous buffer */
2049                 sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
2050                            buf_desc->cpu_addr, buf_desc->len);
2051         }
2052
2053         /* map sg table to DMA address */
2054         rc = smc_ib_buf_map_sg(lnk, buf_desc,
2055                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2056         /* SMC protocol depends on mapping to one DMA address only */
2057         if (rc != nents) {
2058                 rc = -EAGAIN;
2059                 goto free_table;
2060         }
2061
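        /* remember per link (one bit per link_idx) whether this buffer
         * needs explicit DMA sync before/after I/O
         */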
2062         buf_desc->is_dma_need_sync |=
2063                 smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;
2064
2065         if (is_rmb || buf_desc->is_vm) {
2066                 /* create a new memory region for the RMB or vzalloced sndbuf */
2067                 access_flags = is_rmb ?
2068                                IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
2069                                IB_ACCESS_LOCAL_WRITE;
2070
2071                 rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags,
2072                                               buf_desc, lnk->link_idx);
2073                 if (rc)
2074                         goto buf_unmap;
2075                 smc_ib_sync_sg_for_device(lnk, buf_desc,
2076                                           is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2077         }
2078         buf_desc->is_map_ib[lnk->link_idx] = true;
2079         return 0;
2080
2081 buf_unmap:
2082         smc_ib_buf_unmap_sg(lnk, buf_desc,
2083                             is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2084 free_table:
2085         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
2086         return rc;
2087 }
2088
2089 /* register a new buf on IB device, rmb or vzalloced sndbuf
2090  * must be called under lgr->llc_conf_mutex lock
2091  */
2092 int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
2093 {
2094         if (list_empty(&link->lgr->list))
2095                 return -ENOLINK;
2096         if (!buf_desc->is_reg_mr[link->link_idx]) {
2097                 /* register memory region for new buf */
2098                 if (buf_desc->is_vm)
2099                         buf_desc->mr[link->link_idx]->iova =
2100                                 (uintptr_t)buf_desc->cpu_addr;
2101                 if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
2102                         buf_desc->is_reg_err = true;
2103                         return -EFAULT;
2104                 }
2105                 buf_desc->is_reg_mr[link->link_idx] = true;
2106         }
2107         return 0;
2108 }
2109
2110 static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
2111                              struct list_head *lst, bool is_rmb)
2112 {
2113         struct smc_buf_desc *buf_desc, *bf;
2114         int rc = 0;
2115
2116         mutex_lock(lock);
2117         list_for_each_entry_safe(buf_desc, bf, lst, list) {
2118                 if (!buf_desc->used)
2119                         continue;
2120                 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
2121                 if (rc)
2122                         goto out;
2123         }
2124 out:
2125         mutex_unlock(lock);
2126         return rc;
2127 }
2128
2129 /* map all used buffers of lgr for a new link */
2130 int smcr_buf_map_lgr(struct smc_link *lnk)
2131 {
2132         struct smc_link_group *lgr = lnk->lgr;
2133         int i, rc = 0;
2134
2135         for (i = 0; i < SMC_RMBE_SIZES; i++) {
2136                 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
2137                                        &lgr->rmbs[i], true);
2138                 if (rc)
2139                         return rc;
2140                 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
2141                                        &lgr->sndbufs[i], false);
2142                 if (rc)
2143                         return rc;
2144         }
2145         return 0;
2146 }
2147
2148 /* register all used buffers of lgr for a new link,
2149  * must be called under lgr->llc_conf_mutex lock
2150  */
2151 int smcr_buf_reg_lgr(struct smc_link *lnk)
2152 {
2153         struct smc_link_group *lgr = lnk->lgr;
2154         struct smc_buf_desc *buf_desc, *bf;
2155         int i, rc = 0;
2156
2157         /* reg all RMBs for a new link */
2158         mutex_lock(&lgr->rmbs_lock);
2159         for (i = 0; i < SMC_RMBE_SIZES; i++) {
2160                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
2161                         if (!buf_desc->used)
2162                                 continue;
2163                         rc = smcr_link_reg_buf(lnk, buf_desc);
2164                         if (rc) {
2165                                 mutex_unlock(&lgr->rmbs_lock);
2166                                 return rc;
2167                         }
2168                 }
2169         }
2170         mutex_unlock(&lgr->rmbs_lock);
2171
2172         if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
2173                 return rc;
2174
2175         /* reg all vzalloced sndbufs for a new link */
2176         mutex_lock(&lgr->sndbufs_lock);
2177         for (i = 0; i < SMC_RMBE_SIZES; i++) {
2178                 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
2179                         if (!buf_desc->used || !buf_desc->is_vm)
2180                                 continue;
2181                         rc = smcr_link_reg_buf(lnk, buf_desc);
2182                         if (rc) {
2183                                 mutex_unlock(&lgr->sndbufs_lock);
2184                                 return rc;
2185                         }
2186                 }
2187         }
2188         mutex_unlock(&lgr->sndbufs_lock);
2189         return rc;
2190 }
2191
2192 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
2193                                                 bool is_rmb, int bufsize)
2194 {
2195         struct smc_buf_desc *buf_desc;
2196
2197         /* try to alloc a new buffer */
2198         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2199         if (!buf_desc)
2200                 return ERR_PTR(-ENOMEM);
2201
2202         switch (lgr->buf_type) {
2203         case SMCR_PHYS_CONT_BUFS:
2204         case SMCR_MIXED_BUFS:
2205                 buf_desc->order = get_order(bufsize);
2206                 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
2207                                               __GFP_NOMEMALLOC | __GFP_COMP |
2208                                               __GFP_NORETRY | __GFP_ZERO,
2209                                               buf_desc->order);
2210                 if (buf_desc->pages) {
2211                         buf_desc->cpu_addr =
2212                                 (void *)page_address(buf_desc->pages);
2213                         buf_desc->len = bufsize;
2214                         buf_desc->is_vm = false;
2215                         break;
2216                 }
2217                 if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
2218                         goto out;
2219                 fallthrough;    // try virtually contiguous buf
2220         case SMCR_VIRT_CONT_BUFS:
2221                 buf_desc->order = get_order(bufsize);
2222                 buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
2223                 if (!buf_desc->cpu_addr)
2224                         goto out;
2225                 buf_desc->pages = NULL;
2226                 buf_desc->len = bufsize;
2227                 buf_desc->is_vm = true;
2228                 break;
2229         }
2230         return buf_desc;
2231
2232 out:
2233         kfree(buf_desc);
2234         return ERR_PTR(-EAGAIN);
2235 }
2236
2237 /* map buf_desc on all usable links,
2238  * unused buffers stay mapped as long as the link is up
2239  */
2240 static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
2241                                      struct smc_buf_desc *buf_desc, bool is_rmb)
2242 {
2243         int i, rc = 0, cnt = 0;
2244
2245         /* protect against parallel link reconfiguration */
2246         mutex_lock(&lgr->llc_conf_mutex);
2247         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
2248                 struct smc_link *lnk = &lgr->lnk[i];
2249
2250                 if (!smc_link_usable(lnk))
2251                         continue;
2252                 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
2253                         rc = -ENOMEM;
2254                         goto out;
2255                 }
2256                 cnt++;
2257         }
2258 out:
2259         mutex_unlock(&lgr->llc_conf_mutex);
2260         if (!rc && !cnt)
2261                 rc = -EINVAL;
2262         return rc;
2263 }
2264
2265 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
2266                                                 bool is_dmb, int bufsize)
2267 {
2268         struct smc_buf_desc *buf_desc;
2269         int rc;
2270
2271         /* try to alloc a new DMB */
2272         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2273         if (!buf_desc)
2274                 return ERR_PTR(-ENOMEM);
2275         if (is_dmb) {
2276                 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
2277                 if (rc) {
2278                         kfree(buf_desc);
2279                         if (rc == -ENOMEM)
2280                                 return ERR_PTR(-EAGAIN);
2281                         if (rc == -ENOSPC)
2282                                 return ERR_PTR(-ENOSPC);
2283                         return ERR_PTR(-EIO);
2284                 }
2285                 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
2286                 /* CDC header stored in buf. So, pretend it was smaller */
2287                 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
2288         } else {
2289                 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
2290                                              __GFP_NOWARN | __GFP_NORETRY |
2291                                              __GFP_NOMEMALLOC);
2292                 if (!buf_desc->cpu_addr) {
2293                         kfree(buf_desc);
2294                         return ERR_PTR(-EAGAIN);
2295                 }
2296                 buf_desc->len = bufsize;
2297         }
2298         return buf_desc;
2299 }
2300
2301 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
2302 {
2303         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
2304         struct smc_connection *conn = &smc->conn;
2305         struct smc_link_group *lgr = conn->lgr;
2306         struct list_head *buf_list;
2307         int bufsize, bufsize_short;
2308         bool is_dgraded = false;
2309         struct mutex *lock;     /* lock buffer list */
2310         int sk_buf_size;
2311
2312         if (is_rmb)
2313                 /* use socket recv buffer size (w/o overhead) as start value */
2314                 sk_buf_size = smc->sk.sk_rcvbuf;
2315         else
2316                 /* use socket send buffer size (w/o overhead) as start value */
2317                 sk_buf_size = smc->sk.sk_sndbuf;
2318
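        /* start with the compressed size matching the socket buffer and
         * downgrade to smaller slots when allocation fails (tracked via
         * is_dgraded for statistics)
         */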
2319         for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
2320              bufsize_short >= 0; bufsize_short--) {
2321                 if (is_rmb) {
2322                         lock = &lgr->rmbs_lock;
2323                         buf_list = &lgr->rmbs[bufsize_short];
2324                 } else {
2325                         lock = &lgr->sndbufs_lock;
2326                         buf_list = &lgr->sndbufs[bufsize_short];
2327                 }
2328                 bufsize = smc_uncompress_bufsize(bufsize_short);
2329
2330                 /* check for reusable slot in the link group */
2331                 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
2332                 if (buf_desc) {
2333                         buf_desc->is_dma_need_sync = 0;
2334                         SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
2335                         SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
2336                         break; /* found reusable slot */
2337                 }
2338
2339                 if (is_smcd)
2340                         buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
2341                 else
2342                         buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
2343
2344                 if (PTR_ERR(buf_desc) == -ENOMEM)
2345                         break;
2346                 if (IS_ERR(buf_desc)) {
2347                         if (!is_dgraded) {
2348                                 is_dgraded = true;
2349                                 SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
2350                         }
2351                         continue;
2352                 }
2353
2354                 SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
2355                 SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
2356                 buf_desc->used = 1;
2357                 mutex_lock(lock);
2358                 list_add(&buf_desc->list, buf_list);
2359                 mutex_unlock(lock);
2360                 break; /* found */
2361         }
2362
2363         if (IS_ERR(buf_desc))
2364                 return PTR_ERR(buf_desc);
2365
2366         if (!is_smcd) {
2367                 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
2368                         smcr_buf_unuse(buf_desc, is_rmb, lgr);
2369                         return -ENOMEM;
2370                 }
2371         }
2372
2373         if (is_rmb) {
2374                 conn->rmb_desc = buf_desc;
2375                 conn->rmbe_size_short = bufsize_short;
2376                 smc->sk.sk_rcvbuf = bufsize;
2377                 atomic_set(&conn->bytes_to_rcv, 0);
2378                 conn->rmbe_update_limit =
2379                         smc_rmb_wnd_update_limit(buf_desc->len);
2380                 if (is_smcd)
2381                         smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
2382         } else {
2383                 conn->sndbuf_desc = buf_desc;
2384                 smc->sk.sk_sndbuf = bufsize;
2385                 atomic_set(&conn->sndbuf_space, bufsize);
2386         }
2387         return 0;
2388 }
2389
2390 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
2391 {
2392         if (!conn->sndbuf_desc->is_dma_need_sync)
2393                 return;
2394         if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
2395             !smc_link_active(conn->lnk))
2396                 return;
2397         smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
2398 }
2399
2400 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
2401 {
2402         int i;
2403
2404         if (!conn->rmb_desc->is_dma_need_sync)
2405                 return;
2406         if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
2407                 return;
2408         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
2409                 if (!smc_link_active(&conn->lgr->lnk[i]))
2410                         continue;
2411                 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
2412                                        DMA_FROM_DEVICE);
2413         }
2414 }
2415
2416 /* create the send and receive buffer for an SMC socket;
2417  * receive buffers are called RMBs;
2418  * (even though the SMC protocol allows more than one RMB-element per RMB,
2419  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
2420  * extra RMB for every connection in a link group)
2421  */
2422 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
2423 {
2424         int rc;
2425
2426         /* create send buffer */
2427         rc = __smc_buf_create(smc, is_smcd, false);
2428         if (rc)
2429                 return rc;
2430         /* create rmb */
2431         rc = __smc_buf_create(smc, is_smcd, true);
2432         if (rc) {
2433                 mutex_lock(&smc->conn.lgr->sndbufs_lock);
2434                 list_del(&smc->conn.sndbuf_desc->list);
2435                 mutex_unlock(&smc->conn.lgr->sndbufs_lock);
2436                 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
2437                 smc->conn.sndbuf_desc = NULL;
2438         }
2439         return rc;
2440 }
2441
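/*
 * Illustrative layout of the rtoken table (a sketch of the data structure,
 * indices are examples): lgr->rtokens is indexed [rmb_idx][link_idx], so
 * the same remote RMB is addressed by a per-link (rkey, dma_addr) pair:
 *
 *	lgr->rtokens[i][0] = { .rkey = rkey_on_link0, .dma_addr = vaddr }
 *	lgr->rtokens[i][1] = { .rkey = rkey_on_link1, .dma_addr = vaddr }
 *
 * rtokens_used_mask tracks which rmb_idx slots are in use.
 */
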
2442 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
2443 {
2444         int i;
2445
2446         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
2447                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
2448                         return i;
2449         }
2450         return -ENOSPC;
2451 }
2452
2453 static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
2454                                    u32 rkey)
2455 {
2456         int i;
2457
2458         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2459                 if (test_bit(i, lgr->rtokens_used_mask) &&
2460                     lgr->rtokens[i][lnk_idx].rkey == rkey)
2461                         return i;
2462         }
2463         return -ENOENT;
2464 }
2465
2466 /* set rtoken for a new link to an existing rmb */
2467 void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
2468                     __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
2469 {
2470         int rtok_idx;
2471
2472         rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
2473         if (rtok_idx == -ENOENT)
2474                 return;
2475         lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
2476         lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
2477 }
2478
2479 /* set rtoken for a new link whose link_id is given */
2480 void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
2481                      __be64 nw_vaddr, __be32 nw_rkey)
2482 {
2483         u64 dma_addr = be64_to_cpu(nw_vaddr);
2484         u32 rkey = ntohl(nw_rkey);
2485         bool found = false;
2486         int link_idx;
2487
2488         for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
2489                 if (lgr->lnk[link_idx].link_id == link_id) {
2490                         found = true;
2491                         break;
2492                 }
2493         }
2494         if (!found)
2495                 return;
2496         lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
2497         lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
2498 }
2499
2500 /* add a new rtoken from peer */
2501 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
2502 {
2503         struct smc_link_group *lgr = smc_get_lgr(lnk);
2504         u64 dma_addr = be64_to_cpu(nw_vaddr);
2505         u32 rkey = ntohl(nw_rkey);
2506         int i;
2507
2508         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2509                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
2510                     lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
2511                     test_bit(i, lgr->rtokens_used_mask)) {
2512                         /* already in list */
2513                         return i;
2514                 }
2515         }
2516         i = smc_rmb_reserve_rtoken_idx(lgr);
2517         if (i < 0)
2518                 return i;
2519         lgr->rtokens[i][lnk->link_idx].rkey = rkey;
2520         lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
2521         return i;
2522 }
2523
2524 /* delete an rtoken from all links */
2525 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
2526 {
2527         struct smc_link_group *lgr = smc_get_lgr(lnk);
2528         u32 rkey = ntohl(nw_rkey);
2529         int i, j;
2530
2531         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2532                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
2533                     test_bit(i, lgr->rtokens_used_mask)) {
2534                         for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
2535                                 lgr->rtokens[i][j].rkey = 0;
2536                                 lgr->rtokens[i][j].dma_addr = 0;
2537                         }
2538                         clear_bit(i, lgr->rtokens_used_mask);
2539                         return 0;
2540                 }
2541         }
2542         return -ENOENT;
2543 }
2544
2545 /* save rkey and dma_addr received from peer during clc handshake */
2546 int smc_rmb_rtoken_handling(struct smc_connection *conn,
2547                             struct smc_link *lnk,
2548                             struct smc_clc_msg_accept_confirm *clc)
2549 {
2550         conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
2551                                           clc->r0.rmb_rkey);
2552         if (conn->rtoken_idx < 0)
2553                 return conn->rtoken_idx;
2554         return 0;
2555 }
2556
2557 static void smc_core_going_away(void)
2558 {
2559         struct smc_ib_device *smcibdev;
2560         struct smcd_dev *smcd;
2561
2562         mutex_lock(&smc_ib_devices.mutex);
2563         list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
2564                 int i;
2565
2566                 for (i = 0; i < SMC_MAX_PORTS; i++)
2567                         set_bit(i, smcibdev->ports_going_away);
2568         }
2569         mutex_unlock(&smc_ib_devices.mutex);
2570
2571         mutex_lock(&smcd_dev_list.mutex);
2572         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
2573                 smcd->going_away = 1;
2574         }
2575         mutex_unlock(&smcd_dev_list.mutex);
2576 }
2577
2578 /* Clean up all SMC link groups */
2579 static void smc_lgrs_shutdown(void)
2580 {
2581         struct smcd_dev *smcd;
2582
2583         smc_core_going_away();
2584
2585         smc_smcr_terminate_all(NULL);
2586
2587         mutex_lock(&smcd_dev_list.mutex);
2588         list_for_each_entry(smcd, &smcd_dev_list.list, list)
2589                 smc_smcd_terminate_all(smcd);
2590         mutex_unlock(&smcd_dev_list.mutex);
2591 }
2592
2593 static int smc_core_reboot_event(struct notifier_block *this,
2594                                  unsigned long event, void *ptr)
2595 {
2596         smc_lgrs_shutdown();
2597         smc_ib_unregister_client();
2598         return 0;
2599 }
2600
2601 static struct notifier_block smc_reboot_notifier = {
2602         .notifier_call = smc_core_reboot_event,
2603 };
2604
2605 int __init smc_core_init(void)
2606 {
2607         return register_reboot_notifier(&smc_reboot_notifier);
2608 }
2609
2610 /* Called (from smc_exit) when module is removed */
2611 void smc_core_exit(void)
2612 {
2613         unregister_reboot_notifier(&smc_reboot_notifier);
2614         smc_lgrs_shutdown();
2615 }