31bb2d1dbd77a36940bc9bc19c2afa14f01b1c04
[platform/kernel/linux-rpi.git] / net / smc / smc_core.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Basic Transport Functions exploiting Infiniband API
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10  */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <net/tcp.h>
17 #include <net/sock.h>
18 #include <rdma/ib_verbs.h>
19
20 #include "smc.h"
21 #include "smc_clc.h"
22 #include "smc_core.h"
23 #include "smc_ib.h"
24 #include "smc_wr.h"
25 #include "smc_llc.h"
26 #include "smc_cdc.h"
27 #include "smc_close.h"
28
/* Step by which the global link group number advances per new link group */
#define SMC_LGR_NUM_INCR		256
/* Delay (in jiffies) before an unused server link group is freed */
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
/* Clients wait 10 seconds longer than servers before freeing an unused
 * link group; the extra time must be scaled by HZ like the base delay,
 * otherwise it amounts to only 10 jiffies.
 */
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
32
/* Running link group number: smc_lgr_create() advances it by
 * SMC_LGR_NUM_INCR and copies its first SMC_LGR_ID_SIZE bytes into lgr->id.
 */
static u32 smc_lgr_num;
34
35 /* Register connection's alert token in our lookup structure.
36  * To use rbtrees we have to implement our own insert core.
37  * Requires @conns_lock
38  * @smc         connection to register
39  * Returns 0 on success, != otherwise.
40  */
41 static void smc_lgr_add_alert_token(struct smc_connection *conn)
42 {
43         struct rb_node **link, *parent = NULL;
44         u32 token = conn->alert_token_local;
45
46         link = &conn->lgr->conns_all.rb_node;
47         while (*link) {
48                 struct smc_connection *cur = rb_entry(*link,
49                                         struct smc_connection, alert_node);
50
51                 parent = *link;
52                 if (cur->alert_token_local > token)
53                         link = &parent->rb_left;
54                 else
55                         link = &parent->rb_right;
56         }
57         /* Put the new node there */
58         rb_link_node(&conn->alert_node, parent, link);
59         rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
60 }
61
62 /* Register connection in link group by assigning an alert token
63  * registered in a search tree.
64  * Requires @conns_lock
65  * Note that '0' is a reserved value and not assigned.
66  */
67 static void smc_lgr_register_conn(struct smc_connection *conn)
68 {
69         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
70         static atomic_t nexttoken = ATOMIC_INIT(0);
71
72         /* find a new alert_token_local value not yet used by some connection
73          * in this link group
74          */
75         sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
76         while (!conn->alert_token_local) {
77                 conn->alert_token_local = atomic_inc_return(&nexttoken);
78                 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
79                         conn->alert_token_local = 0;
80         }
81         smc_lgr_add_alert_token(conn);
82         conn->lgr->conns_num++;
83 }
84
85 /* Unregister connection and reset the alert token of the given connection<
86  */
87 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
88 {
89         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
90         struct smc_link_group *lgr = conn->lgr;
91
92         rb_erase(&conn->alert_node, &lgr->conns_all);
93         lgr->conns_num--;
94         conn->alert_token_local = 0;
95         conn->lgr = NULL;
96         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
97 }
98
99 /* Unregister connection and trigger lgr freeing if applicable
100  */
101 static void smc_lgr_unregister_conn(struct smc_connection *conn)
102 {
103         struct smc_link_group *lgr = conn->lgr;
104         int reduced = 0;
105
106         write_lock_bh(&lgr->conns_lock);
107         if (conn->alert_token_local) {
108                 reduced = 1;
109                 __smc_lgr_unregister_conn(conn);
110         }
111         write_unlock_bh(&lgr->conns_lock);
112         if (!reduced || lgr->conns_num)
113                 return;
114         /* client link group creation always follows the server link group
115          * creation. For client use a somewhat higher removal delay time,
116          * otherwise there is a risk of out-of-sync link groups.
117          */
118         mod_delayed_work(system_wq, &lgr->free_work,
119                          lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
120                                                  SMC_LGR_FREE_DELAY_SERV);
121 }
122
123 static void smc_lgr_free_work(struct work_struct *work)
124 {
125         struct smc_link_group *lgr = container_of(to_delayed_work(work),
126                                                   struct smc_link_group,
127                                                   free_work);
128         bool conns;
129
130         spin_lock_bh(&smc_lgr_list.lock);
131         if (list_empty(&lgr->list))
132                 goto free;
133         read_lock_bh(&lgr->conns_lock);
134         conns = RB_EMPTY_ROOT(&lgr->conns_all);
135         read_unlock_bh(&lgr->conns_lock);
136         if (!conns) { /* number of lgr connections is no longer zero */
137                 spin_unlock_bh(&smc_lgr_list.lock);
138                 return;
139         }
140         list_del_init(&lgr->list); /* remove from smc_lgr_list */
141 free:
142         spin_unlock_bh(&smc_lgr_list.lock);
143         smc_lgr_free(lgr);
144 }
145
146 /* create a new SMC link group */
147 static int smc_lgr_create(struct smc_sock *smc,
148                           struct smc_ib_device *smcibdev, u8 ibport,
149                           char *peer_systemid, unsigned short vlan_id)
150 {
151         struct smc_link_group *lgr;
152         struct smc_link *lnk;
153         u8 rndvec[3];
154         int rc = 0;
155         int i;
156
157         lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
158         if (!lgr) {
159                 rc = -ENOMEM;
160                 goto out;
161         }
162         lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
163         lgr->sync_err = false;
164         memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
165         lgr->vlan_id = vlan_id;
166         rwlock_init(&lgr->sndbufs_lock);
167         rwlock_init(&lgr->rmbs_lock);
168         for (i = 0; i < SMC_RMBE_SIZES; i++) {
169                 INIT_LIST_HEAD(&lgr->sndbufs[i]);
170                 INIT_LIST_HEAD(&lgr->rmbs[i]);
171         }
172         smc_lgr_num += SMC_LGR_NUM_INCR;
173         memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE);
174         INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
175         lgr->conns_all = RB_ROOT;
176
177         lnk = &lgr->lnk[SMC_SINGLE_LINK];
178         /* initialize link */
179         lnk->smcibdev = smcibdev;
180         lnk->ibport = ibport;
181         lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
182         if (!smcibdev->initialized)
183                 smc_ib_setup_per_ibdev(smcibdev);
184         get_random_bytes(rndvec, sizeof(rndvec));
185         lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
186         rc = smc_wr_alloc_link_mem(lnk);
187         if (rc)
188                 goto free_lgr;
189         rc = smc_ib_create_protection_domain(lnk);
190         if (rc)
191                 goto free_link_mem;
192         rc = smc_ib_create_queue_pair(lnk);
193         if (rc)
194                 goto dealloc_pd;
195         rc = smc_wr_create_link(lnk);
196         if (rc)
197                 goto destroy_qp;
198         init_completion(&lnk->llc_confirm);
199         init_completion(&lnk->llc_confirm_resp);
200
201         smc->conn.lgr = lgr;
202         rwlock_init(&lgr->conns_lock);
203         spin_lock_bh(&smc_lgr_list.lock);
204         list_add(&lgr->list, &smc_lgr_list.list);
205         spin_unlock_bh(&smc_lgr_list.lock);
206         return 0;
207
208 destroy_qp:
209         smc_ib_destroy_queue_pair(lnk);
210 dealloc_pd:
211         smc_ib_dealloc_protection_domain(lnk);
212 free_link_mem:
213         smc_wr_free_link_mem(lnk);
214 free_lgr:
215         kfree(lgr);
216 out:
217         return rc;
218 }
219
220 static void smc_buf_unuse(struct smc_connection *conn)
221 {
222         if (conn->sndbuf_desc) {
223                 conn->sndbuf_desc->used = 0;
224                 conn->sndbuf_size = 0;
225         }
226         if (conn->rmb_desc) {
227                 conn->rmb_desc->reused = true;
228                 conn->rmb_desc->used = 0;
229                 conn->rmbe_size = 0;
230         }
231 }
232
233 /* remove a finished connection from its link group */
234 void smc_conn_free(struct smc_connection *conn)
235 {
236         if (!conn->lgr)
237                 return;
238         smc_cdc_tx_dismiss_slots(conn);
239         smc_lgr_unregister_conn(conn);
240         smc_buf_unuse(conn);
241 }
242
/* Tear down a link.
 * The last three calls are the same ones, in the same order, that
 * smc_lgr_create() uses to unwind a partially set up link; before them the
 * peer QP number is cleared, the queue pair is reset and the link's work
 * request resources are released via smc_wr_free_link().
 */
static void smc_link_clear(struct smc_link *lnk)
{
	lnk->peer_qpn = 0;
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
}
252
253 static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
254                          bool is_rmb)
255 {
256         if (is_rmb) {
257                 if (buf_desc->mr_rx[SMC_SINGLE_LINK])
258                         smc_ib_put_memory_region(
259                                         buf_desc->mr_rx[SMC_SINGLE_LINK]);
260                 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
261                                     DMA_FROM_DEVICE);
262         } else {
263                 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
264                                     DMA_TO_DEVICE);
265         }
266         sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
267         if (buf_desc->cpu_addr)
268                 free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
269         kfree(buf_desc);
270 }
271
272 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
273 {
274         struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
275         struct smc_buf_desc *buf_desc, *bf_desc;
276         struct list_head *buf_list;
277         int i;
278
279         for (i = 0; i < SMC_RMBE_SIZES; i++) {
280                 if (is_rmb)
281                         buf_list = &lgr->rmbs[i];
282                 else
283                         buf_list = &lgr->sndbufs[i];
284                 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
285                                          list) {
286                         list_del(&buf_desc->list);
287                         smc_buf_free(buf_desc, lnk, is_rmb);
288                 }
289         }
290 }
291
/* Free every send buffer and every RMB that is still on the link group's
 * buffer lists.
 */
static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}
299
/* Remove a link group: free its buffers, tear down its single link and
 * release the link group structure itself.
 * This function does not touch smc_lgr_list; smc_lgr_free_work() makes sure
 * the link group is off that list before calling it.
 */
void smc_lgr_free(struct smc_link_group *lgr)
{
	smc_lgr_free_bufs(lgr);
	smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
	kfree(lgr);
}
307
308 /* terminate linkgroup abnormally */
309 void smc_lgr_terminate(struct smc_link_group *lgr)
310 {
311         struct smc_connection *conn;
312         struct smc_sock *smc;
313         struct rb_node *node;
314
315         spin_lock_bh(&smc_lgr_list.lock);
316         if (list_empty(&lgr->list)) {
317                 /* termination already triggered */
318                 spin_unlock_bh(&smc_lgr_list.lock);
319                 return;
320         }
321         /* do not use this link group for new connections */
322         list_del_init(&lgr->list);
323         spin_unlock_bh(&smc_lgr_list.lock);
324
325         write_lock_bh(&lgr->conns_lock);
326         node = rb_first(&lgr->conns_all);
327         while (node) {
328                 conn = rb_entry(node, struct smc_connection, alert_node);
329                 smc = container_of(conn, struct smc_sock, conn);
330                 sock_hold(&smc->sk); /* sock_put in close work */
331                 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
332                 __smc_lgr_unregister_conn(conn);
333                 write_unlock_bh(&lgr->conns_lock);
334                 if (!schedule_work(&conn->close_work))
335                         sock_put(&smc->sk);
336                 write_lock_bh(&lgr->conns_lock);
337                 node = rb_first(&lgr->conns_all);
338         }
339         write_unlock_bh(&lgr->conns_lock);
340         wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
341 }
342
343 /* Determine vlan of internal TCP socket.
344  * @vlan_id: address to store the determined vlan id into
345  */
346 static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
347 {
348         struct dst_entry *dst = sk_dst_get(clcsock->sk);
349         int rc = 0;
350
351         *vlan_id = 0;
352         if (!dst) {
353                 rc = -ENOTCONN;
354                 goto out;
355         }
356         if (!dst->dev) {
357                 rc = -ENODEV;
358                 goto out_rel;
359         }
360
361         if (is_vlan_dev(dst->dev))
362                 *vlan_id = vlan_dev_vlan_id(dst->dev);
363
364 out_rel:
365         dst_release(dst);
366 out:
367         return rc;
368 }
369
370 /* determine the link gid matching the vlan id of the link group */
371 static int smc_link_determine_gid(struct smc_link_group *lgr)
372 {
373         struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
374         struct ib_gid_attr gattr;
375         union ib_gid gid;
376         int i;
377
378         if (!lgr->vlan_id) {
379                 lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1];
380                 return 0;
381         }
382
383         for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
384              i++) {
385                 if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
386                                  &gattr))
387                         continue;
388                 if (gattr.ndev) {
389                         if (is_vlan_dev(gattr.ndev) &&
390                             vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
391                                 lnk->gid = gid;
392                                 dev_put(gattr.ndev);
393                                 return 0;
394                         }
395                         dev_put(gattr.ndev);
396                 }
397         }
398         return -ENODEV;
399 }
400
401 /* create a new SMC connection (and a new link group if necessary) */
402 int smc_conn_create(struct smc_sock *smc,
403                     struct smc_ib_device *smcibdev, u8 ibport,
404                     struct smc_clc_msg_local *lcl, int srv_first_contact)
405 {
406         struct smc_connection *conn = &smc->conn;
407         struct smc_link_group *lgr;
408         unsigned short vlan_id;
409         enum smc_lgr_role role;
410         int local_contact = SMC_FIRST_CONTACT;
411         int rc = 0;
412
413         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
414         rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
415         if (rc)
416                 return rc;
417
418         if ((role == SMC_CLNT) && srv_first_contact)
419                 /* create new link group as well */
420                 goto create;
421
422         /* determine if an existing link group can be reused */
423         spin_lock_bh(&smc_lgr_list.lock);
424         list_for_each_entry(lgr, &smc_lgr_list.list, list) {
425                 write_lock_bh(&lgr->conns_lock);
426                 if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
427                             SMC_SYSTEMID_LEN) &&
428                     !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
429                             SMC_GID_SIZE) &&
430                     !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
431                             sizeof(lcl->mac)) &&
432                     !lgr->sync_err &&
433                     (lgr->role == role) &&
434                     (lgr->vlan_id == vlan_id) &&
435                     ((role == SMC_CLNT) ||
436                      (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
437                         /* link group found */
438                         local_contact = SMC_REUSE_CONTACT;
439                         conn->lgr = lgr;
440                         smc_lgr_register_conn(conn); /* add smc conn to lgr */
441                         write_unlock_bh(&lgr->conns_lock);
442                         break;
443                 }
444                 write_unlock_bh(&lgr->conns_lock);
445         }
446         spin_unlock_bh(&smc_lgr_list.lock);
447
448         if (role == SMC_CLNT && !srv_first_contact &&
449             (local_contact == SMC_FIRST_CONTACT)) {
450                 /* Server reuses a link group, but Client wants to start
451                  * a new one
452                  * send out_of_sync decline, reason synchr. error
453                  */
454                 return -ENOLINK;
455         }
456
457 create:
458         if (local_contact == SMC_FIRST_CONTACT) {
459                 rc = smc_lgr_create(smc, smcibdev, ibport,
460                                     lcl->id_for_peer, vlan_id);
461                 if (rc)
462                         goto out;
463                 smc_lgr_register_conn(conn); /* add smc conn to lgr */
464                 rc = smc_link_determine_gid(conn->lgr);
465         }
466         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
467         conn->local_tx_ctrl.len = sizeof(struct smc_cdc_msg);
468 #ifndef KERNEL_HAS_ATOMIC64
469         spin_lock_init(&conn->acurs_lock);
470 #endif
471
472 out:
473         return rc ? rc : local_contact;
474 }
475
476 /* try to reuse a sndbuf or rmb description slot for a certain
477  * buffer size; if not available, return NULL
478  */
479 static inline
480 struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
481                                       int compressed_bufsize,
482                                       rwlock_t *lock,
483                                       struct list_head *buf_list)
484 {
485         struct smc_buf_desc *buf_slot;
486
487         read_lock_bh(lock);
488         list_for_each_entry(buf_slot, buf_list, list) {
489                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
490                         read_unlock_bh(lock);
491                         return buf_slot;
492                 }
493         }
494         read_unlock_bh(lock);
495         return NULL;
496 }
497
498 /* one of the conditions for announcing a receiver's current window size is
499  * that it "results in a minimum increase in the window size of 10% of the
500  * receive buffer space" [RFC7609]
501  */
502 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
503 {
504         return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
505 }
506
507 static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
508                                                bool is_rmb, int bufsize)
509 {
510         struct smc_buf_desc *buf_desc;
511         struct smc_link *lnk;
512         int rc;
513
514         /* try to alloc a new buffer */
515         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
516         if (!buf_desc)
517                 return ERR_PTR(-ENOMEM);
518
519         buf_desc->cpu_addr =
520                 (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
521                                          __GFP_NOMEMALLOC |
522                                          __GFP_NORETRY | __GFP_ZERO,
523                                          get_order(bufsize));
524         if (!buf_desc->cpu_addr) {
525                 kfree(buf_desc);
526                 return ERR_PTR(-EAGAIN);
527         }
528         buf_desc->order = get_order(bufsize);
529
530         /* build the sg table from the pages */
531         lnk = &lgr->lnk[SMC_SINGLE_LINK];
532         rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
533                             GFP_KERNEL);
534         if (rc) {
535                 smc_buf_free(buf_desc, lnk, is_rmb);
536                 return ERR_PTR(rc);
537         }
538         sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
539                    buf_desc->cpu_addr, bufsize);
540
541         /* map sg table to DMA address */
542         rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
543                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
544         /* SMC protocol depends on mapping to one DMA address only */
545         if (rc != 1)  {
546                 smc_buf_free(buf_desc, lnk, is_rmb);
547                 return ERR_PTR(-EAGAIN);
548         }
549
550         /* create a new memory region for the RMB */
551         if (is_rmb) {
552                 rc = smc_ib_get_memory_region(lnk->roce_pd,
553                                               IB_ACCESS_REMOTE_WRITE |
554                                               IB_ACCESS_LOCAL_WRITE,
555                                               buf_desc);
556                 if (rc) {
557                         smc_buf_free(buf_desc, lnk, is_rmb);
558                         return ERR_PTR(rc);
559                 }
560         }
561
562         return buf_desc;
563 }
564
565 static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
566 {
567         struct smc_connection *conn = &smc->conn;
568         struct smc_link_group *lgr = conn->lgr;
569         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
570         struct list_head *buf_list;
571         int bufsize, bufsize_short;
572         int sk_buf_size;
573         rwlock_t *lock;
574
575         if (is_rmb)
576                 /* use socket recv buffer size (w/o overhead) as start value */
577                 sk_buf_size = smc->sk.sk_rcvbuf / 2;
578         else
579                 /* use socket send buffer size (w/o overhead) as start value */
580                 sk_buf_size = smc->sk.sk_sndbuf / 2;
581
582         for (bufsize_short = smc_compress_bufsize(sk_buf_size);
583              bufsize_short >= 0; bufsize_short--) {
584
585                 if (is_rmb) {
586                         lock = &lgr->rmbs_lock;
587                         buf_list = &lgr->rmbs[bufsize_short];
588                 } else {
589                         lock = &lgr->sndbufs_lock;
590                         buf_list = &lgr->sndbufs[bufsize_short];
591                 }
592                 bufsize = smc_uncompress_bufsize(bufsize_short);
593                 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
594                         continue;
595
596                 /* check for reusable slot in the link group */
597                 buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
598                 if (buf_desc) {
599                         memset(buf_desc->cpu_addr, 0, bufsize);
600                         break; /* found reusable slot */
601                 }
602
603                 buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
604                 if (PTR_ERR(buf_desc) == -ENOMEM)
605                         break;
606                 if (IS_ERR(buf_desc))
607                         continue;
608
609                 buf_desc->used = 1;
610                 write_lock_bh(lock);
611                 list_add(&buf_desc->list, buf_list);
612                 write_unlock_bh(lock);
613                 break; /* found */
614         }
615
616         if (IS_ERR(buf_desc))
617                 return -ENOMEM;
618
619         if (is_rmb) {
620                 conn->rmb_desc = buf_desc;
621                 conn->rmbe_size = bufsize;
622                 conn->rmbe_size_short = bufsize_short;
623                 smc->sk.sk_rcvbuf = bufsize * 2;
624                 atomic_set(&conn->bytes_to_rcv, 0);
625                 conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
626         } else {
627                 conn->sndbuf_desc = buf_desc;
628                 conn->sndbuf_size = bufsize;
629                 smc->sk.sk_sndbuf = bufsize * 2;
630                 atomic_set(&conn->sndbuf_space, bufsize);
631         }
632         return 0;
633 }
634
635 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
636 {
637         struct smc_link_group *lgr = conn->lgr;
638
639         smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
640                                conn->sndbuf_desc, DMA_TO_DEVICE);
641 }
642
643 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
644 {
645         struct smc_link_group *lgr = conn->lgr;
646
647         smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
648                                   conn->sndbuf_desc, DMA_TO_DEVICE);
649 }
650
651 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
652 {
653         struct smc_link_group *lgr = conn->lgr;
654
655         smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
656                                conn->rmb_desc, DMA_FROM_DEVICE);
657 }
658
659 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
660 {
661         struct smc_link_group *lgr = conn->lgr;
662
663         smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
664                                   conn->rmb_desc, DMA_FROM_DEVICE);
665 }
666
667 /* create the send and receive buffer for an SMC socket;
668  * receive buffers are called RMBs;
669  * (even though the SMC protocol allows more than one RMB-element per RMB,
670  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
671  * extra RMB for every connection in a link group
672  */
673 int smc_buf_create(struct smc_sock *smc)
674 {
675         int rc;
676
677         /* create send buffer */
678         rc = __smc_buf_create(smc, false);
679         if (rc)
680                 return rc;
681         /* create rmb */
682         rc = __smc_buf_create(smc, true);
683         if (rc)
684                 smc_buf_free(smc->conn.sndbuf_desc,
685                              &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
686         return rc;
687 }
688
689 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
690 {
691         int i;
692
693         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
694                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
695                         return i;
696         }
697         return -ENOSPC;
698 }
699
700 /* add a new rtoken from peer */
701 int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
702 {
703         u64 dma_addr = be64_to_cpu(nw_vaddr);
704         u32 rkey = ntohl(nw_rkey);
705         int i;
706
707         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
708                 if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
709                     (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
710                     test_bit(i, lgr->rtokens_used_mask)) {
711                         /* already in list */
712                         return i;
713                 }
714         }
715         i = smc_rmb_reserve_rtoken_idx(lgr);
716         if (i < 0)
717                 return i;
718         lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
719         lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
720         return i;
721 }
722
723 /* delete an rtoken */
724 int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
725 {
726         u32 rkey = ntohl(nw_rkey);
727         int i;
728
729         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
730                 if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
731                     test_bit(i, lgr->rtokens_used_mask)) {
732                         lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
733                         lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
734
735                         clear_bit(i, lgr->rtokens_used_mask);
736                         return 0;
737                 }
738         }
739         return -ENOENT;
740 }
741
742 /* save rkey and dma_addr received from peer during clc handshake */
743 int smc_rmb_rtoken_handling(struct smc_connection *conn,
744                             struct smc_clc_msg_accept_confirm *clc)
745 {
746         conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
747                                           clc->rmb_rkey);
748         if (conn->rtoken_idx < 0)
749                 return conn->rtoken_idx;
750         return 0;
751 }