net/smc: process add/delete link messages
[platform/kernel/linux-rpi.git] / net / smc / smc_core.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Basic Transport Functions exploiting Infiniband API
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10  */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <net/tcp.h>
17 #include <net/sock.h>
18 #include <rdma/ib_verbs.h>
19
20 #include "smc.h"
21 #include "smc_clc.h"
22 #include "smc_core.h"
23 #include "smc_ib.h"
24 #include "smc_wr.h"
25 #include "smc_llc.h"
26 #include "smc_cdc.h"
27 #include "smc_close.h"
28
/* step by which the global link group number grows per new link group */
#define SMC_LGR_NUM_INCR                256
/* delay (in jiffies) before an unused server link group is freed */
#define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
/* clients wait 10 seconds longer than servers; the extra time has to be
 * scaled by HZ as well, a bare "+ 10" would only add 10 jiffies
 */
#define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
32
/* unique link group number; smc_lgr_create() advances it by
 * SMC_LGR_NUM_INCR and copies its first SMC_LGR_ID_SIZE bytes into the id
 * of each new link group
 */
static u32 smc_lgr_num;
34
35 /* Register connection's alert token in our lookup structure.
36  * To use rbtrees we have to implement our own insert core.
37  * Requires @conns_lock
38  * @smc         connection to register
39  * Returns 0 on success, != otherwise.
40  */
41 static void smc_lgr_add_alert_token(struct smc_connection *conn)
42 {
43         struct rb_node **link, *parent = NULL;
44         u32 token = conn->alert_token_local;
45
46         link = &conn->lgr->conns_all.rb_node;
47         while (*link) {
48                 struct smc_connection *cur = rb_entry(*link,
49                                         struct smc_connection, alert_node);
50
51                 parent = *link;
52                 if (cur->alert_token_local > token)
53                         link = &parent->rb_left;
54                 else
55                         link = &parent->rb_right;
56         }
57         /* Put the new node there */
58         rb_link_node(&conn->alert_node, parent, link);
59         rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
60 }
61
62 /* Register connection in link group by assigning an alert token
63  * registered in a search tree.
64  * Requires @conns_lock
65  * Note that '0' is a reserved value and not assigned.
66  */
67 static void smc_lgr_register_conn(struct smc_connection *conn)
68 {
69         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
70         static atomic_t nexttoken = ATOMIC_INIT(0);
71
72         /* find a new alert_token_local value not yet used by some connection
73          * in this link group
74          */
75         sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
76         while (!conn->alert_token_local) {
77                 conn->alert_token_local = atomic_inc_return(&nexttoken);
78                 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
79                         conn->alert_token_local = 0;
80         }
81         smc_lgr_add_alert_token(conn);
82         conn->lgr->conns_num++;
83 }
84
85 /* Unregister connection and reset the alert token of the given connection<
86  */
87 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
88 {
89         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
90         struct smc_link_group *lgr = conn->lgr;
91
92         rb_erase(&conn->alert_node, &lgr->conns_all);
93         lgr->conns_num--;
94         conn->alert_token_local = 0;
95         conn->lgr = NULL;
96         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
97 }
98
99 /* Unregister connection and trigger lgr freeing if applicable
100  */
101 static void smc_lgr_unregister_conn(struct smc_connection *conn)
102 {
103         struct smc_link_group *lgr = conn->lgr;
104         int reduced = 0;
105
106         write_lock_bh(&lgr->conns_lock);
107         if (conn->alert_token_local) {
108                 reduced = 1;
109                 __smc_lgr_unregister_conn(conn);
110         }
111         write_unlock_bh(&lgr->conns_lock);
112         if (!reduced || lgr->conns_num)
113                 return;
114         /* client link group creation always follows the server link group
115          * creation. For client use a somewhat higher removal delay time,
116          * otherwise there is a risk of out-of-sync link groups.
117          */
118         mod_delayed_work(system_wq, &lgr->free_work,
119                          lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
120                                                  SMC_LGR_FREE_DELAY_SERV);
121 }
122
123 static void smc_lgr_free_work(struct work_struct *work)
124 {
125         struct smc_link_group *lgr = container_of(to_delayed_work(work),
126                                                   struct smc_link_group,
127                                                   free_work);
128         bool conns;
129
130         spin_lock_bh(&smc_lgr_list.lock);
131         if (list_empty(&lgr->list))
132                 goto free;
133         read_lock_bh(&lgr->conns_lock);
134         conns = RB_EMPTY_ROOT(&lgr->conns_all);
135         read_unlock_bh(&lgr->conns_lock);
136         if (!conns) { /* number of lgr connections is no longer zero */
137                 spin_unlock_bh(&smc_lgr_list.lock);
138                 return;
139         }
140         list_del_init(&lgr->list); /* remove from smc_lgr_list */
141 free:
142         spin_unlock_bh(&smc_lgr_list.lock);
143         smc_lgr_free(lgr);
144 }
145
146 /* create a new SMC link group */
147 static int smc_lgr_create(struct smc_sock *smc,
148                           struct smc_ib_device *smcibdev, u8 ibport,
149                           char *peer_systemid, unsigned short vlan_id)
150 {
151         struct smc_link_group *lgr;
152         struct smc_link *lnk;
153         u8 rndvec[3];
154         int rc = 0;
155         int i;
156
157         lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
158         if (!lgr) {
159                 rc = -ENOMEM;
160                 goto out;
161         }
162         lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
163         lgr->sync_err = false;
164         memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
165         lgr->vlan_id = vlan_id;
166         rwlock_init(&lgr->sndbufs_lock);
167         rwlock_init(&lgr->rmbs_lock);
168         for (i = 0; i < SMC_RMBE_SIZES; i++) {
169                 INIT_LIST_HEAD(&lgr->sndbufs[i]);
170                 INIT_LIST_HEAD(&lgr->rmbs[i]);
171         }
172         smc_lgr_num += SMC_LGR_NUM_INCR;
173         memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE);
174         INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
175         lgr->conns_all = RB_ROOT;
176
177         lnk = &lgr->lnk[SMC_SINGLE_LINK];
178         /* initialize link */
179         lnk->state = SMC_LNK_ACTIVATING;
180         lnk->smcibdev = smcibdev;
181         lnk->ibport = ibport;
182         lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
183         if (!smcibdev->initialized)
184                 smc_ib_setup_per_ibdev(smcibdev);
185         get_random_bytes(rndvec, sizeof(rndvec));
186         lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
187         rc = smc_wr_alloc_link_mem(lnk);
188         if (rc)
189                 goto free_lgr;
190         rc = smc_ib_create_protection_domain(lnk);
191         if (rc)
192                 goto free_link_mem;
193         rc = smc_ib_create_queue_pair(lnk);
194         if (rc)
195                 goto dealloc_pd;
196         rc = smc_wr_create_link(lnk);
197         if (rc)
198                 goto destroy_qp;
199         init_completion(&lnk->llc_confirm);
200         init_completion(&lnk->llc_confirm_resp);
201         init_completion(&lnk->llc_add);
202         init_completion(&lnk->llc_add_resp);
203
204         smc->conn.lgr = lgr;
205         rwlock_init(&lgr->conns_lock);
206         spin_lock_bh(&smc_lgr_list.lock);
207         list_add(&lgr->list, &smc_lgr_list.list);
208         spin_unlock_bh(&smc_lgr_list.lock);
209         return 0;
210
211 destroy_qp:
212         smc_ib_destroy_queue_pair(lnk);
213 dealloc_pd:
214         smc_ib_dealloc_protection_domain(lnk);
215 free_link_mem:
216         smc_wr_free_link_mem(lnk);
217 free_lgr:
218         kfree(lgr);
219 out:
220         return rc;
221 }
222
223 static void smc_buf_unuse(struct smc_connection *conn)
224 {
225         if (conn->sndbuf_desc) {
226                 conn->sndbuf_desc->used = 0;
227                 conn->sndbuf_size = 0;
228         }
229         if (conn->rmb_desc) {
230                 conn->rmb_desc->reused = true;
231                 conn->rmb_desc->used = 0;
232                 conn->rmbe_size = 0;
233         }
234 }
235
236 /* remove a finished connection from its link group */
237 void smc_conn_free(struct smc_connection *conn)
238 {
239         if (!conn->lgr)
240                 return;
241         smc_cdc_tx_dismiss_slots(conn);
242         smc_lgr_unregister_conn(conn);
243         smc_buf_unuse(conn);
244 }
245
/* Tear down a link: clear the stored peer QP number and then call the
 * link's reset/free helpers in sequence. The last three calls (queue pair,
 * protection domain, link memory) follow the same order as the error
 * unwinding in smc_lgr_create().
 */
static void smc_link_clear(struct smc_link *lnk)
{
        lnk->peer_qpn = 0;
        smc_ib_modify_qp_reset(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
        smc_wr_free_link_mem(lnk);
}
255
256 static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
257                          bool is_rmb)
258 {
259         if (is_rmb) {
260                 if (buf_desc->mr_rx[SMC_SINGLE_LINK])
261                         smc_ib_put_memory_region(
262                                         buf_desc->mr_rx[SMC_SINGLE_LINK]);
263                 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
264                                     DMA_FROM_DEVICE);
265         } else {
266                 smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
267                                     DMA_TO_DEVICE);
268         }
269         sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
270         if (buf_desc->cpu_addr)
271                 free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
272         kfree(buf_desc);
273 }
274
275 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
276 {
277         struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
278         struct smc_buf_desc *buf_desc, *bf_desc;
279         struct list_head *buf_list;
280         int i;
281
282         for (i = 0; i < SMC_RMBE_SIZES; i++) {
283                 if (is_rmb)
284                         buf_list = &lgr->rmbs[i];
285                 else
286                         buf_list = &lgr->sndbufs[i];
287                 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
288                                          list) {
289                         list_del(&buf_desc->list);
290                         smc_buf_free(buf_desc, lnk, is_rmb);
291                 }
292         }
293 }
294
/* Free every buffer owned by the link group: first all send buffers, then
 * all RMBs.
 */
static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
        /* free send buffers */
        __smc_lgr_free_bufs(lgr, false);
        /* free rmbs */
        __smc_lgr_free_bufs(lgr, true);
}
302
/* Remove a link group: free its buffers, tear down its single link and
 * release the link group structure itself. The function does not take the
 * link group off smc_lgr_list; the caller visible here
 * (smc_lgr_free_work()) unlinks it before calling.
 */
void smc_lgr_free(struct smc_link_group *lgr)
{
        smc_lgr_free_bufs(lgr);
        smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
        kfree(lgr);
}
310
311 /* terminate linkgroup abnormally */
312 void smc_lgr_terminate(struct smc_link_group *lgr)
313 {
314         struct smc_connection *conn;
315         struct smc_sock *smc;
316         struct rb_node *node;
317
318         spin_lock_bh(&smc_lgr_list.lock);
319         if (list_empty(&lgr->list)) {
320                 /* termination already triggered */
321                 spin_unlock_bh(&smc_lgr_list.lock);
322                 return;
323         }
324         /* do not use this link group for new connections */
325         list_del_init(&lgr->list);
326         spin_unlock_bh(&smc_lgr_list.lock);
327
328         write_lock_bh(&lgr->conns_lock);
329         node = rb_first(&lgr->conns_all);
330         while (node) {
331                 conn = rb_entry(node, struct smc_connection, alert_node);
332                 smc = container_of(conn, struct smc_sock, conn);
333                 sock_hold(&smc->sk); /* sock_put in close work */
334                 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
335                 __smc_lgr_unregister_conn(conn);
336                 write_unlock_bh(&lgr->conns_lock);
337                 if (!schedule_work(&conn->close_work))
338                         sock_put(&smc->sk);
339                 write_lock_bh(&lgr->conns_lock);
340                 node = rb_first(&lgr->conns_all);
341         }
342         write_unlock_bh(&lgr->conns_lock);
343         wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
344 }
345
346 /* Determine vlan of internal TCP socket.
347  * @vlan_id: address to store the determined vlan id into
348  */
349 static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
350 {
351         struct dst_entry *dst = sk_dst_get(clcsock->sk);
352         int rc = 0;
353
354         *vlan_id = 0;
355         if (!dst) {
356                 rc = -ENOTCONN;
357                 goto out;
358         }
359         if (!dst->dev) {
360                 rc = -ENODEV;
361                 goto out_rel;
362         }
363
364         if (is_vlan_dev(dst->dev))
365                 *vlan_id = vlan_dev_vlan_id(dst->dev);
366
367 out_rel:
368         dst_release(dst);
369 out:
370         return rc;
371 }
372
373 /* determine the link gid matching the vlan id of the link group */
374 static int smc_link_determine_gid(struct smc_link_group *lgr)
375 {
376         struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
377         struct ib_gid_attr gattr;
378         union ib_gid gid;
379         int i;
380
381         if (!lgr->vlan_id) {
382                 lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1];
383                 return 0;
384         }
385
386         for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
387              i++) {
388                 if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
389                                  &gattr))
390                         continue;
391                 if (gattr.ndev) {
392                         if (is_vlan_dev(gattr.ndev) &&
393                             vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
394                                 lnk->gid = gid;
395                                 dev_put(gattr.ndev);
396                                 return 0;
397                         }
398                         dev_put(gattr.ndev);
399                 }
400         }
401         return -ENODEV;
402 }
403
404 /* create a new SMC connection (and a new link group if necessary) */
405 int smc_conn_create(struct smc_sock *smc,
406                     struct smc_ib_device *smcibdev, u8 ibport,
407                     struct smc_clc_msg_local *lcl, int srv_first_contact)
408 {
409         struct smc_connection *conn = &smc->conn;
410         struct smc_link_group *lgr;
411         unsigned short vlan_id;
412         enum smc_lgr_role role;
413         int local_contact = SMC_FIRST_CONTACT;
414         int rc = 0;
415
416         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
417         rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
418         if (rc)
419                 return rc;
420
421         if ((role == SMC_CLNT) && srv_first_contact)
422                 /* create new link group as well */
423                 goto create;
424
425         /* determine if an existing link group can be reused */
426         spin_lock_bh(&smc_lgr_list.lock);
427         list_for_each_entry(lgr, &smc_lgr_list.list, list) {
428                 write_lock_bh(&lgr->conns_lock);
429                 if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
430                             SMC_SYSTEMID_LEN) &&
431                     !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
432                             SMC_GID_SIZE) &&
433                     !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
434                             sizeof(lcl->mac)) &&
435                     !lgr->sync_err &&
436                     (lgr->role == role) &&
437                     (lgr->vlan_id == vlan_id) &&
438                     ((role == SMC_CLNT) ||
439                      (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
440                         /* link group found */
441                         local_contact = SMC_REUSE_CONTACT;
442                         conn->lgr = lgr;
443                         smc_lgr_register_conn(conn); /* add smc conn to lgr */
444                         write_unlock_bh(&lgr->conns_lock);
445                         break;
446                 }
447                 write_unlock_bh(&lgr->conns_lock);
448         }
449         spin_unlock_bh(&smc_lgr_list.lock);
450
451         if (role == SMC_CLNT && !srv_first_contact &&
452             (local_contact == SMC_FIRST_CONTACT)) {
453                 /* Server reuses a link group, but Client wants to start
454                  * a new one
455                  * send out_of_sync decline, reason synchr. error
456                  */
457                 return -ENOLINK;
458         }
459
460 create:
461         if (local_contact == SMC_FIRST_CONTACT) {
462                 rc = smc_lgr_create(smc, smcibdev, ibport,
463                                     lcl->id_for_peer, vlan_id);
464                 if (rc)
465                         goto out;
466                 smc_lgr_register_conn(conn); /* add smc conn to lgr */
467                 rc = smc_link_determine_gid(conn->lgr);
468         }
469         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
470         conn->local_tx_ctrl.len = sizeof(struct smc_cdc_msg);
471 #ifndef KERNEL_HAS_ATOMIC64
472         spin_lock_init(&conn->acurs_lock);
473 #endif
474
475 out:
476         return rc ? rc : local_contact;
477 }
478
479 /* try to reuse a sndbuf or rmb description slot for a certain
480  * buffer size; if not available, return NULL
481  */
482 static inline
483 struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
484                                       int compressed_bufsize,
485                                       rwlock_t *lock,
486                                       struct list_head *buf_list)
487 {
488         struct smc_buf_desc *buf_slot;
489
490         read_lock_bh(lock);
491         list_for_each_entry(buf_slot, buf_list, list) {
492                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
493                         read_unlock_bh(lock);
494                         return buf_slot;
495                 }
496         }
497         read_unlock_bh(lock);
498         return NULL;
499 }
500
501 /* one of the conditions for announcing a receiver's current window size is
502  * that it "results in a minimum increase in the window size of 10% of the
503  * receive buffer space" [RFC7609]
504  */
505 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
506 {
507         return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
508 }
509
510 static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
511                                                bool is_rmb, int bufsize)
512 {
513         struct smc_buf_desc *buf_desc;
514         struct smc_link *lnk;
515         int rc;
516
517         /* try to alloc a new buffer */
518         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
519         if (!buf_desc)
520                 return ERR_PTR(-ENOMEM);
521
522         buf_desc->cpu_addr =
523                 (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
524                                          __GFP_NOMEMALLOC |
525                                          __GFP_NORETRY | __GFP_ZERO,
526                                          get_order(bufsize));
527         if (!buf_desc->cpu_addr) {
528                 kfree(buf_desc);
529                 return ERR_PTR(-EAGAIN);
530         }
531         buf_desc->order = get_order(bufsize);
532
533         /* build the sg table from the pages */
534         lnk = &lgr->lnk[SMC_SINGLE_LINK];
535         rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
536                             GFP_KERNEL);
537         if (rc) {
538                 smc_buf_free(buf_desc, lnk, is_rmb);
539                 return ERR_PTR(rc);
540         }
541         sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
542                    buf_desc->cpu_addr, bufsize);
543
544         /* map sg table to DMA address */
545         rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
546                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
547         /* SMC protocol depends on mapping to one DMA address only */
548         if (rc != 1)  {
549                 smc_buf_free(buf_desc, lnk, is_rmb);
550                 return ERR_PTR(-EAGAIN);
551         }
552
553         /* create a new memory region for the RMB */
554         if (is_rmb) {
555                 rc = smc_ib_get_memory_region(lnk->roce_pd,
556                                               IB_ACCESS_REMOTE_WRITE |
557                                               IB_ACCESS_LOCAL_WRITE,
558                                               buf_desc);
559                 if (rc) {
560                         smc_buf_free(buf_desc, lnk, is_rmb);
561                         return ERR_PTR(rc);
562                 }
563         }
564
565         return buf_desc;
566 }
567
568 static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
569 {
570         struct smc_connection *conn = &smc->conn;
571         struct smc_link_group *lgr = conn->lgr;
572         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
573         struct list_head *buf_list;
574         int bufsize, bufsize_short;
575         int sk_buf_size;
576         rwlock_t *lock;
577
578         if (is_rmb)
579                 /* use socket recv buffer size (w/o overhead) as start value */
580                 sk_buf_size = smc->sk.sk_rcvbuf / 2;
581         else
582                 /* use socket send buffer size (w/o overhead) as start value */
583                 sk_buf_size = smc->sk.sk_sndbuf / 2;
584
585         for (bufsize_short = smc_compress_bufsize(sk_buf_size);
586              bufsize_short >= 0; bufsize_short--) {
587
588                 if (is_rmb) {
589                         lock = &lgr->rmbs_lock;
590                         buf_list = &lgr->rmbs[bufsize_short];
591                 } else {
592                         lock = &lgr->sndbufs_lock;
593                         buf_list = &lgr->sndbufs[bufsize_short];
594                 }
595                 bufsize = smc_uncompress_bufsize(bufsize_short);
596                 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
597                         continue;
598
599                 /* check for reusable slot in the link group */
600                 buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
601                 if (buf_desc) {
602                         memset(buf_desc->cpu_addr, 0, bufsize);
603                         break; /* found reusable slot */
604                 }
605
606                 buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
607                 if (PTR_ERR(buf_desc) == -ENOMEM)
608                         break;
609                 if (IS_ERR(buf_desc))
610                         continue;
611
612                 buf_desc->used = 1;
613                 write_lock_bh(lock);
614                 list_add(&buf_desc->list, buf_list);
615                 write_unlock_bh(lock);
616                 break; /* found */
617         }
618
619         if (IS_ERR(buf_desc))
620                 return -ENOMEM;
621
622         if (is_rmb) {
623                 conn->rmb_desc = buf_desc;
624                 conn->rmbe_size = bufsize;
625                 conn->rmbe_size_short = bufsize_short;
626                 smc->sk.sk_rcvbuf = bufsize * 2;
627                 atomic_set(&conn->bytes_to_rcv, 0);
628                 conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
629         } else {
630                 conn->sndbuf_desc = buf_desc;
631                 conn->sndbuf_size = bufsize;
632                 smc->sk.sk_sndbuf = bufsize * 2;
633                 atomic_set(&conn->sndbuf_space, bufsize);
634         }
635         return 0;
636 }
637
638 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
639 {
640         struct smc_link_group *lgr = conn->lgr;
641
642         smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
643                                conn->sndbuf_desc, DMA_TO_DEVICE);
644 }
645
646 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
647 {
648         struct smc_link_group *lgr = conn->lgr;
649
650         smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
651                                   conn->sndbuf_desc, DMA_TO_DEVICE);
652 }
653
654 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
655 {
656         struct smc_link_group *lgr = conn->lgr;
657
658         smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
659                                conn->rmb_desc, DMA_FROM_DEVICE);
660 }
661
662 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
663 {
664         struct smc_link_group *lgr = conn->lgr;
665
666         smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
667                                   conn->rmb_desc, DMA_FROM_DEVICE);
668 }
669
670 /* create the send and receive buffer for an SMC socket;
671  * receive buffers are called RMBs;
672  * (even though the SMC protocol allows more than one RMB-element per RMB,
673  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
674  * extra RMB for every connection in a link group
675  */
676 int smc_buf_create(struct smc_sock *smc)
677 {
678         int rc;
679
680         /* create send buffer */
681         rc = __smc_buf_create(smc, false);
682         if (rc)
683                 return rc;
684         /* create rmb */
685         rc = __smc_buf_create(smc, true);
686         if (rc)
687                 smc_buf_free(smc->conn.sndbuf_desc,
688                              &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
689         return rc;
690 }
691
692 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
693 {
694         int i;
695
696         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
697                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
698                         return i;
699         }
700         return -ENOSPC;
701 }
702
703 /* add a new rtoken from peer */
704 int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
705 {
706         u64 dma_addr = be64_to_cpu(nw_vaddr);
707         u32 rkey = ntohl(nw_rkey);
708         int i;
709
710         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
711                 if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
712                     (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
713                     test_bit(i, lgr->rtokens_used_mask)) {
714                         /* already in list */
715                         return i;
716                 }
717         }
718         i = smc_rmb_reserve_rtoken_idx(lgr);
719         if (i < 0)
720                 return i;
721         lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
722         lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
723         return i;
724 }
725
726 /* delete an rtoken */
727 int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
728 {
729         u32 rkey = ntohl(nw_rkey);
730         int i;
731
732         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
733                 if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
734                     test_bit(i, lgr->rtokens_used_mask)) {
735                         lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
736                         lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
737
738                         clear_bit(i, lgr->rtokens_used_mask);
739                         return 0;
740                 }
741         }
742         return -ENOENT;
743 }
744
745 /* save rkey and dma_addr received from peer during clc handshake */
746 int smc_rmb_rtoken_handling(struct smc_connection *conn,
747                             struct smc_clc_msg_accept_confirm *clc)
748 {
749         conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
750                                           clc->rmb_rkey);
751         if (conn->rtoken_idx < 0)
752                 return conn->rtoken_idx;
753         return 0;
754 }