net/smc: handle unregistered buffers
net/smc/smc_core.c
// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_LGR_NUM_INCR                256
#define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10)

static u32 smc_lgr_num;                 /* unique link group number */

static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
                         bool is_rmb);

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
        /* client link group creation always follows the server link group
         * creation. For client use a somewhat higher removal delay time,
         * otherwise there is a risk of out-of-sync link groups.
         */
        mod_delayed_work(system_wq, &lgr->free_work,
                         lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
                                                 SMC_LGR_FREE_DELAY_SERV);
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn        connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
        struct rb_node **link, *parent = NULL;
        u32 token = conn->alert_token_local;

        link = &conn->lgr->conns_all.rb_node;
        while (*link) {
                struct smc_connection *cur = rb_entry(*link,
                                        struct smc_connection, alert_node);

                parent = *link;
                if (cur->alert_token_local > token)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }
        /* Put the new node there */
        rb_link_node(&conn->alert_node, parent, link);
        rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        static atomic_t nexttoken = ATOMIC_INIT(0);

        /* find a new alert_token_local value not yet used by some connection
         * in this link group
         */
        sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
        while (!conn->alert_token_local) {
                conn->alert_token_local = atomic_inc_return(&nexttoken);
                if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
                        conn->alert_token_local = 0;
        }
        smc_lgr_add_alert_token(conn);
        conn->lgr->conns_num++;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        struct smc_link_group *lgr = conn->lgr;

        rb_erase(&conn->alert_node, &lgr->conns_all);
        lgr->conns_num--;
        conn->alert_token_local = 0;
        conn->lgr = NULL;
        sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection and trigger lgr freeing if applicable
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;
        int reduced = 0;

        write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local) {
                reduced = 1;
                __smc_lgr_unregister_conn(conn);
        }
        write_unlock_bh(&lgr->conns_lock);
        if (!reduced || lgr->conns_num)
                return;
        smc_lgr_schedule_free_work(lgr);
}

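/* delayed link group freeing; free the link group unless new connections
 * have shown up in the meantime
 */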
static void smc_lgr_free_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(to_delayed_work(work),
                                                  struct smc_link_group,
                                                  free_work);
        bool conns;

        spin_lock_bh(&smc_lgr_list.lock);
        if (list_empty(&lgr->list))
                goto free;
        read_lock_bh(&lgr->conns_lock);
        conns = RB_EMPTY_ROOT(&lgr->conns_all);
        read_unlock_bh(&lgr->conns_lock);
        if (!conns) { /* number of lgr connections is no longer zero */
                spin_unlock_bh(&smc_lgr_list.lock);
                return;
        }
        list_del_init(&lgr->list); /* remove from smc_lgr_list */
free:
        spin_unlock_bh(&smc_lgr_list.lock);
        if (!delayed_work_pending(&lgr->free_work))
                smc_lgr_free(lgr);
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc,
                          struct smc_ib_device *smcibdev, u8 ibport,
                          char *peer_systemid, unsigned short vlan_id)
{
        struct smc_link_group *lgr;
        struct smc_link *lnk;
        u8 rndvec[3];
        int rc = 0;
        int i;

        lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
        if (!lgr) {
                rc = -ENOMEM;
                goto out;
        }
        lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        lgr->sync_err = false;
        memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
        lgr->vlan_id = vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
        rwlock_init(&lgr->rmbs_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
        }
        smc_lgr_num += SMC_LGR_NUM_INCR;
        memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
        lgr->conns_all = RB_ROOT;

        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        /* initialize link */
        lnk->state = SMC_LNK_ACTIVATING;
        lnk->link_id = SMC_SINGLE_LINK;
        lnk->smcibdev = smcibdev;
        lnk->ibport = ibport;
        lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
        if (!smcibdev->initialized)
                smc_ib_setup_per_ibdev(smcibdev);
        get_random_bytes(rndvec, sizeof(rndvec));
        lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
        rc = smc_wr_alloc_link_mem(lnk);
        if (rc)
                goto free_lgr;
        rc = smc_ib_create_protection_domain(lnk);
        if (rc)
                goto free_link_mem;
        rc = smc_ib_create_queue_pair(lnk);
        if (rc)
                goto dealloc_pd;
        rc = smc_wr_create_link(lnk);
        if (rc)
                goto destroy_qp;
        init_completion(&lnk->llc_confirm);
        init_completion(&lnk->llc_confirm_resp);
        init_completion(&lnk->llc_add);
        init_completion(&lnk->llc_add_resp);

        smc->conn.lgr = lgr;
        rwlock_init(&lgr->conns_lock);
        spin_lock_bh(&smc_lgr_list.lock);
        list_add(&lgr->list, &smc_lgr_list.list);
        spin_unlock_bh(&smc_lgr_list.lock);
        return 0;

destroy_qp:
        smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
        smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
        smc_wr_free_link_mem(lnk);
free_lgr:
        kfree(lgr);
out:
        return rc;
}

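/* return a connection's buffers to the link group; an RMB whose
 * registration with the IB device failed cannot be reused and is
 * freed instead
 */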
static void smc_buf_unuse(struct smc_connection *conn)
{
        if (conn->sndbuf_desc) {
                conn->sndbuf_desc->used = 0;
                conn->sndbuf_size = 0;
        }
        if (conn->rmb_desc) {
                if (!conn->rmb_desc->regerr) {
                        conn->rmb_desc->reused = 1;
                        conn->rmb_desc->used = 0;
                        conn->rmbe_size = 0;
                } else {
                        /* buf registration failed, reuse not possible */
                        struct smc_link_group *lgr = conn->lgr;
                        struct smc_link *lnk;

                        write_lock_bh(&lgr->rmbs_lock);
                        list_del(&conn->rmb_desc->list);
                        write_unlock_bh(&lgr->rmbs_lock);

                        lnk = &lgr->lnk[SMC_SINGLE_LINK];
                        smc_buf_free(conn->rmb_desc, lnk, true);
                }
        }
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
        if (!conn->lgr)
                return;
        smc_cdc_tx_dismiss_slots(conn);
        smc_lgr_unregister_conn(conn);
        smc_buf_unuse(conn);
}

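/* tear down a link: reset the queue pair and release all IB resources */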
static void smc_link_clear(struct smc_link *lnk)
{
        lnk->peer_qpn = 0;
        smc_ib_modify_qp_reset(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
        smc_wr_free_link_mem(lnk);
}

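/* unmap a buffer from the IB device and free its pages; for an RMB also
 * release its memory region
 */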
static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
                         bool is_rmb)
{
        if (is_rmb) {
                if (buf_desc->mr_rx[SMC_SINGLE_LINK])
                        smc_ib_put_memory_region(
                                        buf_desc->mr_rx[SMC_SINGLE_LINK]);
                smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
                                    DMA_FROM_DEVICE);
        } else {
                smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
                                    DMA_TO_DEVICE);
        }
        sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
        if (buf_desc->cpu_addr)
                free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
        kfree(buf_desc);
}

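/* free all send buffers or all RMBs of a link group */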
static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
        struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
        struct smc_buf_desc *buf_desc, *bf_desc;
        struct list_head *buf_list;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                if (is_rmb)
                        buf_list = &lgr->rmbs[i];
                else
                        buf_list = &lgr->sndbufs[i];
                list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
                                         list) {
                        list_del(&buf_desc->list);
                        smc_buf_free(buf_desc, lnk, is_rmb);
                }
        }
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
        /* free send buffers */
        __smc_lgr_free_bufs(lgr, false);
        /* free rmbs */
        __smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
void smc_lgr_free(struct smc_link_group *lgr)
{
        smc_lgr_free_bufs(lgr);
        smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
        kfree(lgr);
}

void smc_lgr_forget(struct smc_link_group *lgr)
{
        spin_lock_bh(&smc_lgr_list.lock);
        /* do not use this link group for new connections */
        if (!list_empty(&lgr->list))
                list_del_init(&lgr->list);
        spin_unlock_bh(&smc_lgr_list.lock);
}

/* terminate link group abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
        struct smc_connection *conn;
        struct smc_sock *smc;
        struct rb_node *node;

        smc_lgr_forget(lgr);

        write_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
        while (node) {
                conn = rb_entry(node, struct smc_connection, alert_node);
                smc = container_of(conn, struct smc_sock, conn);
                sock_hold(&smc->sk); /* sock_put in close work */
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
                __smc_lgr_unregister_conn(conn);
                write_unlock_bh(&lgr->conns_lock);
                if (!schedule_work(&conn->close_work))
                        sock_put(&smc->sk);
                write_lock_bh(&lgr->conns_lock);
                node = rb_first(&lgr->conns_all);
        }
        write_unlock_bh(&lgr->conns_lock);
        wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
        smc_lgr_schedule_free_work(lgr);
}

/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
{
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
        int rc = 0;

        *vlan_id = 0;
        if (!dst) {
                rc = -ENOTCONN;
                goto out;
        }
        if (!dst->dev) {
                rc = -ENODEV;
                goto out_rel;
        }

        if (is_vlan_dev(dst->dev))
                *vlan_id = vlan_dev_vlan_id(dst->dev);

out_rel:
        dst_release(dst);
out:
        return rc;
}

/* determine the link gid matching the vlan id of the link group */
static int smc_link_determine_gid(struct smc_link_group *lgr)
{
        struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
        struct ib_gid_attr gattr;
        union ib_gid gid;
        int i;

        if (!lgr->vlan_id) {
                lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1];
                return 0;
        }

        for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
             i++) {
                if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
                                 &gattr))
                        continue;
                if (gattr.ndev) {
                        if (is_vlan_dev(gattr.ndev) &&
                            vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
                                lnk->gid = gid;
                                dev_put(gattr.ndev);
                                return 0;
                        }
                        dev_put(gattr.ndev);
                }
        }
        return -ENODEV;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc,
                    struct smc_ib_device *smcibdev, u8 ibport,
                    struct smc_clc_msg_local *lcl, int srv_first_contact)
{
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr;
        unsigned short vlan_id;
        enum smc_lgr_role role;
        int local_contact = SMC_FIRST_CONTACT;
        int rc = 0;

        role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id);
        if (rc)
                return rc;

        if ((role == SMC_CLNT) && srv_first_contact)
                /* create new link group as well */
                goto create;

        /* determine if an existing link group can be reused */
        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry(lgr, &smc_lgr_list.list, list) {
                write_lock_bh(&lgr->conns_lock);
                if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
                            SMC_SYSTEMID_LEN) &&
                    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
                            SMC_GID_SIZE) &&
                    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
                            sizeof(lcl->mac)) &&
                    !lgr->sync_err &&
                    (lgr->role == role) &&
                    (lgr->vlan_id == vlan_id) &&
                    ((role == SMC_CLNT) ||
                     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
                        /* link group found */
                        local_contact = SMC_REUSE_CONTACT;
                        conn->lgr = lgr;
                        smc_lgr_register_conn(conn); /* add smc conn to lgr */
                        write_unlock_bh(&lgr->conns_lock);
                        break;
                }
                write_unlock_bh(&lgr->conns_lock);
        }
        spin_unlock_bh(&smc_lgr_list.lock);

        if (role == SMC_CLNT && !srv_first_contact &&
            (local_contact == SMC_FIRST_CONTACT)) {
                /* Server reuses a link group, but Client wants to start
                 * a new one; send an out_of_sync decline due to a
                 * synchronization error
                 */
                return -ENOLINK;
        }

create:
        if (local_contact == SMC_FIRST_CONTACT) {
                rc = smc_lgr_create(smc, smcibdev, ibport,
                                    lcl->id_for_peer, vlan_id);
                if (rc)
                        goto out;
                smc_lgr_register_conn(conn); /* add smc conn to lgr */
                rc = smc_link_determine_gid(conn->lgr);
        }
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
        conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
#ifndef KERNEL_HAS_ATOMIC64
        spin_lock_init(&conn->acurs_lock);
#endif

out:
        return rc ? rc : local_contact;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static inline
struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
                                      int compressed_bufsize,
                                      rwlock_t *lock,
                                      struct list_head *buf_list)
{
        struct smc_buf_desc *buf_slot;

        read_lock_bh(lock);
        list_for_each_entry(buf_slot, buf_list, list) {
                if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
                        read_unlock_bh(lock);
                        return buf_slot;
                }
        }
        read_unlock_bh(lock);
        return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
        return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
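/* example: for a 16 KB RMB the limit is min(16384 / 10, SOCK_MIN_SNDBUF / 2),
 * i.e. at most 1638 bytes
 */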

static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
                                               bool is_rmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;
        struct smc_link *lnk;
        int rc;

        /* try to alloc a new buffer */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);

        buf_desc->cpu_addr =
                (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
                                         __GFP_NOMEMALLOC |
                                         __GFP_NORETRY | __GFP_ZERO,
                                         get_order(bufsize));
        if (!buf_desc->cpu_addr) {
                kfree(buf_desc);
                return ERR_PTR(-EAGAIN);
        }
        buf_desc->order = get_order(bufsize);

        /* build the sg table from the pages */
        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
                            GFP_KERNEL);
        if (rc) {
                smc_buf_free(buf_desc, lnk, is_rmb);
                return ERR_PTR(rc);
        }
        sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
                   buf_desc->cpu_addr, bufsize);

        /* map sg table to DMA address */
        rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
                               is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
        /* SMC protocol depends on mapping to one DMA address only */
        if (rc != 1)  {
                smc_buf_free(buf_desc, lnk, is_rmb);
                return ERR_PTR(-EAGAIN);
        }

        /* create a new memory region for the RMB */
        if (is_rmb) {
                rc = smc_ib_get_memory_region(lnk->roce_pd,
                                              IB_ACCESS_REMOTE_WRITE |
                                              IB_ACCESS_LOCAL_WRITE,
                                              buf_desc);
                if (rc) {
                        smc_buf_free(buf_desc, lnk, is_rmb);
                        return ERR_PTR(rc);
                }
        }

        return buf_desc;
}

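/* allocate or reuse a send buffer or RMB for a connection; start from the
 * socket buffer size and try smaller sizes on allocation failure
 */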
static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
{
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
        struct list_head *buf_list;
        int bufsize, bufsize_short;
        int sk_buf_size;
        rwlock_t *lock;

        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_rcvbuf / 2;
        else
                /* use socket send buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_sndbuf / 2;

        for (bufsize_short = smc_compress_bufsize(sk_buf_size);
             bufsize_short >= 0; bufsize_short--) {

                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
                        buf_list = &lgr->rmbs[bufsize_short];
                } else {
                        lock = &lgr->sndbufs_lock;
                        buf_list = &lgr->sndbufs[bufsize_short];
                }
                bufsize = smc_uncompress_bufsize(bufsize_short);
                if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
                        continue;

                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
                if (buf_desc) {
                        memset(buf_desc->cpu_addr, 0, bufsize);
                        break; /* found reusable slot */
                }

                buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
                if (PTR_ERR(buf_desc) == -ENOMEM)
                        break;
                if (IS_ERR(buf_desc))
                        continue;

                buf_desc->used = 1;
                write_lock_bh(lock);
                list_add(&buf_desc->list, buf_list);
                write_unlock_bh(lock);
                break; /* found */
        }

        if (IS_ERR(buf_desc))
                return -ENOMEM;

        if (is_rmb) {
                conn->rmb_desc = buf_desc;
                conn->rmbe_size = bufsize;
                conn->rmbe_size_short = bufsize_short;
                smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
        } else {
                conn->sndbuf_desc = buf_desc;
                conn->sndbuf_size = bufsize;
                smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
        return 0;
}

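/* sync a connection's send buffer or RMB between the CPU and device views
 * of the DMA mapping
 */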
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->rmb_desc, DMA_FROM_DEVICE);
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc)
{
        int rc;

        /* create send buffer */
        rc = __smc_buf_create(smc, false);
        if (rc)
                return rc;
        /* create rmb */
        rc = __smc_buf_create(smc, true);
        if (rc)
                smc_buf_free(smc->conn.sndbuf_desc,
                             &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
        return rc;
}

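/* reserve a free rtoken slot in the link group; return its index or -ENOSPC */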
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
        int i;

        for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
                if (!test_and_set_bit(i, lgr->rtokens_used_mask))
                        return i;
        }
        return -ENOSPC;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
        u64 dma_addr = be64_to_cpu(nw_vaddr);
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
                    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        /* already in list */
                        return i;
                }
        }
        i = smc_rmb_reserve_rtoken_idx(lgr);
        if (i < 0)
                return i;
        lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
        lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
        return i;
}

/* delete an rtoken */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
                        lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;

                        clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
        }
        return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc)
{
        conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
                                          clc->rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;
        return 0;
}