drivers/infiniband/core/sa_query.c (platform/kernel/linux-rpi.git)
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
4  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/err.h>
38 #include <linux/random.h>
39 #include <linux/spinlock.h>
40 #include <linux/slab.h>
41 #include <linux/dma-mapping.h>
42 #include <linux/kref.h>
43 #include <linux/xarray.h>
44 #include <linux/workqueue.h>
45 #include <uapi/linux/if_ether.h>
46 #include <rdma/ib_pack.h>
47 #include <rdma/ib_cache.h>
48 #include <rdma/rdma_netlink.h>
49 #include <net/netlink.h>
50 #include <uapi/rdma/ib_user_sa.h>
51 #include <rdma/ib_marshall.h>
52 #include <rdma/ib_addr.h>
53 #include <rdma/opa_addr.h>
54 #include "sa.h"
55 #include "core_priv.h"
56
57 #define IB_SA_LOCAL_SVC_TIMEOUT_MIN             100
58 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT         2000
59 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX             200000
60 #define IB_SA_CPI_MAX_RETRY_CNT                 3
61 #define IB_SA_CPI_RETRY_WAIT                    1000 /* msecs */
62 static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
63
64 struct ib_sa_sm_ah {
65         struct ib_ah        *ah;
66         struct kref          ref;
67         u16                  pkey_index;
68         u8                   src_path_mask;
69 };
70
71 enum rdma_class_port_info_type {
72         RDMA_CLASS_PORT_INFO_IB,
73         RDMA_CLASS_PORT_INFO_OPA
74 };
75
76 struct rdma_class_port_info {
77         enum rdma_class_port_info_type type;
78         union {
79                 struct ib_class_port_info ib;
80                 struct opa_class_port_info opa;
81         };
82 };
83
84 struct ib_sa_classport_cache {
85         bool valid;
86         int retry_cnt;
87         struct rdma_class_port_info data;
88 };
89
90 struct ib_sa_port {
91         struct ib_mad_agent *agent;
92         struct ib_sa_sm_ah  *sm_ah;
93         struct work_struct   update_task;
94         struct ib_sa_classport_cache classport_info;
95         struct delayed_work ib_cpi_work;
96         spinlock_t                   classport_lock; /* protects class port info set */
97         spinlock_t           ah_lock;
98         u32                  port_num;
99 };
100
101 struct ib_sa_device {
102         int                     start_port, end_port;
103         struct ib_event_handler event_handler;
104         struct ib_sa_port port[];
105 };
106
107 struct ib_sa_query {
108         void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
109         void (*release)(struct ib_sa_query *);
110         struct ib_sa_client    *client;
111         struct ib_sa_port      *port;
112         struct ib_mad_send_buf *mad_buf;
113         struct ib_sa_sm_ah     *sm_ah;
114         int                     id;
115         u32                     flags;
116         struct list_head        list; /* Local svc request list */
117         u32                     seq; /* Local svc request sequence number */
118         unsigned long           timeout; /* Local svc timeout */
119         u8                      path_use; /* How will the pathrecord be used */
120 };
121
122 #define IB_SA_ENABLE_LOCAL_SERVICE      0x00000001
123 #define IB_SA_CANCEL                    0x00000002
124 #define IB_SA_QUERY_OPA                 0x00000004
125
126 struct ib_sa_path_query {
127         void (*callback)(int, struct sa_path_rec *, void *);
128         void *context;
129         struct ib_sa_query sa_query;
130         struct sa_path_rec *conv_pr;
131 };
132
133 struct ib_sa_guidinfo_query {
134         void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
135         void *context;
136         struct ib_sa_query sa_query;
137 };
138
139 struct ib_sa_classport_info_query {
140         void (*callback)(void *);
141         void *context;
142         struct ib_sa_query sa_query;
143 };
144
145 struct ib_sa_mcmember_query {
146         void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
147         void *context;
148         struct ib_sa_query sa_query;
149 };
150
151 static LIST_HEAD(ib_nl_request_list);
152 static DEFINE_SPINLOCK(ib_nl_request_lock);
153 static atomic_t ib_nl_sa_request_seq;
154 static struct workqueue_struct *ib_nl_wq;
155 static struct delayed_work ib_nl_timed_work;
156 static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
157         [LS_NLA_TYPE_PATH_RECORD]       = {.type = NLA_BINARY,
158                 .len = sizeof(struct ib_path_rec_data)},
159         [LS_NLA_TYPE_TIMEOUT]           = {.type = NLA_U32},
160         [LS_NLA_TYPE_SERVICE_ID]        = {.type = NLA_U64},
161         [LS_NLA_TYPE_DGID]              = {.type = NLA_BINARY,
162                 .len = sizeof(struct rdma_nla_ls_gid)},
163         [LS_NLA_TYPE_SGID]              = {.type = NLA_BINARY,
164                 .len = sizeof(struct rdma_nla_ls_gid)},
165         [LS_NLA_TYPE_TCLASS]            = {.type = NLA_U8},
166         [LS_NLA_TYPE_PKEY]              = {.type = NLA_U16},
167         [LS_NLA_TYPE_QOS_CLASS]         = {.type = NLA_U16},
168 };
169
170
171 static int ib_sa_add_one(struct ib_device *device);
172 static void ib_sa_remove_one(struct ib_device *device, void *client_data);
173
174 static struct ib_client sa_client = {
175         .name   = "sa",
176         .add    = ib_sa_add_one,
177         .remove = ib_sa_remove_one
178 };
179
180 static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
181
182 static DEFINE_SPINLOCK(tid_lock);
183 static u32 tid;
184
185 #define PATH_REC_FIELD(field) \
186         .struct_offset_bytes = offsetof(struct sa_path_rec, field),     \
187         .struct_size_bytes   = sizeof_field(struct sa_path_rec, field), \
188         .field_name          = "sa_path_rec:" #field
189
190 static const struct ib_field path_rec_table[] = {
191         { PATH_REC_FIELD(service_id),
192           .offset_words = 0,
193           .offset_bits  = 0,
194           .size_bits    = 64 },
195         { PATH_REC_FIELD(dgid),
196           .offset_words = 2,
197           .offset_bits  = 0,
198           .size_bits    = 128 },
199         { PATH_REC_FIELD(sgid),
200           .offset_words = 6,
201           .offset_bits  = 0,
202           .size_bits    = 128 },
203         { PATH_REC_FIELD(ib.dlid),
204           .offset_words = 10,
205           .offset_bits  = 0,
206           .size_bits    = 16 },
207         { PATH_REC_FIELD(ib.slid),
208           .offset_words = 10,
209           .offset_bits  = 16,
210           .size_bits    = 16 },
211         { PATH_REC_FIELD(ib.raw_traffic),
212           .offset_words = 11,
213           .offset_bits  = 0,
214           .size_bits    = 1 },
215         { RESERVED,
216           .offset_words = 11,
217           .offset_bits  = 1,
218           .size_bits    = 3 },
219         { PATH_REC_FIELD(flow_label),
220           .offset_words = 11,
221           .offset_bits  = 4,
222           .size_bits    = 20 },
223         { PATH_REC_FIELD(hop_limit),
224           .offset_words = 11,
225           .offset_bits  = 24,
226           .size_bits    = 8 },
227         { PATH_REC_FIELD(traffic_class),
228           .offset_words = 12,
229           .offset_bits  = 0,
230           .size_bits    = 8 },
231         { PATH_REC_FIELD(reversible),
232           .offset_words = 12,
233           .offset_bits  = 8,
234           .size_bits    = 1 },
235         { PATH_REC_FIELD(numb_path),
236           .offset_words = 12,
237           .offset_bits  = 9,
238           .size_bits    = 7 },
239         { PATH_REC_FIELD(pkey),
240           .offset_words = 12,
241           .offset_bits  = 16,
242           .size_bits    = 16 },
243         { PATH_REC_FIELD(qos_class),
244           .offset_words = 13,
245           .offset_bits  = 0,
246           .size_bits    = 12 },
247         { PATH_REC_FIELD(sl),
248           .offset_words = 13,
249           .offset_bits  = 12,
250           .size_bits    = 4 },
251         { PATH_REC_FIELD(mtu_selector),
252           .offset_words = 13,
253           .offset_bits  = 16,
254           .size_bits    = 2 },
255         { PATH_REC_FIELD(mtu),
256           .offset_words = 13,
257           .offset_bits  = 18,
258           .size_bits    = 6 },
259         { PATH_REC_FIELD(rate_selector),
260           .offset_words = 13,
261           .offset_bits  = 24,
262           .size_bits    = 2 },
263         { PATH_REC_FIELD(rate),
264           .offset_words = 13,
265           .offset_bits  = 26,
266           .size_bits    = 6 },
267         { PATH_REC_FIELD(packet_life_time_selector),
268           .offset_words = 14,
269           .offset_bits  = 0,
270           .size_bits    = 2 },
271         { PATH_REC_FIELD(packet_life_time),
272           .offset_words = 14,
273           .offset_bits  = 2,
274           .size_bits    = 6 },
275         { PATH_REC_FIELD(preference),
276           .offset_words = 14,
277           .offset_bits  = 8,
278           .size_bits    = 8 },
279         { RESERVED,
280           .offset_words = 14,
281           .offset_bits  = 16,
282           .size_bits    = 48 },
283 };
284
285 #define OPA_PATH_REC_FIELD(field) \
286         .struct_offset_bytes = \
287                 offsetof(struct sa_path_rec, field), \
288         .struct_size_bytes   = \
289                 sizeof_field(struct sa_path_rec, field),        \
290         .field_name          = "sa_path_rec:" #field
291
292 static const struct ib_field opa_path_rec_table[] = {
293         { OPA_PATH_REC_FIELD(service_id),
294           .offset_words = 0,
295           .offset_bits  = 0,
296           .size_bits    = 64 },
297         { OPA_PATH_REC_FIELD(dgid),
298           .offset_words = 2,
299           .offset_bits  = 0,
300           .size_bits    = 128 },
301         { OPA_PATH_REC_FIELD(sgid),
302           .offset_words = 6,
303           .offset_bits  = 0,
304           .size_bits    = 128 },
305         { OPA_PATH_REC_FIELD(opa.dlid),
306           .offset_words = 10,
307           .offset_bits  = 0,
308           .size_bits    = 32 },
309         { OPA_PATH_REC_FIELD(opa.slid),
310           .offset_words = 11,
311           .offset_bits  = 0,
312           .size_bits    = 32 },
313         { OPA_PATH_REC_FIELD(opa.raw_traffic),
314           .offset_words = 12,
315           .offset_bits  = 0,
316           .size_bits    = 1 },
317         { RESERVED,
318           .offset_words = 12,
319           .offset_bits  = 1,
320           .size_bits    = 3 },
321         { OPA_PATH_REC_FIELD(flow_label),
322           .offset_words = 12,
323           .offset_bits  = 4,
324           .size_bits    = 20 },
325         { OPA_PATH_REC_FIELD(hop_limit),
326           .offset_words = 12,
327           .offset_bits  = 24,
328           .size_bits    = 8 },
329         { OPA_PATH_REC_FIELD(traffic_class),
330           .offset_words = 13,
331           .offset_bits  = 0,
332           .size_bits    = 8 },
333         { OPA_PATH_REC_FIELD(reversible),
334           .offset_words = 13,
335           .offset_bits  = 8,
336           .size_bits    = 1 },
337         { OPA_PATH_REC_FIELD(numb_path),
338           .offset_words = 13,
339           .offset_bits  = 9,
340           .size_bits    = 7 },
341         { OPA_PATH_REC_FIELD(pkey),
342           .offset_words = 13,
343           .offset_bits  = 16,
344           .size_bits    = 16 },
345         { OPA_PATH_REC_FIELD(opa.l2_8B),
346           .offset_words = 14,
347           .offset_bits  = 0,
348           .size_bits    = 1 },
349         { OPA_PATH_REC_FIELD(opa.l2_10B),
350           .offset_words = 14,
351           .offset_bits  = 1,
352           .size_bits    = 1 },
353         { OPA_PATH_REC_FIELD(opa.l2_9B),
354           .offset_words = 14,
355           .offset_bits  = 2,
356           .size_bits    = 1 },
357         { OPA_PATH_REC_FIELD(opa.l2_16B),
358           .offset_words = 14,
359           .offset_bits  = 3,
360           .size_bits    = 1 },
361         { RESERVED,
362           .offset_words = 14,
363           .offset_bits  = 4,
364           .size_bits    = 2 },
365         { OPA_PATH_REC_FIELD(opa.qos_type),
366           .offset_words = 14,
367           .offset_bits  = 6,
368           .size_bits    = 2 },
369         { OPA_PATH_REC_FIELD(opa.qos_priority),
370           .offset_words = 14,
371           .offset_bits  = 8,
372           .size_bits    = 8 },
373         { RESERVED,
374           .offset_words = 14,
375           .offset_bits  = 16,
376           .size_bits    = 3 },
377         { OPA_PATH_REC_FIELD(sl),
378           .offset_words = 14,
379           .offset_bits  = 19,
380           .size_bits    = 5 },
381         { RESERVED,
382           .offset_words = 14,
383           .offset_bits  = 24,
384           .size_bits    = 8 },
385         { OPA_PATH_REC_FIELD(mtu_selector),
386           .offset_words = 15,
387           .offset_bits  = 0,
388           .size_bits    = 2 },
389         { OPA_PATH_REC_FIELD(mtu),
390           .offset_words = 15,
391           .offset_bits  = 2,
392           .size_bits    = 6 },
393         { OPA_PATH_REC_FIELD(rate_selector),
394           .offset_words = 15,
395           .offset_bits  = 8,
396           .size_bits    = 2 },
397         { OPA_PATH_REC_FIELD(rate),
398           .offset_words = 15,
399           .offset_bits  = 10,
400           .size_bits    = 6 },
401         { OPA_PATH_REC_FIELD(packet_life_time_selector),
402           .offset_words = 15,
403           .offset_bits  = 16,
404           .size_bits    = 2 },
405         { OPA_PATH_REC_FIELD(packet_life_time),
406           .offset_words = 15,
407           .offset_bits  = 18,
408           .size_bits    = 6 },
409         { OPA_PATH_REC_FIELD(preference),
410           .offset_words = 15,
411           .offset_bits  = 24,
412           .size_bits    = 8 },
413 };
414
415 #define MCMEMBER_REC_FIELD(field) \
416         .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),      \
417         .struct_size_bytes   = sizeof_field(struct ib_sa_mcmember_rec, field),  \
418         .field_name          = "sa_mcmember_rec:" #field
419
420 static const struct ib_field mcmember_rec_table[] = {
421         { MCMEMBER_REC_FIELD(mgid),
422           .offset_words = 0,
423           .offset_bits  = 0,
424           .size_bits    = 128 },
425         { MCMEMBER_REC_FIELD(port_gid),
426           .offset_words = 4,
427           .offset_bits  = 0,
428           .size_bits    = 128 },
429         { MCMEMBER_REC_FIELD(qkey),
430           .offset_words = 8,
431           .offset_bits  = 0,
432           .size_bits    = 32 },
433         { MCMEMBER_REC_FIELD(mlid),
434           .offset_words = 9,
435           .offset_bits  = 0,
436           .size_bits    = 16 },
437         { MCMEMBER_REC_FIELD(mtu_selector),
438           .offset_words = 9,
439           .offset_bits  = 16,
440           .size_bits    = 2 },
441         { MCMEMBER_REC_FIELD(mtu),
442           .offset_words = 9,
443           .offset_bits  = 18,
444           .size_bits    = 6 },
445         { MCMEMBER_REC_FIELD(traffic_class),
446           .offset_words = 9,
447           .offset_bits  = 24,
448           .size_bits    = 8 },
449         { MCMEMBER_REC_FIELD(pkey),
450           .offset_words = 10,
451           .offset_bits  = 0,
452           .size_bits    = 16 },
453         { MCMEMBER_REC_FIELD(rate_selector),
454           .offset_words = 10,
455           .offset_bits  = 16,
456           .size_bits    = 2 },
457         { MCMEMBER_REC_FIELD(rate),
458           .offset_words = 10,
459           .offset_bits  = 18,
460           .size_bits    = 6 },
461         { MCMEMBER_REC_FIELD(packet_life_time_selector),
462           .offset_words = 10,
463           .offset_bits  = 24,
464           .size_bits    = 2 },
465         { MCMEMBER_REC_FIELD(packet_life_time),
466           .offset_words = 10,
467           .offset_bits  = 26,
468           .size_bits    = 6 },
469         { MCMEMBER_REC_FIELD(sl),
470           .offset_words = 11,
471           .offset_bits  = 0,
472           .size_bits    = 4 },
473         { MCMEMBER_REC_FIELD(flow_label),
474           .offset_words = 11,
475           .offset_bits  = 4,
476           .size_bits    = 20 },
477         { MCMEMBER_REC_FIELD(hop_limit),
478           .offset_words = 11,
479           .offset_bits  = 24,
480           .size_bits    = 8 },
481         { MCMEMBER_REC_FIELD(scope),
482           .offset_words = 12,
483           .offset_bits  = 0,
484           .size_bits    = 4 },
485         { MCMEMBER_REC_FIELD(join_state),
486           .offset_words = 12,
487           .offset_bits  = 4,
488           .size_bits    = 4 },
489         { MCMEMBER_REC_FIELD(proxy_join),
490           .offset_words = 12,
491           .offset_bits  = 8,
492           .size_bits    = 1 },
493         { RESERVED,
494           .offset_words = 12,
495           .offset_bits  = 9,
496           .size_bits    = 23 },
497 };
498
499 #define CLASSPORTINFO_REC_FIELD(field) \
500         .struct_offset_bytes = offsetof(struct ib_class_port_info, field),      \
501         .struct_size_bytes   = sizeof_field(struct ib_class_port_info, field),  \
502         .field_name          = "ib_class_port_info:" #field
503
504 static const struct ib_field ib_classport_info_rec_table[] = {
505         { CLASSPORTINFO_REC_FIELD(base_version),
506           .offset_words = 0,
507           .offset_bits  = 0,
508           .size_bits    = 8 },
509         { CLASSPORTINFO_REC_FIELD(class_version),
510           .offset_words = 0,
511           .offset_bits  = 8,
512           .size_bits    = 8 },
513         { CLASSPORTINFO_REC_FIELD(capability_mask),
514           .offset_words = 0,
515           .offset_bits  = 16,
516           .size_bits    = 16 },
517         { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
518           .offset_words = 1,
519           .offset_bits  = 0,
520           .size_bits    = 32 },
521         { CLASSPORTINFO_REC_FIELD(redirect_gid),
522           .offset_words = 2,
523           .offset_bits  = 0,
524           .size_bits    = 128 },
525         { CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
526           .offset_words = 6,
527           .offset_bits  = 0,
528           .size_bits    = 32 },
529         { CLASSPORTINFO_REC_FIELD(redirect_lid),
530           .offset_words = 7,
531           .offset_bits  = 0,
532           .size_bits    = 16 },
533         { CLASSPORTINFO_REC_FIELD(redirect_pkey),
534           .offset_words = 7,
535           .offset_bits  = 16,
536           .size_bits    = 16 },
537
538         { CLASSPORTINFO_REC_FIELD(redirect_qp),
539           .offset_words = 8,
540           .offset_bits  = 0,
541           .size_bits    = 32 },
542         { CLASSPORTINFO_REC_FIELD(redirect_qkey),
543           .offset_words = 9,
544           .offset_bits  = 0,
545           .size_bits    = 32 },
546
547         { CLASSPORTINFO_REC_FIELD(trap_gid),
548           .offset_words = 10,
549           .offset_bits  = 0,
550           .size_bits    = 128 },
551         { CLASSPORTINFO_REC_FIELD(trap_tcslfl),
552           .offset_words = 14,
553           .offset_bits  = 0,
554           .size_bits    = 32 },
555
556         { CLASSPORTINFO_REC_FIELD(trap_lid),
557           .offset_words = 15,
558           .offset_bits  = 0,
559           .size_bits    = 16 },
560         { CLASSPORTINFO_REC_FIELD(trap_pkey),
561           .offset_words = 15,
562           .offset_bits  = 16,
563           .size_bits    = 16 },
564
565         { CLASSPORTINFO_REC_FIELD(trap_hlqp),
566           .offset_words = 16,
567           .offset_bits  = 0,
568           .size_bits    = 32 },
569         { CLASSPORTINFO_REC_FIELD(trap_qkey),
570           .offset_words = 17,
571           .offset_bits  = 0,
572           .size_bits    = 32 },
573 };
574
575 #define OPA_CLASSPORTINFO_REC_FIELD(field) \
576         .struct_offset_bytes =\
577                 offsetof(struct opa_class_port_info, field),    \
578         .struct_size_bytes   = \
579                 sizeof_field(struct opa_class_port_info, field),        \
580         .field_name          = "opa_class_port_info:" #field
581
582 static const struct ib_field opa_classport_info_rec_table[] = {
583         { OPA_CLASSPORTINFO_REC_FIELD(base_version),
584           .offset_words = 0,
585           .offset_bits  = 0,
586           .size_bits    = 8 },
587         { OPA_CLASSPORTINFO_REC_FIELD(class_version),
588           .offset_words = 0,
589           .offset_bits  = 8,
590           .size_bits    = 8 },
591         { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
592           .offset_words = 0,
593           .offset_bits  = 16,
594           .size_bits    = 16 },
595         { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
596           .offset_words = 1,
597           .offset_bits  = 0,
598           .size_bits    = 32 },
599         { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
600           .offset_words = 2,
601           .offset_bits  = 0,
602           .size_bits    = 128 },
603         { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
604           .offset_words = 6,
605           .offset_bits  = 0,
606           .size_bits    = 32 },
607         { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
608           .offset_words = 7,
609           .offset_bits  = 0,
610           .size_bits    = 32 },
611         { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
612           .offset_words = 8,
613           .offset_bits  = 0,
614           .size_bits    = 32 },
615         { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
616           .offset_words = 9,
617           .offset_bits  = 0,
618           .size_bits    = 32 },
619         { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
620           .offset_words = 10,
621           .offset_bits  = 0,
622           .size_bits    = 128 },
623         { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
624           .offset_words = 14,
625           .offset_bits  = 0,
626           .size_bits    = 32 },
627         { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
628           .offset_words = 15,
629           .offset_bits  = 0,
630           .size_bits    = 32 },
631         { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
632           .offset_words = 16,
633           .offset_bits  = 0,
634           .size_bits    = 32 },
635         { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
636           .offset_words = 17,
637           .offset_bits  = 0,
638           .size_bits    = 32 },
639         { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
640           .offset_words = 18,
641           .offset_bits  = 0,
642           .size_bits    = 16 },
643         { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
644           .offset_words = 18,
645           .offset_bits  = 16,
646           .size_bits    = 16 },
647         { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
648           .offset_words = 19,
649           .offset_bits  = 0,
650           .size_bits    = 8 },
651         { RESERVED,
652           .offset_words = 19,
653           .offset_bits  = 8,
654           .size_bits    = 24 },
655 };
656
657 #define GUIDINFO_REC_FIELD(field) \
658         .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),      \
659         .struct_size_bytes   = sizeof_field(struct ib_sa_guidinfo_rec, field),  \
660         .field_name          = "sa_guidinfo_rec:" #field
661
662 static const struct ib_field guidinfo_rec_table[] = {
663         { GUIDINFO_REC_FIELD(lid),
664           .offset_words = 0,
665           .offset_bits  = 0,
666           .size_bits    = 16 },
667         { GUIDINFO_REC_FIELD(block_num),
668           .offset_words = 0,
669           .offset_bits  = 16,
670           .size_bits    = 8 },
671         { GUIDINFO_REC_FIELD(res1),
672           .offset_words = 0,
673           .offset_bits  = 24,
674           .size_bits    = 8 },
675         { GUIDINFO_REC_FIELD(res2),
676           .offset_words = 1,
677           .offset_bits  = 0,
678           .size_bits    = 32 },
679         { GUIDINFO_REC_FIELD(guid_info_list),
680           .offset_words = 2,
681           .offset_bits  = 0,
682           .size_bits    = 512 },
683 };
684
685 static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
686 {
687         query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
688 }
689
690 static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
691 {
692         return (query->flags & IB_SA_CANCEL);
693 }
694
695 static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
696                                      struct ib_sa_query *query)
697 {
698         struct sa_path_rec *sa_rec = query->mad_buf->context[1];
699         struct ib_sa_mad *mad = query->mad_buf->mad;
700         ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
701         u16 val16;
702         u64 val64;
703         struct rdma_ls_resolve_header *header;
704
705         query->mad_buf->context[1] = NULL;
706
707         /* Construct the family header first */
708         header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
709         strscpy_pad(header->device_name,
710                     dev_name(&query->port->agent->device->dev),
711                     LS_DEVICE_NAME_MAX);
712         header->port_num = query->port->port_num;
713
714         if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
715             sa_rec->reversible != 0)
716                 query->path_use = LS_RESOLVE_PATH_USE_GMP;
717         else
718                 query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
719         header->path_use = query->path_use;
720
721         /* Now build the attributes */
722         if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
723                 val64 = be64_to_cpu(sa_rec->service_id);
724                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
725                         sizeof(val64), &val64);
726         }
727         if (comp_mask & IB_SA_PATH_REC_DGID)
728                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
729                         sizeof(sa_rec->dgid), &sa_rec->dgid);
730         if (comp_mask & IB_SA_PATH_REC_SGID)
731                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
732                         sizeof(sa_rec->sgid), &sa_rec->sgid);
733         if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
734                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
735                         sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);
736
737         if (comp_mask & IB_SA_PATH_REC_PKEY) {
738                 val16 = be16_to_cpu(sa_rec->pkey);
739                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
740                         sizeof(val16), &val16);
741         }
742         if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
743                 val16 = be16_to_cpu(sa_rec->qos_class);
744                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
745                         sizeof(val16), &val16);
746         }
747 }
748
749 static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
750 {
751         int len = 0;
752
753         if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
754                 len += nla_total_size(sizeof(u64));
755         if (comp_mask & IB_SA_PATH_REC_DGID)
756                 len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
757         if (comp_mask & IB_SA_PATH_REC_SGID)
758                 len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
759         if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
760                 len += nla_total_size(sizeof(u8));
761         if (comp_mask & IB_SA_PATH_REC_PKEY)
762                 len += nla_total_size(sizeof(u16));
763         if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
764                 len += nla_total_size(sizeof(u16));
765
766         /*
767          * Make sure that at least some of the required comp_mask bits are
768          * set.
769          */
770         if (WARN_ON(len == 0))
771                 return len;
772
773         /* Add the family header */
774         len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));
775
776         return len;
777 }
778
779 static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
780 {
781         struct sk_buff *skb = NULL;
782         struct nlmsghdr *nlh;
783         void *data;
784         struct ib_sa_mad *mad;
785         int len;
786         unsigned long flags;
787         unsigned long delay;
788         gfp_t gfp_flag;
789         int ret;
790
791         INIT_LIST_HEAD(&query->list);
792         query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
793
794         mad = query->mad_buf->mad;
795         len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
796         if (len <= 0)
797                 return -EMSGSIZE;
798
799         skb = nlmsg_new(len, gfp_mask);
800         if (!skb)
801                 return -ENOMEM;
802
803         /* Put nlmsg header only for now */
804         data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
805                             RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
806         if (!data) {
807                 nlmsg_free(skb);
808                 return -EMSGSIZE;
809         }
810
811         /* Add attributes */
812         ib_nl_set_path_rec_attrs(skb, query);
813
814         /* Repair the nlmsg header length */
815         nlmsg_end(skb, nlh);
816
817         gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
818                 GFP_NOWAIT;
819
820         spin_lock_irqsave(&ib_nl_request_lock, flags);
821         ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
822
823         if (ret)
824                 goto out;
825
826         /* Put the request on the list. */
827         delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
828         query->timeout = delay + jiffies;
829         list_add_tail(&query->list, &ib_nl_request_list);
830         /* Start the timeout if this is the only request */
831         if (ib_nl_request_list.next == &query->list)
832                 queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
833
834 out:
835         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
836
837         return ret;
838 }
839
840 static int ib_nl_cancel_request(struct ib_sa_query *query)
841 {
842         unsigned long flags;
843         struct ib_sa_query *wait_query;
844         int found = 0;
845
846         spin_lock_irqsave(&ib_nl_request_lock, flags);
847         list_for_each_entry(wait_query, &ib_nl_request_list, list) {
848                 /* Let the timeout routine take care of the callback */
849                 if (query == wait_query) {
850                         query->flags |= IB_SA_CANCEL;
851                         query->timeout = jiffies;
852                         list_move(&query->list, &ib_nl_request_list);
853                         found = 1;
854                         mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
855                         break;
856                 }
857         }
858         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
859
860         return found;
861 }
862
863 static void send_handler(struct ib_mad_agent *agent,
864                          struct ib_mad_send_wc *mad_send_wc);
865
866 static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
867                                            const struct nlmsghdr *nlh)
868 {
869         struct ib_mad_send_wc mad_send_wc;
870         struct ib_sa_mad *mad = NULL;
871         const struct nlattr *head, *curr;
872         struct ib_path_rec_data  *rec;
873         int len, rem;
874         u32 mask = 0;
875         int status = -EIO;
876
877         if (query->callback) {
878                 head = (const struct nlattr *) nlmsg_data(nlh);
879                 len = nlmsg_len(nlh);
880                 switch (query->path_use) {
881                 case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
882                         mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
883                         break;
884
885                 case LS_RESOLVE_PATH_USE_ALL:
886                 case LS_RESOLVE_PATH_USE_GMP:
887                 default:
888                         mask = IB_PATH_PRIMARY | IB_PATH_GMP |
889                                 IB_PATH_BIDIRECTIONAL;
890                         break;
891                 }
892                 nla_for_each_attr(curr, head, len, rem) {
893                         if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
894                                 rec = nla_data(curr);
895                                 /*
896                                  * Get the first one. In the future, we may
897                                  * need to get up to 6 pathrecords.
898                                  */
899                                 if ((rec->flags & mask) == mask) {
900                                         mad = query->mad_buf->mad;
901                                         mad->mad_hdr.method |=
902                                                 IB_MGMT_METHOD_RESP;
903                                         memcpy(mad->data, rec->path_rec,
904                                                sizeof(rec->path_rec));
905                                         status = 0;
906                                         break;
907                                 }
908                         }
909                 }
910                 query->callback(query, status, mad);
911         }
912
913         mad_send_wc.send_buf = query->mad_buf;
914         mad_send_wc.status = IB_WC_SUCCESS;
915         send_handler(query->mad_buf->mad_agent, &mad_send_wc);
916 }
917
918 static void ib_nl_request_timeout(struct work_struct *work)
919 {
920         unsigned long flags;
921         struct ib_sa_query *query;
922         unsigned long delay;
923         struct ib_mad_send_wc mad_send_wc;
924         int ret;
925
926         spin_lock_irqsave(&ib_nl_request_lock, flags);
927         while (!list_empty(&ib_nl_request_list)) {
928                 query = list_entry(ib_nl_request_list.next,
929                                    struct ib_sa_query, list);
930
931                 if (time_after(query->timeout, jiffies)) {
932                         delay = query->timeout - jiffies;
933                         if ((long)delay <= 0)
934                                 delay = 1;
935                         queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
936                         break;
937                 }
938
939                 list_del(&query->list);
940                 ib_sa_disable_local_svc(query);
941                 /* Hold the lock to protect against query cancellation */
942                 if (ib_sa_query_cancelled(query))
943                         ret = -1;
944                 else
945                         ret = ib_post_send_mad(query->mad_buf, NULL);
946                 if (ret) {
947                         mad_send_wc.send_buf = query->mad_buf;
948                         mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
949                         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
950                         send_handler(query->port->agent, &mad_send_wc);
951                         spin_lock_irqsave(&ib_nl_request_lock, flags);
952                 }
953         }
954         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
955 }
956
957 int ib_nl_handle_set_timeout(struct sk_buff *skb,
958                              struct nlmsghdr *nlh,
959                              struct netlink_ext_ack *extack)
960 {
961         int timeout, delta, abs_delta;
962         const struct nlattr *attr;
963         unsigned long flags;
964         struct ib_sa_query *query;
965         long delay = 0;
966         struct nlattr *tb[LS_NLA_TYPE_MAX];
967         int ret;
968
969         if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
970             !(NETLINK_CB(skb).sk))
971                 return -EPERM;
972
973         ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
974                                    nlmsg_len(nlh), ib_nl_policy, NULL);
975         attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
976         if (ret || !attr)
977                 goto settimeout_out;
978
979         timeout = *(int *) nla_data(attr);
980         if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
981                 timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
982         if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
983                 timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
984
985         delta = timeout - sa_local_svc_timeout_ms;
986         if (delta < 0)
987                 abs_delta = -delta;
988         else
989                 abs_delta = delta;
990
991         if (delta != 0) {
992                 spin_lock_irqsave(&ib_nl_request_lock, flags);
993                 sa_local_svc_timeout_ms = timeout;
994                 list_for_each_entry(query, &ib_nl_request_list, list) {
995                         if (delta < 0 && abs_delta > query->timeout)
996                                 query->timeout = 0;
997                         else
998                                 query->timeout += delta;
999
1000                         /* Get the new delay from the first entry */
1001                         if (!delay) {
1002                                 delay = query->timeout - jiffies;
1003                                 if (delay <= 0)
1004                                         delay = 1;
1005                         }
1006                 }
1007                 if (delay)
1008                         mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
1009                                          (unsigned long)delay);
1010                 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1011         }
1012
1013 settimeout_out:
1014         return 0;
1015 }
1016
1017 static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
1018 {
1019         struct nlattr *tb[LS_NLA_TYPE_MAX];
1020         int ret;
1021
1022         if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
1023                 return 0;
1024
1025         ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
1026                                    nlmsg_len(nlh), ib_nl_policy, NULL);
1027         if (ret)
1028                 return 0;
1029
1030         return 1;
1031 }
1032
1033 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
1034                               struct nlmsghdr *nlh,
1035                               struct netlink_ext_ack *extack)
1036 {
1037         unsigned long flags;
1038         struct ib_sa_query *query;
1039         struct ib_mad_send_buf *send_buf;
1040         struct ib_mad_send_wc mad_send_wc;
1041         int found = 0;
1042         int ret;
1043
1044         if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
1045             !(NETLINK_CB(skb).sk))
1046                 return -EPERM;
1047
1048         spin_lock_irqsave(&ib_nl_request_lock, flags);
1049         list_for_each_entry(query, &ib_nl_request_list, list) {
1050                 /*
1051                  * If the query is cancelled, let the timeout routine
1052                  * take care of it.
1053                  */
1054                 if (nlh->nlmsg_seq == query->seq) {
1055                         found = !ib_sa_query_cancelled(query);
1056                         if (found)
1057                                 list_del(&query->list);
1058                         break;
1059                 }
1060         }
1061
1062         if (!found) {
1063                 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1064                 goto resp_out;
1065         }
1066
1067         send_buf = query->mad_buf;
1068
1069         if (!ib_nl_is_good_resolve_resp(nlh)) {
1070                 /* if the result is a failure, send out the packet via IB */
1071                 ib_sa_disable_local_svc(query);
1072                 ret = ib_post_send_mad(query->mad_buf, NULL);
1073                 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1074                 if (ret) {
1075                         mad_send_wc.send_buf = send_buf;
1076                         mad_send_wc.status = IB_WC_GENERAL_ERR;
1077                         send_handler(query->port->agent, &mad_send_wc);
1078                 }
1079         } else {
1080                 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1081                 ib_nl_process_good_resolve_rsp(query, nlh);
1082         }
1083
1084 resp_out:
1085         return 0;
1086 }
1087
1088 static void free_sm_ah(struct kref *kref)
1089 {
1090         struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
1091
1092         rdma_destroy_ah(sm_ah->ah, 0);
1093         kfree(sm_ah);
1094 }
1095
1096 void ib_sa_register_client(struct ib_sa_client *client)
1097 {
1098         atomic_set(&client->users, 1);
1099         init_completion(&client->comp);
1100 }
1101 EXPORT_SYMBOL(ib_sa_register_client);
1102
1103 void ib_sa_unregister_client(struct ib_sa_client *client)
1104 {
1105         ib_sa_client_put(client);
1106         wait_for_completion(&client->comp);
1107 }
1108 EXPORT_SYMBOL(ib_sa_unregister_client);
1109
1110 /**
1111  * ib_sa_cancel_query - try to cancel an SA query
1112  * @id:ID of query to cancel
1113  * @query:query pointer to cancel
1114  *
1115  * Try to cancel an SA query.  If the id and query don't match up or
1116  * the query has already completed, nothing is done.  Otherwise the
1117  * query is canceled and will complete with a status of -EINTR.
1118  */
1119 void ib_sa_cancel_query(int id, struct ib_sa_query *query)
1120 {
1121         unsigned long flags;
1122         struct ib_mad_send_buf *mad_buf;
1123
1124         xa_lock_irqsave(&queries, flags);
1125         if (xa_load(&queries, id) != query) {
1126                 xa_unlock_irqrestore(&queries, flags);
1127                 return;
1128         }
1129         mad_buf = query->mad_buf;
1130         xa_unlock_irqrestore(&queries, flags);
1131
1132         /*
1133          * If the query is still on the netlink request list, schedule
1134          * it to be cancelled by the timeout routine. Otherwise, it has been
1135          * sent to the MAD layer and has to be cancelled from there.
1136          */
1137         if (!ib_nl_cancel_request(query))
1138                 ib_cancel_mad(mad_buf);
1139 }
1140 EXPORT_SYMBOL(ib_sa_cancel_query);
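/*
 * Illustrative sketch (not part of the upstream file): a consumer that
 * started an SA query keeps both the returned id and the ib_sa_query
 * pointer, and can abort the still-outstanding request with
 * ib_sa_cancel_query().  The completion callback still runs, with a
 * status of -EINTR.  The names my_query and my_id are hypothetical.
 *
 *	struct ib_sa_query *my_query;
 *	int my_id;
 *
 *	my_id = ib_sa_path_rec_get(..., &my_query);	// see below
 *	if (my_id < 0)
 *		return my_id;
 *	...
 *	ib_sa_cancel_query(my_id, my_query);	// callback gets -EINTR
 */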
1141
1142 static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
1143 {
1144         struct ib_sa_device *sa_dev;
1145         struct ib_sa_port   *port;
1146         unsigned long flags;
1147         u8 src_path_mask;
1148
1149         sa_dev = ib_get_client_data(device, &sa_client);
1150         if (!sa_dev)
1151                 return 0x7f;
1152
1153         port  = &sa_dev->port[port_num - sa_dev->start_port];
1154         spin_lock_irqsave(&port->ah_lock, flags);
1155         src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
1156         spin_unlock_irqrestore(&port->ah_lock, flags);
1157
1158         return src_path_mask;
1159 }
1160
1161 static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
1162                                    struct sa_path_rec *rec,
1163                                    struct rdma_ah_attr *ah_attr,
1164                                    const struct ib_gid_attr *gid_attr)
1165 {
1166         enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
1167
1168         if (!gid_attr) {
1169                 gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
1170                                                  port_num, NULL);
1171                 if (IS_ERR(gid_attr))
1172                         return PTR_ERR(gid_attr);
1173         } else
1174                 rdma_hold_gid_attr(gid_attr);
1175
1176         rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
1177                                 be32_to_cpu(rec->flow_label),
1178                                 rec->hop_limit, rec->traffic_class,
1179                                 gid_attr);
1180         return 0;
1181 }
1182
1183 /**
1184  * ib_init_ah_attr_from_path - Initialize address handle attributes based on
1185  *   an SA path record.
1186  * @device: Device associated with ah attributes initialization.
1187  * @port_num: Port on the specified device.
1188  * @rec: path record entry to use for ah attributes initialization.
1189  * @ah_attr: address handle attributes to initialize from the path record.
1190  * @gid_attr: SGID attribute to consider during initialization.
1191  *
1192  * When ib_init_ah_attr_from_path() returns success,
1193  * (a) for the IB link layer it optionally contains a reference to the
1194  *     SGID attribute when GRH is present.
1195  * (b) for RoCE link layer it contains a reference to SGID attribute.
1196  * User must invoke rdma_destroy_ah_attr() to release reference to SGID
1197  * attributes which are initialized using ib_init_ah_attr_from_path().
1198  */
1199 int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
1200                               struct sa_path_rec *rec,
1201                               struct rdma_ah_attr *ah_attr,
1202                               const struct ib_gid_attr *gid_attr)
1203 {
1204         int ret = 0;
1205
1206         memset(ah_attr, 0, sizeof(*ah_attr));
1207         ah_attr->type = rdma_ah_find_type(device, port_num);
1208         rdma_ah_set_sl(ah_attr, rec->sl);
1209         rdma_ah_set_port_num(ah_attr, port_num);
1210         rdma_ah_set_static_rate(ah_attr, rec->rate);
1211
1212         if (sa_path_is_roce(rec)) {
1213                 ret = roce_resolve_route_from_path(rec, gid_attr);
1214                 if (ret)
1215                         return ret;
1216
1217                 memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
1218         } else {
1219                 rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
1220                 if (sa_path_is_opa(rec) &&
1221                     rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
1222                         rdma_ah_set_make_grd(ah_attr, true);
1223
1224                 rdma_ah_set_path_bits(ah_attr,
1225                                       be32_to_cpu(sa_path_get_slid(rec)) &
1226                                       get_src_path_mask(device, port_num));
1227         }
1228
1229         if (rec->hop_limit > 0 || sa_path_is_roce(rec))
1230                 ret = init_ah_attr_grh_fields(device, port_num,
1231                                               rec, ah_attr, gid_attr);
1232         return ret;
1233 }
1234 EXPORT_SYMBOL(ib_init_ah_attr_from_path);
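/*
 * Illustrative sketch (not part of the upstream file): a caller that
 * initializes ah_attr from a resolved path record must release the SGID
 * attribute reference with rdma_destroy_ah_attr() once the attributes are
 * no longer needed, as required by the kernel-doc above.  The names
 * my_rec and my_attr are hypothetical.
 *
 *	struct rdma_ah_attr my_attr;
 *	int ret;
 *
 *	ret = ib_init_ah_attr_from_path(device, port_num, &my_rec,
 *					&my_attr, NULL);
 *	if (ret)
 *		return ret;
 *	// ... create an AH or modify a QP using my_attr ...
 *	rdma_destroy_ah_attr(&my_attr);
 */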
1235
1236 static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
1237 {
1238         struct rdma_ah_attr ah_attr;
1239         unsigned long flags;
1240
1241         spin_lock_irqsave(&query->port->ah_lock, flags);
1242         if (!query->port->sm_ah) {
1243                 spin_unlock_irqrestore(&query->port->ah_lock, flags);
1244                 return -EAGAIN;
1245         }
1246         kref_get(&query->port->sm_ah->ref);
1247         query->sm_ah = query->port->sm_ah;
1248         spin_unlock_irqrestore(&query->port->ah_lock, flags);
1249
1250         /*
1251          * Always check if sm_ah has a valid dlid assigned
1252          * before querying for class port info.
1253          */
1254         if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
1255             !rdma_is_valid_unicast_lid(&ah_attr)) {
1256                 kref_put(&query->sm_ah->ref, free_sm_ah);
1257                 return -EAGAIN;
1258         }
1259         query->mad_buf = ib_create_send_mad(query->port->agent, 1,
1260                                             query->sm_ah->pkey_index,
1261                                             0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
1262                                             gfp_mask,
1263                                             ((query->flags & IB_SA_QUERY_OPA) ?
1264                                              OPA_MGMT_BASE_VERSION :
1265                                              IB_MGMT_BASE_VERSION));
1266         if (IS_ERR(query->mad_buf)) {
1267                 kref_put(&query->sm_ah->ref, free_sm_ah);
1268                 return -ENOMEM;
1269         }
1270
1271         query->mad_buf->ah = query->sm_ah->ah;
1272
1273         return 0;
1274 }
1275
1276 static void free_mad(struct ib_sa_query *query)
1277 {
1278         ib_free_send_mad(query->mad_buf);
1279         kref_put(&query->sm_ah->ref, free_sm_ah);
1280 }
1281
1282 static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
1283 {
1284         struct ib_sa_mad *mad = query->mad_buf->mad;
1285         unsigned long flags;
1286
1287         memset(mad, 0, sizeof *mad);
1288
1289         if (query->flags & IB_SA_QUERY_OPA) {
1290                 mad->mad_hdr.base_version  = OPA_MGMT_BASE_VERSION;
1291                 mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
1292         } else {
1293                 mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
1294                 mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
1295         }
1296         mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
1297         spin_lock_irqsave(&tid_lock, flags);
1298         mad->mad_hdr.tid           =
1299                 cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
1300         spin_unlock_irqrestore(&tid_lock, flags);
1301 }
1302
1303 static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
1304                     gfp_t gfp_mask)
1305 {
1306         unsigned long flags;
1307         int ret, id;
1308         const int nmbr_sa_query_retries = 10;
1309
1310         xa_lock_irqsave(&queries, flags);
1311         ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
1312         xa_unlock_irqrestore(&queries, flags);
1313         if (ret < 0)
1314                 return ret;
1315
1316         query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
1317         query->mad_buf->retries = nmbr_sa_query_retries;
1318         if (!query->mad_buf->timeout_ms) {
1319                 /* Special case, very small timeout_ms */
1320                 query->mad_buf->timeout_ms = 1;
1321                 query->mad_buf->retries = timeout_ms;
1322         }
1323         query->mad_buf->context[0] = query;
1324         query->id = id;
1325
1326         if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
1327             (!(query->flags & IB_SA_QUERY_OPA))) {
1328                 if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
1329                         if (!ib_nl_make_request(query, gfp_mask))
1330                                 return id;
1331                 }
1332                 ib_sa_disable_local_svc(query);
1333         }
1334
1335         ret = ib_post_send_mad(query->mad_buf, NULL);
1336         if (ret) {
1337                 xa_lock_irqsave(&queries, flags);
1338                 __xa_erase(&queries, id);
1339                 xa_unlock_irqrestore(&queries, flags);
1340         }
1341
1342         /*
1343          * It's not safe to dereference query any more, because the
1344          * send may already have completed and freed the query in
1345          * another context.
1346          */
1347         return ret ? ret : id;
1348 }
1349
1350 void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
1351 {
1352         ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
1353 }
1354 EXPORT_SYMBOL(ib_sa_unpack_path);
1355
1356 void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
1357 {
1358         ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
1359 }
1360 EXPORT_SYMBOL(ib_sa_pack_path);
1361
1362 static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
1363                                          struct ib_sa_device *sa_dev,
1364                                          u32 port_num)
1365 {
1366         struct ib_sa_port *port;
1367         unsigned long flags;
1368         bool ret = false;
1369
1370         port = &sa_dev->port[port_num - sa_dev->start_port];
1371         spin_lock_irqsave(&port->classport_lock, flags);
1372         if (!port->classport_info.valid)
1373                 goto ret;
1374
1375         if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
1376                 ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
1377                         OPA_CLASS_PORT_INFO_PR_SUPPORT;
1378 ret:
1379         spin_unlock_irqrestore(&port->classport_lock, flags);
1380         return ret;
1381 }
1382
1383 enum opa_pr_supported {
1384         PR_NOT_SUPPORTED,
1385         PR_OPA_SUPPORTED,
1386         PR_IB_SUPPORTED
1387 };
1388
1389 /*
1390  * opa_pr_query_possible - Check if current PR query can be an OPA query.
1391  *
1392  * Returns PR_NOT_SUPPORTED if a path record query is not
1393  * possible, PR_OPA_SUPPORTED if an OPA path record query
1394  * is possible and PR_IB_SUPPORTED if an IB path record
1395  * query is possible.
1396  */
1397 static int opa_pr_query_possible(struct ib_sa_client *client,
1398                                  struct ib_sa_device *sa_dev,
1399                                  struct ib_device *device, u32 port_num)
1400 {
1401         struct ib_port_attr port_attr;
1402
1403         if (ib_query_port(device, port_num, &port_attr))
1404                 return PR_NOT_SUPPORTED;
1405
1406         if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
1407                 return PR_OPA_SUPPORTED;
1408
1409         if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
1410                 return PR_NOT_SUPPORTED;
1411         else
1412                 return PR_IB_SUPPORTED;
1413 }
1414
1415 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1416                                     int status,
1417                                     struct ib_sa_mad *mad)
1418 {
1419         struct ib_sa_path_query *query =
1420                 container_of(sa_query, struct ib_sa_path_query, sa_query);
1421
1422         if (mad) {
1423                 struct sa_path_rec rec;
1424
1425                 if (sa_query->flags & IB_SA_QUERY_OPA) {
1426                         ib_unpack(opa_path_rec_table,
1427                                   ARRAY_SIZE(opa_path_rec_table),
1428                                   mad->data, &rec);
1429                         rec.rec_type = SA_PATH_REC_TYPE_OPA;
1430                         query->callback(status, &rec, query->context);
1431                 } else {
1432                         ib_unpack(path_rec_table,
1433                                   ARRAY_SIZE(path_rec_table),
1434                                   mad->data, &rec);
1435                         rec.rec_type = SA_PATH_REC_TYPE_IB;
1436                         sa_path_set_dmac_zero(&rec);
1437
1438                         if (query->conv_pr) {
1439                                 struct sa_path_rec opa;
1440
1441                                 memset(&opa, 0, sizeof(struct sa_path_rec));
1442                                 sa_convert_path_ib_to_opa(&opa, &rec);
1443                                 query->callback(status, &opa, query->context);
1444                         } else {
1445                                 query->callback(status, &rec, query->context);
1446                         }
1447                 }
1448         } else
1449                 query->callback(status, NULL, query->context);
1450 }
1451
1452 static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
1453 {
1454         struct ib_sa_path_query *query =
1455                 container_of(sa_query, struct ib_sa_path_query, sa_query);
1456
1457         kfree(query->conv_pr);
1458         kfree(query);
1459 }
1460
1461 /**
1462  * ib_sa_path_rec_get - Start a Path get query
1463  * @client:SA client
1464  * @device:device to send query on
1465  * @port_num: port number to send query on
1466  * @rec:Path Record to send in query
1467  * @comp_mask:component mask to send in query
1468  * @timeout_ms:time to wait for response
1469  * @gfp_mask:GFP mask to use for internal allocations
1470  * @callback:function called when query completes, times out or is
1471  * canceled
1472  * @context:opaque user context passed to callback
1473  * @sa_query:query context, used to cancel query
1474  *
1475  * Send a Path Record Get query to the SA to look up a path.  The
1476  * callback function will be called when the query completes (or
1477  * fails); status is 0 for a successful response, -EINTR if the query
1478  * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
1479  * occurred sending the query.  The resp parameter of the callback is
1480  * only valid if status is 0.
1481  *
1482  * If the return value of ib_sa_path_rec_get() is negative, it is an
1483  * error code.  Otherwise it is a query ID that can be used to cancel
1484  * the query.  (An illustrative caller sketch follows the function body.)
1485  */
1486 int ib_sa_path_rec_get(struct ib_sa_client *client,
1487                        struct ib_device *device, u32 port_num,
1488                        struct sa_path_rec *rec,
1489                        ib_sa_comp_mask comp_mask,
1490                        unsigned long timeout_ms, gfp_t gfp_mask,
1491                        void (*callback)(int status,
1492                                         struct sa_path_rec *resp,
1493                                         void *context),
1494                        void *context,
1495                        struct ib_sa_query **sa_query)
1496 {
1497         struct ib_sa_path_query *query;
1498         struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1499         struct ib_sa_port   *port;
1500         struct ib_mad_agent *agent;
1501         struct ib_sa_mad *mad;
1502         enum opa_pr_supported status;
1503         int ret;
1504
1505         if (!sa_dev)
1506                 return -ENODEV;
1507
1508         if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
1509             (rec->rec_type != SA_PATH_REC_TYPE_OPA))
1510                 return -EINVAL;
1511
1512         port  = &sa_dev->port[port_num - sa_dev->start_port];
1513         agent = port->agent;
1514
1515         query = kzalloc(sizeof(*query), gfp_mask);
1516         if (!query)
1517                 return -ENOMEM;
1518
1519         query->sa_query.port     = port;
1520         if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
1521                 status = opa_pr_query_possible(client, sa_dev, device, port_num);
1522                 if (status == PR_NOT_SUPPORTED) {
1523                         ret = -EINVAL;
1524                         goto err1;
1525                 } else if (status == PR_OPA_SUPPORTED) {
1526                         query->sa_query.flags |= IB_SA_QUERY_OPA;
1527                 } else {
1528                         query->conv_pr =
1529                                 kmalloc(sizeof(*query->conv_pr), gfp_mask);
1530                         if (!query->conv_pr) {
1531                                 ret = -ENOMEM;
1532                                 goto err1;
1533                         }
1534                 }
1535         }
1536
1537         ret = alloc_mad(&query->sa_query, gfp_mask);
1538         if (ret)
1539                 goto err2;
1540
1541         ib_sa_client_get(client);
1542         query->sa_query.client = client;
1543         query->callback        = callback;
1544         query->context         = context;
1545
1546         mad = query->sa_query.mad_buf->mad;
1547         init_mad(&query->sa_query, agent);
1548
1549         query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
1550         query->sa_query.release  = ib_sa_path_rec_release;
1551         mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
1552         mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_PATH_REC);
1553         mad->sa_hdr.comp_mask    = comp_mask;
1554
1555         if (query->sa_query.flags & IB_SA_QUERY_OPA) {
1556                 ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
1557                         rec, mad->data);
1558         } else if (query->conv_pr) {
1559                 sa_convert_path_opa_to_ib(query->conv_pr, rec);
1560                 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1561                         query->conv_pr, mad->data);
1562         } else {
1563                 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1564                         rec, mad->data);
1565         }
1566
1567         *sa_query = &query->sa_query;
1568
1569         query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
1570         query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
1571                                                 query->conv_pr : rec;
1572
1573         ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1574         if (ret < 0)
1575                 goto err3;
1576
1577         return ret;
1578
1579 err3:
1580         *sa_query = NULL;
1581         ib_sa_client_put(query->sa_query.client);
1582         free_mad(&query->sa_query);
1583 err2:
1584         kfree(query->conv_pr);
1585 err1:
1586         kfree(query);
1587         return ret;
1588 }
1589 EXPORT_SYMBOL(ib_sa_path_rec_get);
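/*
 * Illustrative caller sketch (not part of the original file): a minimal,
 * hedged example of driving ib_sa_path_rec_get() synchronously.  It assumes
 * the caller has registered my_sa_client with ib_sa_register_client() and
 * has already filled rec->dgid, rec->sgid and rec->rec_type; the names
 * my_sa_client, my_path_done() and my_resolve_path() are hypothetical.
 * The returned id could instead be handed to ib_sa_cancel_query() to abort
 * early, in which case the callback still runs with status -EINTR.
 *
 *	static void my_path_done(int status, struct sa_path_rec *resp,
 *				 void *context)
 *	{
 *		struct completion *done = context;
 *
 *		if (!status)
 *			pr_info("path resolved: sl %u mtu %u\n",
 *				resp->sl, resp->mtu);
 *		complete(done);
 *	}
 *
 *	static int my_resolve_path(struct ib_device *device, u32 port_num,
 *				   struct sa_path_rec *rec)
 *	{
 *		DECLARE_COMPLETION_ONSTACK(done);
 *		struct ib_sa_query *query;
 *		int id;
 *
 *		id = ib_sa_path_rec_get(&my_sa_client, device, port_num, rec,
 *					IB_SA_PATH_REC_DGID |
 *					IB_SA_PATH_REC_SGID,
 *					3000, GFP_KERNEL,
 *					my_path_done, &done, &query);
 *		if (id < 0)
 *			return id;
 *		wait_for_completion(&done);
 *		return 0;
 *	}
 */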
1590
1591 static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
1592                                         int status,
1593                                         struct ib_sa_mad *mad)
1594 {
1595         struct ib_sa_mcmember_query *query =
1596                 container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
1597
1598         if (mad) {
1599                 struct ib_sa_mcmember_rec rec;
1600
1601                 ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1602                           mad->data, &rec);
1603                 query->callback(status, &rec, query->context);
1604         } else
1605                 query->callback(status, NULL, query->context);
1606 }
1607
1608 static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
1609 {
1610         kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
1611 }
1612
1613 int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
1614                              struct ib_device *device, u32 port_num,
1615                              u8 method,
1616                              struct ib_sa_mcmember_rec *rec,
1617                              ib_sa_comp_mask comp_mask,
1618                              unsigned long timeout_ms, gfp_t gfp_mask,
1619                              void (*callback)(int status,
1620                                               struct ib_sa_mcmember_rec *resp,
1621                                               void *context),
1622                              void *context,
1623                              struct ib_sa_query **sa_query)
1624 {
1625         struct ib_sa_mcmember_query *query;
1626         struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1627         struct ib_sa_port   *port;
1628         struct ib_mad_agent *agent;
1629         struct ib_sa_mad *mad;
1630         int ret;
1631
1632         if (!sa_dev)
1633                 return -ENODEV;
1634
1635         port  = &sa_dev->port[port_num - sa_dev->start_port];
1636         agent = port->agent;
1637
1638         query = kzalloc(sizeof(*query), gfp_mask);
1639         if (!query)
1640                 return -ENOMEM;
1641
1642         query->sa_query.port     = port;
1643         ret = alloc_mad(&query->sa_query, gfp_mask);
1644         if (ret)
1645                 goto err1;
1646
1647         ib_sa_client_get(client);
1648         query->sa_query.client = client;
1649         query->callback        = callback;
1650         query->context         = context;
1651
1652         mad = query->sa_query.mad_buf->mad;
1653         init_mad(&query->sa_query, agent);
1654
1655         query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
1656         query->sa_query.release  = ib_sa_mcmember_rec_release;
1657         mad->mad_hdr.method      = method;
1658         mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
1659         mad->sa_hdr.comp_mask    = comp_mask;
1660
1661         ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1662                 rec, mad->data);
1663
1664         *sa_query = &query->sa_query;
1665
1666         ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1667         if (ret < 0)
1668                 goto err2;
1669
1670         return ret;
1671
1672 err2:
1673         *sa_query = NULL;
1674         ib_sa_client_put(query->sa_query.client);
1675         free_mad(&query->sa_query);
1676
1677 err1:
1678         kfree(query);
1679         return ret;
1680 }
1681
1682 /* Support GuidInfoRecord */
1683 static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
1684                                         int status,
1685                                         struct ib_sa_mad *mad)
1686 {
1687         struct ib_sa_guidinfo_query *query =
1688                 container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
1689
1690         if (mad) {
1691                 struct ib_sa_guidinfo_rec rec;
1692
1693                 ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
1694                           mad->data, &rec);
1695                 query->callback(status, &rec, query->context);
1696         } else
1697                 query->callback(status, NULL, query->context);
1698 }
1699
1700 static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
1701 {
1702         kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
1703 }
1704
1705 int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1706                               struct ib_device *device, u32 port_num,
1707                               struct ib_sa_guidinfo_rec *rec,
1708                               ib_sa_comp_mask comp_mask, u8 method,
1709                               unsigned long timeout_ms, gfp_t gfp_mask,
1710                               void (*callback)(int status,
1711                                                struct ib_sa_guidinfo_rec *resp,
1712                                                void *context),
1713                               void *context,
1714                               struct ib_sa_query **sa_query)
1715 {
1716         struct ib_sa_guidinfo_query *query;
1717         struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1718         struct ib_sa_port *port;
1719         struct ib_mad_agent *agent;
1720         struct ib_sa_mad *mad;
1721         int ret;
1722
1723         if (!sa_dev)
1724                 return -ENODEV;
1725
1726         if (method != IB_MGMT_METHOD_GET &&
1727             method != IB_MGMT_METHOD_SET &&
1728             method != IB_SA_METHOD_DELETE) {
1729                 return -EINVAL;
1730         }
1731
1732         port  = &sa_dev->port[port_num - sa_dev->start_port];
1733         agent = port->agent;
1734
1735         query = kzalloc(sizeof(*query), gfp_mask);
1736         if (!query)
1737                 return -ENOMEM;
1738
1739         query->sa_query.port = port;
1740         ret = alloc_mad(&query->sa_query, gfp_mask);
1741         if (ret)
1742                 goto err1;
1743
1744         ib_sa_client_get(client);
1745         query->sa_query.client = client;
1746         query->callback        = callback;
1747         query->context         = context;
1748
1749         mad = query->sa_query.mad_buf->mad;
1750         init_mad(&query->sa_query, agent);
1751
1752         query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
1753         query->sa_query.release  = ib_sa_guidinfo_rec_release;
1754
1755         mad->mad_hdr.method      = method;
1756         mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
1757         mad->sa_hdr.comp_mask    = comp_mask;
1758
1759         ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
1760                 mad->data);
1761
1762         *sa_query = &query->sa_query;
1763
1764         ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1765         if (ret < 0)
1766                 goto err2;
1767
1768         return ret;
1769
1770 err2:
1771         *sa_query = NULL;
1772         ib_sa_client_put(query->sa_query.client);
1773         free_mad(&query->sa_query);
1774
1775 err1:
1776         kfree(query);
1777         return ret;
1778 }
1779 EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
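/*
 * Illustrative sketch (hypothetical, not part of the original file): issuing
 * a GuidInfoRecord GET through ib_sa_guid_info_rec_query().  Only
 * IB_MGMT_METHOD_GET, IB_MGMT_METHOD_SET and IB_SA_METHOD_DELETE pass the
 * method check above.  my_sa_client and my_guid_done() are assumed to exist;
 * lid below stands for the port LID the caller wants to look up.
 *
 *	static void my_guid_done(int status, struct ib_sa_guidinfo_rec *resp,
 *				 void *context)
 *	{
 *		if (status)
 *			pr_warn("GuidInfoRecord query failed: %d\n", status);
 *	}
 *
 *	...
 *	struct ib_sa_guidinfo_rec rec = {};
 *	struct ib_sa_query *query;
 *	int id;
 *
 *	rec.lid = cpu_to_be16(lid);
 *	rec.block_num = 0;
 *	id = ib_sa_guid_info_rec_query(&my_sa_client, device, port_num, &rec,
 *				       IB_SA_GUIDINFO_REC_LID |
 *				       IB_SA_GUIDINFO_REC_BLOCK_NUM,
 *				       IB_MGMT_METHOD_GET, 3000, GFP_KERNEL,
 *				       my_guid_done, NULL, &query);
 */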
1780
1781 struct ib_classport_info_context {
1782         struct completion       done;
1783         struct ib_sa_query      *sa_query;
1784 };
1785
1786 static void ib_classportinfo_cb(void *context)
1787 {
1788         struct ib_classport_info_context *cb_ctx = context;
1789
1790         complete(&cb_ctx->done);
1791 }
1792
1793 static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1794                                               int status,
1795                                               struct ib_sa_mad *mad)
1796 {
1797         unsigned long flags;
1798         struct ib_sa_classport_info_query *query =
1799                 container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1800         struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
1801
1802         if (mad) {
1803                 if (sa_query->flags & IB_SA_QUERY_OPA) {
1804                         struct opa_class_port_info rec;
1805
1806                         ib_unpack(opa_classport_info_rec_table,
1807                                   ARRAY_SIZE(opa_classport_info_rec_table),
1808                                   mad->data, &rec);
1809
1810                         spin_lock_irqsave(&sa_query->port->classport_lock,
1811                                           flags);
1812                         if (!status && !info->valid) {
1813                                 memcpy(&info->data.opa, &rec,
1814                                        sizeof(info->data.opa));
1815
1816                                 info->valid = true;
1817                                 info->data.type = RDMA_CLASS_PORT_INFO_OPA;
1818                         }
1819                         spin_unlock_irqrestore(&sa_query->port->classport_lock,
1820                                                flags);
1821
1822                 } else {
1823                         struct ib_class_port_info rec;
1824
1825                         ib_unpack(ib_classport_info_rec_table,
1826                                   ARRAY_SIZE(ib_classport_info_rec_table),
1827                                   mad->data, &rec);
1828
1829                         spin_lock_irqsave(&sa_query->port->classport_lock,
1830                                           flags);
1831                         if (!status && !info->valid) {
1832                                 memcpy(&info->data.ib, &rec,
1833                                        sizeof(info->data.ib));
1834
1835                                 info->valid = true;
1836                                 info->data.type = RDMA_CLASS_PORT_INFO_IB;
1837                         }
1838                         spin_unlock_irqrestore(&sa_query->port->classport_lock,
1839                                                flags);
1840                 }
1841         }
1842         query->callback(query->context);
1843 }
1844
1845 static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
1846 {
1847         kfree(container_of(sa_query, struct ib_sa_classport_info_query,
1848                            sa_query));
1849 }
1850
1851 static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
1852                                           unsigned long timeout_ms,
1853                                           void (*callback)(void *context),
1854                                           void *context,
1855                                           struct ib_sa_query **sa_query)
1856 {
1857         struct ib_mad_agent *agent;
1858         struct ib_sa_classport_info_query *query;
1859         struct ib_sa_mad *mad;
1860         gfp_t gfp_mask = GFP_KERNEL;
1861         int ret;
1862
1863         agent = port->agent;
1864
1865         query = kzalloc(sizeof(*query), gfp_mask);
1866         if (!query)
1867                 return -ENOMEM;
1868
1869         query->sa_query.port = port;
1870         query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
1871                                                  port->port_num) ?
1872                                  IB_SA_QUERY_OPA : 0;
1873         ret = alloc_mad(&query->sa_query, gfp_mask);
1874         if (ret)
1875                 goto err_free;
1876
1877         query->callback = callback;
1878         query->context = context;
1879
1880         mad = query->sa_query.mad_buf->mad;
1881         init_mad(&query->sa_query, agent);
1882
1883         query->sa_query.callback = ib_sa_classport_info_rec_callback;
1884         query->sa_query.release  = ib_sa_classport_info_rec_release;
1885         mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
1886         mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
1887         mad->sa_hdr.comp_mask    = 0;
1888         *sa_query = &query->sa_query;
1889
1890         ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1891         if (ret < 0)
1892                 goto err_free_mad;
1893
1894         return ret;
1895
1896 err_free_mad:
1897         *sa_query = NULL;
1898         free_mad(&query->sa_query);
1899
1900 err_free:
1901         kfree(query);
1902         return ret;
1903 }
1904
1905 static void update_ib_cpi(struct work_struct *work)
1906 {
1907         struct ib_sa_port *port =
1908                 container_of(work, struct ib_sa_port, ib_cpi_work.work);
1909         struct ib_classport_info_context *cb_context;
1910         unsigned long flags;
1911         int ret;
1912
1913         /* If the classport info is valid, nothing
1914          * to do here.
1915          */
1916         spin_lock_irqsave(&port->classport_lock, flags);
1917         if (port->classport_info.valid) {
1918                 spin_unlock_irqrestore(&port->classport_lock, flags);
1919                 return;
1920         }
1921         spin_unlock_irqrestore(&port->classport_lock, flags);
1922
1923         cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
1924         if (!cb_context)
1925                 goto err_nomem;
1926
1927         init_completion(&cb_context->done);
1928
1929         ret = ib_sa_classport_info_rec_query(port, 3000,
1930                                              ib_classportinfo_cb, cb_context,
1931                                              &cb_context->sa_query);
1932         if (ret < 0)
1933                 goto free_cb_err;
1934         wait_for_completion(&cb_context->done);
1935 free_cb_err:
1936         kfree(cb_context);
1937         spin_lock_irqsave(&port->classport_lock, flags);
1938
1939         /* If the classport info is still not valid, the query must have
1940          * failed for some reason.  Retry issuing the query.
1941          */
1942         if (!port->classport_info.valid) {
1943                 port->classport_info.retry_cnt++;
1944                 if (port->classport_info.retry_cnt <=
1945                     IB_SA_CPI_MAX_RETRY_CNT) {
1946                         unsigned long delay =
1947                                 msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
1948
1949                         queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
1950                 }
1951         }
1952         spin_unlock_irqrestore(&port->classport_lock, flags);
1953
1954 err_nomem:
1955         return;
1956 }
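/*
 * Timing note (illustrative reading of the code above, not normative): a port
 * event queues the first ClassPortInfo query IB_SA_CPI_RETRY_WAIT (1000 ms)
 * after the event.  Each run of update_ib_cpi() that still leaves
 * classport_info invalid bumps retry_cnt and requeues itself another
 * IB_SA_CPI_RETRY_WAIT later, so at most 1 + IB_SA_CPI_MAX_RETRY_CNT (four)
 * queries are issued before the port waits for the next event to clear
 * retry_cnt and start over.
 */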
1957
1958 static void send_handler(struct ib_mad_agent *agent,
1959                          struct ib_mad_send_wc *mad_send_wc)
1960 {
1961         struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
1962         unsigned long flags;
1963
1964         if (query->callback)
1965                 switch (mad_send_wc->status) {
1966                 case IB_WC_SUCCESS:
1967                         /* No callback -- recv_handler() already ran it */
1968                         break;
1969                 case IB_WC_RESP_TIMEOUT_ERR:
1970                         query->callback(query, -ETIMEDOUT, NULL);
1971                         break;
1972                 case IB_WC_WR_FLUSH_ERR:
1973                         query->callback(query, -EINTR, NULL);
1974                         break;
1975                 default:
1976                         query->callback(query, -EIO, NULL);
1977                         break;
1978                 }
1979
1980         xa_lock_irqsave(&queries, flags);
1981         __xa_erase(&queries, query->id);
1982         xa_unlock_irqrestore(&queries, flags);
1983
1984         free_mad(query);
1985         if (query->client)
1986                 ib_sa_client_put(query->client);
1987         query->release(query);
1988 }
1989
1990 static void recv_handler(struct ib_mad_agent *mad_agent,
1991                          struct ib_mad_send_buf *send_buf,
1992                          struct ib_mad_recv_wc *mad_recv_wc)
1993 {
1994         struct ib_sa_query *query;
1995
1996         if (!send_buf)
1997                 return;
1998
1999         query = send_buf->context[0];
2000         if (query->callback) {
2001                 if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
2002                         query->callback(query,
2003                                         mad_recv_wc->recv_buf.mad->mad_hdr.status ?
2004                                         -EINVAL : 0,
2005                                         (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
2006                 else
2007                         query->callback(query, -EIO, NULL);
2008         }
2009
2010         ib_free_recv_mad(mad_recv_wc);
2011 }
2012
2013 static void update_sm_ah(struct work_struct *work)
2014 {
2015         struct ib_sa_port *port =
2016                 container_of(work, struct ib_sa_port, update_task);
2017         struct ib_sa_sm_ah *new_ah;
2018         struct ib_port_attr port_attr;
2019         struct rdma_ah_attr   ah_attr;
2020         bool grh_required;
2021
2022         if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
2023                 pr_warn("Couldn't query port\n");
2024                 return;
2025         }
2026
2027         new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
2028         if (!new_ah)
2029                 return;
2030
2031         kref_init(&new_ah->ref);
2032         new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
2033
2034         new_ah->pkey_index = 0;
2035         if (ib_find_pkey(port->agent->device, port->port_num,
2036                          IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
2037                 pr_err("Couldn't find index for default PKey\n");
2038
2039         memset(&ah_attr, 0, sizeof(ah_attr));
2040         ah_attr.type = rdma_ah_find_type(port->agent->device,
2041                                          port->port_num);
2042         rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
2043         rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
2044         rdma_ah_set_port_num(&ah_attr, port->port_num);
2045
2046         grh_required = rdma_is_grh_required(port->agent->device,
2047                                             port->port_num);
2048
2049         /*
2050          * The OPA sm_lid of 0xFFFF needs special handling so that it can be
2051          * differentiated from a permissive LID of 0xFFFF.  We set the
2052          * grh_required flag here so the SA can program the DGID in the
2053          * address handle appropriately.
2054          */
2055         if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
2056             (grh_required ||
2057              port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
2058                 rdma_ah_set_make_grd(&ah_attr, true);
2059
2060         if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
2061                 rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
2062                 rdma_ah_set_subnet_prefix(&ah_attr,
2063                                           cpu_to_be64(port_attr.subnet_prefix));
2064                 rdma_ah_set_interface_id(&ah_attr,
2065                                          cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
2066         }
2067
2068         new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
2069                                     RDMA_CREATE_AH_SLEEPABLE);
2070         if (IS_ERR(new_ah->ah)) {
2071                 pr_warn("Couldn't create new SM AH\n");
2072                 kfree(new_ah);
2073                 return;
2074         }
2075
2076         spin_lock_irq(&port->ah_lock);
2077         if (port->sm_ah)
2078                 kref_put(&port->sm_ah->ref, free_sm_ah);
2079         port->sm_ah = new_ah;
2080         spin_unlock_irq(&port->ah_lock);
2081 }
2082
2083 static void ib_sa_event(struct ib_event_handler *handler,
2084                         struct ib_event *event)
2085 {
2086         if (event->event == IB_EVENT_PORT_ERR    ||
2087             event->event == IB_EVENT_PORT_ACTIVE ||
2088             event->event == IB_EVENT_LID_CHANGE  ||
2089             event->event == IB_EVENT_PKEY_CHANGE ||
2090             event->event == IB_EVENT_SM_CHANGE   ||
2091             event->event == IB_EVENT_CLIENT_REREGISTER) {
2092                 unsigned long flags;
2093                 struct ib_sa_device *sa_dev =
2094                         container_of(handler, typeof(*sa_dev), event_handler);
2095                 u32 port_num = event->element.port_num - sa_dev->start_port;
2096                 struct ib_sa_port *port = &sa_dev->port[port_num];
2097
2098                 if (!rdma_cap_ib_sa(handler->device, port->port_num))
2099                         return;
2100
2101                 spin_lock_irqsave(&port->ah_lock, flags);
2102                 if (port->sm_ah)
2103                         kref_put(&port->sm_ah->ref, free_sm_ah);
2104                 port->sm_ah = NULL;
2105                 spin_unlock_irqrestore(&port->ah_lock, flags);
2106
2107                 if (event->event == IB_EVENT_SM_CHANGE ||
2108                     event->event == IB_EVENT_CLIENT_REREGISTER ||
2109                     event->event == IB_EVENT_LID_CHANGE ||
2110                     event->event == IB_EVENT_PORT_ACTIVE) {
2111                         unsigned long delay =
2112                                 msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
2113
2114                         spin_lock_irqsave(&port->classport_lock, flags);
2115                         port->classport_info.valid = false;
2116                         port->classport_info.retry_cnt = 0;
2117                         spin_unlock_irqrestore(&port->classport_lock, flags);
2118                         queue_delayed_work(ib_wq,
2119                                            &port->ib_cpi_work, delay);
2120                 }
2121                 queue_work(ib_wq, &sa_dev->port[port_num].update_task);
2122         }
2123 }
2124
2125 static int ib_sa_add_one(struct ib_device *device)
2126 {
2127         struct ib_sa_device *sa_dev;
2128         int s, e, i;
2129         int count = 0;
2130         int ret;
2131
2132         s = rdma_start_port(device);
2133         e = rdma_end_port(device);
2134
2135         sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
2136         if (!sa_dev)
2137                 return -ENOMEM;
2138
2139         sa_dev->start_port = s;
2140         sa_dev->end_port   = e;
2141
2142         for (i = 0; i <= e - s; ++i) {
2143                 spin_lock_init(&sa_dev->port[i].ah_lock);
2144                 if (!rdma_cap_ib_sa(device, i + 1))
2145                         continue;
2146
2147                 sa_dev->port[i].sm_ah    = NULL;
2148                 sa_dev->port[i].port_num = i + s;
2149
2150                 spin_lock_init(&sa_dev->port[i].classport_lock);
2151                 sa_dev->port[i].classport_info.valid = false;
2152
2153                 sa_dev->port[i].agent =
2154                         ib_register_mad_agent(device, i + s, IB_QPT_GSI,
2155                                               NULL, 0, send_handler,
2156                                               recv_handler, sa_dev, 0);
2157                 if (IS_ERR(sa_dev->port[i].agent)) {
2158                         ret = PTR_ERR(sa_dev->port[i].agent);
2159                         goto err;
2160                 }
2161
2162                 INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
2163                 INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
2164                                   update_ib_cpi);
2165
2166                 count++;
2167         }
2168
2169         if (!count) {
2170                 ret = -EOPNOTSUPP;
2171                 goto free;
2172         }
2173
2174         ib_set_client_data(device, &sa_client, sa_dev);
2175
2176         /*
2177          * We register our event handler after everything is set up,
2178          * and then update our cached info after the event handler is
2179          * registered to avoid any problems if a port changes state
2180          * during our initialization.
2181          */
2182
2183         INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
2184         ib_register_event_handler(&sa_dev->event_handler);
2185
2186         for (i = 0; i <= e - s; ++i) {
2187                 if (rdma_cap_ib_sa(device, i + 1))
2188                         update_sm_ah(&sa_dev->port[i].update_task);
2189         }
2190
2191         return 0;
2192
2193 err:
2194         while (--i >= 0) {
2195                 if (rdma_cap_ib_sa(device, i + 1))
2196                         ib_unregister_mad_agent(sa_dev->port[i].agent);
2197         }
2198 free:
2199         kfree(sa_dev);
2200         return ret;
2201 }
2202
2203 static void ib_sa_remove_one(struct ib_device *device, void *client_data)
2204 {
2205         struct ib_sa_device *sa_dev = client_data;
2206         int i;
2207
2208         ib_unregister_event_handler(&sa_dev->event_handler);
2209         flush_workqueue(ib_wq);
2210
2211         for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
2212                 if (rdma_cap_ib_sa(device, i + 1)) {
2213                         cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
2214                         ib_unregister_mad_agent(sa_dev->port[i].agent);
2215                         if (sa_dev->port[i].sm_ah)
2216                                 kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
2217                 }
2218
2219         }
2220
2221         kfree(sa_dev);
2222 }
2223
2224 int ib_sa_init(void)
2225 {
2226         int ret;
2227
2228         get_random_bytes(&tid, sizeof tid);
2229
2230         atomic_set(&ib_nl_sa_request_seq, 0);
2231
2232         ret = ib_register_client(&sa_client);
2233         if (ret) {
2234                 pr_err("Couldn't register ib_sa client\n");
2235                 goto err1;
2236         }
2237
2238         ret = mcast_init();
2239         if (ret) {
2240                 pr_err("Couldn't initialize multicast handling\n");
2241                 goto err2;
2242         }
2243
2244         ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM);
2245         if (!ib_nl_wq) {
2246                 ret = -ENOMEM;
2247                 goto err3;
2248         }
2249
2250         INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
2251
2252         return 0;
2253
2254 err3:
2255         mcast_cleanup();
2256 err2:
2257         ib_unregister_client(&sa_client);
2258 err1:
2259         return ret;
2260 }
2261
2262 void ib_sa_cleanup(void)
2263 {
2264         cancel_delayed_work(&ib_nl_timed_work);
2265         flush_workqueue(ib_nl_wq);
2266         destroy_workqueue(ib_nl_wq);
2267         mcast_cleanup();
2268         ib_unregister_client(&sa_client);
2269         WARN_ON(!xa_empty(&queries));
2270 }