ksmbd: fix race condition between tree conn lookup and disconnect
[platform/kernel/linux-starfive.git] / drivers / infiniband / hw / mlx5 / fs.c
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
4  */
5
6 #include <rdma/ib_user_verbs.h>
7 #include <rdma/ib_verbs.h>
8 #include <rdma/uverbs_types.h>
9 #include <rdma/uverbs_ioctl.h>
10 #include <rdma/uverbs_std_types.h>
11 #include <rdma/mlx5_user_ioctl_cmds.h>
12 #include <rdma/mlx5_user_ioctl_verbs.h>
13 #include <rdma/ib_hdrs.h>
14 #include <rdma/ib_umem.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/fs.h>
17 #include <linux/mlx5/fs_helpers.h>
18 #include <linux/mlx5/eswitch.h>
19 #include <net/inet_ecn.h>
20 #include "mlx5_ib.h"
21 #include "counters.h"
22 #include "devx.h"
23 #include "fs.h"
24
25 #define UVERBS_MODULE_NAME mlx5_ib
26 #include <rdma/uverbs_named_ioctl.h>
27
/* Bit positions inside the value built by get_match_criteria_enable(). */
enum {
	MATCH_CRITERIA_ENABLE_OUTER_BIT = 0,	/* outer_headers */
	MATCH_CRITERIA_ENABLE_MISC_BIT = 1,	/* misc_parameters */
	MATCH_CRITERIA_ENABLE_INNER_BIT = 2,	/* inner_headers */
	MATCH_CRITERIA_ENABLE_MISC2_BIT = 3,	/* misc_parameters_2 */
};
34
/*
 * True when memchr_inv() finds no byte different from zero in the
 * @headers section of the fte_match_param buffer at @match_criteria.
 *
 * The whole expansion is parenthesized so it composes safely with
 * surrounding operators, and the last line deliberately carries no
 * line-continuation so the macro cannot absorb the line that follows it.
 */
#define HEADER_IS_ZERO(match_criteria, headers)                              \
	(!memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
		     0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))
38
39 static u8 get_match_criteria_enable(u32 *match_criteria)
40 {
41         u8 match_criteria_enable;
42
43         match_criteria_enable =
44                 (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
45                 MATCH_CRITERIA_ENABLE_OUTER_BIT;
46         match_criteria_enable |=
47                 (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
48                 MATCH_CRITERIA_ENABLE_MISC_BIT;
49         match_criteria_enable |=
50                 (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
51                 MATCH_CRITERIA_ENABLE_INNER_BIT;
52         match_criteria_enable |=
53                 (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
54                 MATCH_CRITERIA_ENABLE_MISC2_BIT;
55
56         return match_criteria_enable;
57 }
58
59 static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
60 {
61         u8 entry_mask;
62         u8 entry_val;
63         int err = 0;
64
65         if (!mask)
66                 goto out;
67
68         entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
69                               ip_protocol);
70         entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
71                              ip_protocol);
72         if (!entry_mask) {
73                 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
74                 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
75                 goto out;
76         }
77         /* Don't override existing ip protocol */
78         if (mask != entry_mask || val != entry_val)
79                 err = -EINVAL;
80 out:
81         return err;
82 }
83
84 static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
85                            bool inner)
86 {
87         if (inner) {
88                 MLX5_SET(fte_match_set_misc,
89                          misc_c, inner_ipv6_flow_label, mask);
90                 MLX5_SET(fte_match_set_misc,
91                          misc_v, inner_ipv6_flow_label, val);
92         } else {
93                 MLX5_SET(fte_match_set_misc,
94                          misc_c, outer_ipv6_flow_label, mask);
95                 MLX5_SET(fte_match_set_misc,
96                          misc_v, outer_ipv6_flow_label, val);
97         }
98 }
99
100 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
101 {
102         MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
103         MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
104         MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
105         MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
106 }
107
108 static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
109 {
110         if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
111             !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
112                 return -EOPNOTSUPP;
113
114         if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
115             !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
116                 return -EOPNOTSUPP;
117
118         if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
119             !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
120                 return -EOPNOTSUPP;
121
122         if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
123             !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
124                 return -EOPNOTSUPP;
125
126         return 0;
127 }
128
/* Name of the last supported field in each flow-spec filter / action. */
#define LAST_ETH_FIELD vlan_tag
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
#define LAST_DROP_FIELD size
#define LAST_COUNTERS_FIELD counters

/*
 * Field is the last supported field.
 *
 * Scans the bytes of @filter that come after @field and evaluates to a
 * non-NULL pointer when memchr_inv() finds one that is not zero, i.e. when
 * the caller populated something beyond the last supported field.
 * @filter is parenthesized so that any lvalue expression can be passed.
 */
#define FIELDS_NOT_SUPPORTED(filter, field)                                    \
	memchr_inv((void *)&(filter).field + sizeof((filter).field), 0,        \
		   sizeof(filter) - offsetofend(typeof(filter), field))
142
143 int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
144                            bool is_egress,
145                            struct mlx5_flow_act *action)
146 {
147
148         switch (maction->ib_action.type) {
149         case IB_FLOW_ACTION_UNSPECIFIED:
150                 if (maction->flow_action_raw.sub_type ==
151                     MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
152                         if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
153                                 return -EINVAL;
154                         action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
155                         action->modify_hdr =
156                                 maction->flow_action_raw.modify_hdr;
157                         return 0;
158                 }
159                 if (maction->flow_action_raw.sub_type ==
160                     MLX5_IB_FLOW_ACTION_DECAP) {
161                         if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
162                                 return -EINVAL;
163                         action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
164                         return 0;
165                 }
166                 if (maction->flow_action_raw.sub_type ==
167                     MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
168                         if (action->action &
169                             MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
170                                 return -EINVAL;
171                         action->action |=
172                                 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
173                         action->pkt_reformat =
174                                 maction->flow_action_raw.pkt_reformat;
175                         return 0;
176                 }
177                 fallthrough;
178         default:
179                 return -EOPNOTSUPP;
180         }
181 }
182
183 static int parse_flow_attr(struct mlx5_core_dev *mdev,
184                            struct mlx5_flow_spec *spec,
185                            const union ib_flow_spec *ib_spec,
186                            const struct ib_flow_attr *flow_attr,
187                            struct mlx5_flow_act *action, u32 prev_type)
188 {
189         struct mlx5_flow_context *flow_context = &spec->flow_context;
190         u32 *match_c = spec->match_criteria;
191         u32 *match_v = spec->match_value;
192         void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
193                                            misc_parameters);
194         void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
195                                            misc_parameters);
196         void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
197                                             misc_parameters_2);
198         void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
199                                             misc_parameters_2);
200         void *headers_c;
201         void *headers_v;
202         int match_ipv;
203         int ret;
204
205         if (ib_spec->type & IB_FLOW_SPEC_INNER) {
206                 headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
207                                          inner_headers);
208                 headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
209                                          inner_headers);
210                 match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
211                                         ft_field_support.inner_ip_version);
212         } else {
213                 headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
214                                          outer_headers);
215                 headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
216                                          outer_headers);
217                 match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
218                                         ft_field_support.outer_ip_version);
219         }
220
221         switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
222         case IB_FLOW_SPEC_ETH:
223                 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
224                         return -EOPNOTSUPP;
225
226                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
227                                              dmac_47_16),
228                                 ib_spec->eth.mask.dst_mac);
229                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
230                                              dmac_47_16),
231                                 ib_spec->eth.val.dst_mac);
232
233                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
234                                              smac_47_16),
235                                 ib_spec->eth.mask.src_mac);
236                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
237                                              smac_47_16),
238                                 ib_spec->eth.val.src_mac);
239
240                 if (ib_spec->eth.mask.vlan_tag) {
241                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
242                                  cvlan_tag, 1);
243                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
244                                  cvlan_tag, 1);
245
246                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
247                                  first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
248                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
249                                  first_vid, ntohs(ib_spec->eth.val.vlan_tag));
250
251                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
252                                  first_cfi,
253                                  ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
254                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
255                                  first_cfi,
256                                  ntohs(ib_spec->eth.val.vlan_tag) >> 12);
257
258                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
259                                  first_prio,
260                                  ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
261                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
262                                  first_prio,
263                                  ntohs(ib_spec->eth.val.vlan_tag) >> 13);
264                 }
265                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
266                          ethertype, ntohs(ib_spec->eth.mask.ether_type));
267                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
268                          ethertype, ntohs(ib_spec->eth.val.ether_type));
269                 break;
270         case IB_FLOW_SPEC_IPV4:
271                 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
272                         return -EOPNOTSUPP;
273
274                 if (match_ipv) {
275                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
276                                  ip_version, 0xf);
277                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
278                                  ip_version, MLX5_FS_IPV4_VERSION);
279                 } else {
280                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
281                                  ethertype, 0xffff);
282                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
283                                  ethertype, ETH_P_IP);
284                 }
285
286                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
287                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
288                        &ib_spec->ipv4.mask.src_ip,
289                        sizeof(ib_spec->ipv4.mask.src_ip));
290                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
291                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
292                        &ib_spec->ipv4.val.src_ip,
293                        sizeof(ib_spec->ipv4.val.src_ip));
294                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
295                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
296                        &ib_spec->ipv4.mask.dst_ip,
297                        sizeof(ib_spec->ipv4.mask.dst_ip));
298                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
299                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
300                        &ib_spec->ipv4.val.dst_ip,
301                        sizeof(ib_spec->ipv4.val.dst_ip));
302
303                 set_tos(headers_c, headers_v,
304                         ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
305
306                 if (set_proto(headers_c, headers_v,
307                               ib_spec->ipv4.mask.proto,
308                               ib_spec->ipv4.val.proto))
309                         return -EINVAL;
310                 break;
311         case IB_FLOW_SPEC_IPV6:
312                 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
313                         return -EOPNOTSUPP;
314
315                 if (match_ipv) {
316                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
317                                  ip_version, 0xf);
318                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
319                                  ip_version, MLX5_FS_IPV6_VERSION);
320                 } else {
321                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
322                                  ethertype, 0xffff);
323                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
324                                  ethertype, ETH_P_IPV6);
325                 }
326
327                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
328                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
329                        &ib_spec->ipv6.mask.src_ip,
330                        sizeof(ib_spec->ipv6.mask.src_ip));
331                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
332                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
333                        &ib_spec->ipv6.val.src_ip,
334                        sizeof(ib_spec->ipv6.val.src_ip));
335                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
336                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
337                        &ib_spec->ipv6.mask.dst_ip,
338                        sizeof(ib_spec->ipv6.mask.dst_ip));
339                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
340                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
341                        &ib_spec->ipv6.val.dst_ip,
342                        sizeof(ib_spec->ipv6.val.dst_ip));
343
344                 set_tos(headers_c, headers_v,
345                         ib_spec->ipv6.mask.traffic_class,
346                         ib_spec->ipv6.val.traffic_class);
347
348                 if (set_proto(headers_c, headers_v,
349                               ib_spec->ipv6.mask.next_hdr,
350                               ib_spec->ipv6.val.next_hdr))
351                         return -EINVAL;
352
353                 set_flow_label(misc_params_c, misc_params_v,
354                                ntohl(ib_spec->ipv6.mask.flow_label),
355                                ntohl(ib_spec->ipv6.val.flow_label),
356                                ib_spec->type & IB_FLOW_SPEC_INNER);
357                 break;
358         case IB_FLOW_SPEC_ESP:
359                 return -EOPNOTSUPP;
360         case IB_FLOW_SPEC_TCP:
361                 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
362                                          LAST_TCP_UDP_FIELD))
363                         return -EOPNOTSUPP;
364
365                 if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
366                         return -EINVAL;
367
368                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
369                          ntohs(ib_spec->tcp_udp.mask.src_port));
370                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
371                          ntohs(ib_spec->tcp_udp.val.src_port));
372
373                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
374                          ntohs(ib_spec->tcp_udp.mask.dst_port));
375                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
376                          ntohs(ib_spec->tcp_udp.val.dst_port));
377                 break;
378         case IB_FLOW_SPEC_UDP:
379                 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
380                                          LAST_TCP_UDP_FIELD))
381                         return -EOPNOTSUPP;
382
383                 if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
384                         return -EINVAL;
385
386                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
387                          ntohs(ib_spec->tcp_udp.mask.src_port));
388                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
389                          ntohs(ib_spec->tcp_udp.val.src_port));
390
391                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
392                          ntohs(ib_spec->tcp_udp.mask.dst_port));
393                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
394                          ntohs(ib_spec->tcp_udp.val.dst_port));
395                 break;
396         case IB_FLOW_SPEC_GRE:
397                 if (ib_spec->gre.mask.c_ks_res0_ver)
398                         return -EOPNOTSUPP;
399
400                 if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
401                         return -EINVAL;
402
403                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
404                          0xff);
405                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
406                          IPPROTO_GRE);
407
408                 MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
409                          ntohs(ib_spec->gre.mask.protocol));
410                 MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
411                          ntohs(ib_spec->gre.val.protocol));
412
413                 memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
414                                     gre_key.nvgre.hi),
415                        &ib_spec->gre.mask.key,
416                        sizeof(ib_spec->gre.mask.key));
417                 memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
418                                     gre_key.nvgre.hi),
419                        &ib_spec->gre.val.key,
420                        sizeof(ib_spec->gre.val.key));
421                 break;
422         case IB_FLOW_SPEC_MPLS:
423                 switch (prev_type) {
424                 case IB_FLOW_SPEC_UDP:
425                         if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
426                                                    ft_field_support.outer_first_mpls_over_udp),
427                                                    &ib_spec->mpls.mask.tag))
428                                 return -EOPNOTSUPP;
429
430                         memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
431                                             outer_first_mpls_over_udp),
432                                &ib_spec->mpls.val.tag,
433                                sizeof(ib_spec->mpls.val.tag));
434                         memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
435                                             outer_first_mpls_over_udp),
436                                &ib_spec->mpls.mask.tag,
437                                sizeof(ib_spec->mpls.mask.tag));
438                         break;
439                 case IB_FLOW_SPEC_GRE:
440                         if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
441                                                    ft_field_support.outer_first_mpls_over_gre),
442                                                    &ib_spec->mpls.mask.tag))
443                                 return -EOPNOTSUPP;
444
445                         memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
446                                             outer_first_mpls_over_gre),
447                                &ib_spec->mpls.val.tag,
448                                sizeof(ib_spec->mpls.val.tag));
449                         memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
450                                             outer_first_mpls_over_gre),
451                                &ib_spec->mpls.mask.tag,
452                                sizeof(ib_spec->mpls.mask.tag));
453                         break;
454                 default:
455                         if (ib_spec->type & IB_FLOW_SPEC_INNER) {
456                                 if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
457                                                            ft_field_support.inner_first_mpls),
458                                                            &ib_spec->mpls.mask.tag))
459                                         return -EOPNOTSUPP;
460
461                                 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
462                                                     inner_first_mpls),
463                                        &ib_spec->mpls.val.tag,
464                                        sizeof(ib_spec->mpls.val.tag));
465                                 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
466                                                     inner_first_mpls),
467                                        &ib_spec->mpls.mask.tag,
468                                        sizeof(ib_spec->mpls.mask.tag));
469                         } else {
470                                 if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
471                                                            ft_field_support.outer_first_mpls),
472                                                            &ib_spec->mpls.mask.tag))
473                                         return -EOPNOTSUPP;
474
475                                 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
476                                                     outer_first_mpls),
477                                        &ib_spec->mpls.val.tag,
478                                        sizeof(ib_spec->mpls.val.tag));
479                                 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
480                                                     outer_first_mpls),
481                                        &ib_spec->mpls.mask.tag,
482                                        sizeof(ib_spec->mpls.mask.tag));
483                         }
484                 }
485                 break;
486         case IB_FLOW_SPEC_VXLAN_TUNNEL:
487                 if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
488                                          LAST_TUNNEL_FIELD))
489                         return -EOPNOTSUPP;
490
491                 MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
492                          ntohl(ib_spec->tunnel.mask.tunnel_id));
493                 MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
494                          ntohl(ib_spec->tunnel.val.tunnel_id));
495                 break;
496         case IB_FLOW_SPEC_ACTION_TAG:
497                 if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
498                                          LAST_FLOW_TAG_FIELD))
499                         return -EOPNOTSUPP;
500                 if (ib_spec->flow_tag.tag_id >= BIT(24))
501                         return -EINVAL;
502
503                 flow_context->flow_tag = ib_spec->flow_tag.tag_id;
504                 flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
505                 break;
506         case IB_FLOW_SPEC_ACTION_DROP:
507                 if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
508                                          LAST_DROP_FIELD))
509                         return -EOPNOTSUPP;
510                 action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
511                 break;
512         case IB_FLOW_SPEC_ACTION_HANDLE:
513                 ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
514                         flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
515                 if (ret)
516                         return ret;
517                 break;
518         case IB_FLOW_SPEC_ACTION_COUNT:
519                 if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
520                                          LAST_COUNTERS_FIELD))
521                         return -EOPNOTSUPP;
522
523                 /* for now support only one counters spec per flow */
524                 if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
525                         return -EINVAL;
526
527                 action->counters = ib_spec->flow_count.counters;
528                 action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
529                 break;
530         default:
531                 return -EINVAL;
532         }
533
534         return 0;
535 }
536
537 /* If a flow could catch both multicast and unicast packets,
538  * it won't fall into the multicast flow steering table and this rule
539  * could steal other multicast packets.
540  */
541 static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
542 {
543         union ib_flow_spec *flow_spec;
544
545         if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
546             ib_attr->num_of_specs < 1)
547                 return false;
548
549         flow_spec = (union ib_flow_spec *)(ib_attr + 1);
550         if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
551                 struct ib_flow_spec_ipv4 *ipv4_spec;
552
553                 ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
554                 if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
555                         return true;
556
557                 return false;
558         }
559
560         if (flow_spec->type == IB_FLOW_SPEC_ETH) {
561                 struct ib_flow_spec_eth *eth_spec;
562
563                 eth_spec = (struct ib_flow_spec_eth *)flow_spec;
564                 return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
565                        is_multicast_ether_addr(eth_spec->val.dst_mac);
566         }
567
568         return false;
569 }
570
571 static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
572                                const struct ib_flow_attr *flow_attr,
573                                bool check_inner)
574 {
575         union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
576         int match_ipv = check_inner ?
577                         MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
578                                         ft_field_support.inner_ip_version) :
579                         MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
580                                         ft_field_support.outer_ip_version);
581         int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
582         bool ipv4_spec_valid, ipv6_spec_valid;
583         unsigned int ip_spec_type = 0;
584         bool has_ethertype = false;
585         unsigned int spec_index;
586         bool mask_valid = true;
587         u16 eth_type = 0;
588         bool type_valid;
589
590         /* Validate that ethertype is correct */
591         for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
592                 if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
593                     ib_spec->eth.mask.ether_type) {
594                         mask_valid = (ib_spec->eth.mask.ether_type ==
595                                       htons(0xffff));
596                         has_ethertype = true;
597                         eth_type = ntohs(ib_spec->eth.val.ether_type);
598                 } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
599                            (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
600                         ip_spec_type = ib_spec->type;
601                 }
602                 ib_spec = (void *)ib_spec + ib_spec->size;
603         }
604
605         type_valid = (!has_ethertype) || (!ip_spec_type);
606         if (!type_valid && mask_valid) {
607                 ipv4_spec_valid = (eth_type == ETH_P_IP) &&
608                         (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
609                 ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
610                         (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
611
612                 type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
613                              (((eth_type == ETH_P_MPLS_UC) ||
614                                (eth_type == ETH_P_MPLS_MC)) && match_ipv);
615         }
616
617         return type_valid;
618 }
619
620 static bool is_valid_attr(struct mlx5_core_dev *mdev,
621                           const struct ib_flow_attr *flow_attr)
622 {
623         return is_valid_ethertype(mdev, flow_attr, false) &&
624                is_valid_ethertype(mdev, flow_attr, true);
625 }
626
627 static void put_flow_table(struct mlx5_ib_dev *dev,
628                            struct mlx5_ib_flow_prio *prio, bool ft_added)
629 {
630         prio->refcount -= !!ft_added;
631         if (!prio->refcount) {
632                 mlx5_destroy_flow_table(prio->flow_table);
633                 prio->flow_table = NULL;
634         }
635 }
636
637 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
638 {
639         struct mlx5_ib_flow_handler *handler = container_of(flow_id,
640                                                           struct mlx5_ib_flow_handler,
641                                                           ibflow);
642         struct mlx5_ib_flow_handler *iter, *tmp;
643         struct mlx5_ib_dev *dev = handler->dev;
644
645         mutex_lock(&dev->flow_db->lock);
646
647         list_for_each_entry_safe(iter, tmp, &handler->list, list) {
648                 mlx5_del_flow_rules(iter->rule);
649                 put_flow_table(dev, iter->prio, true);
650                 list_del(&iter->list);
651                 kfree(iter);
652         }
653
654         mlx5_del_flow_rules(handler->rule);
655         put_flow_table(dev, handler->prio, true);
656         mlx5_ib_counters_clear_description(handler->ibcounters);
657         mutex_unlock(&dev->flow_db->lock);
658         if (handler->flow_matcher)
659                 atomic_dec(&handler->flow_matcher->usecnt);
660         kfree(handler);
661
662         return 0;
663 }
664
/*
 * Translate an IB flow priority into a core flow-steering priority.
 *
 * Each IB priority owns two consecutive core priorities: the even one
 * (2 * priority) is used when @dont_trap is set, the odd one
 * (2 * priority + 1) otherwise.
 */
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
	unsigned int core_prio = 2 * priority;

	return dont_trap ? core_prio : core_prio + 1;
}
672
/*
 * Direction of a flow table.  get_flow_table() also uses the value as an
 * index into flow_db->sniffer[], so the numeric values matter.
 */
enum flow_table_type {
	MLX5_IB_FT_RX = 0,
	MLX5_IB_FT_TX = 1,
};

/* Number of auto-groups requested for IB_FLOW_ATTR_NORMAL tables. */
#define MLX5_FS_MAX_TYPES	 6
/* Upper bound on entries requested for IB_FLOW_ATTR_NORMAL tables. */
#define MLX5_FS_MAX_ENTRIES	 BIT(16)
680
681 static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
682 {
683         struct mlx5_ib_dev *dev = to_mdev(device);
684
685         return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
686 }
687
688 static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
689                                            struct mlx5_flow_namespace *ns,
690                                            struct mlx5_ib_flow_prio *prio,
691                                            int priority,
692                                            int num_entries, int num_groups,
693                                            u32 flags)
694 {
695         struct mlx5_flow_table_attr ft_attr = {};
696         struct mlx5_flow_table *ft;
697
698         ft_attr.prio = priority;
699         ft_attr.max_fte = num_entries;
700         ft_attr.flags = flags;
701         ft_attr.autogroup.max_num_groups = num_groups;
702         ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
703         if (IS_ERR(ft))
704                 return ERR_CAST(ft);
705
706         prio->flow_table = ft;
707         prio->refcount = 0;
708         return prio;
709 }
710
711 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
712                                                 struct ib_flow_attr *flow_attr,
713                                                 enum flow_table_type ft_type)
714 {
715         bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
716         struct mlx5_flow_namespace *ns = NULL;
717         enum mlx5_flow_namespace_type fn_type;
718         struct mlx5_ib_flow_prio *prio;
719         struct mlx5_flow_table *ft;
720         int max_table_size;
721         int num_entries;
722         int num_groups;
723         bool esw_encap;
724         u32 flags = 0;
725         int priority;
726
727         max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
728                                                        log_max_ft_size));
729         esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
730                 DEVLINK_ESWITCH_ENCAP_MODE_NONE;
731         switch (flow_attr->type) {
732         case IB_FLOW_ATTR_NORMAL:
733                 if (flow_is_multicast_only(flow_attr) && !dont_trap)
734                         priority = MLX5_IB_FLOW_MCAST_PRIO;
735                 else
736                         priority = ib_prio_to_core_prio(flow_attr->priority,
737                                                         dont_trap);
738                 if (ft_type == MLX5_IB_FT_RX) {
739                         fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
740                         prio = &dev->flow_db->prios[priority];
741                         if (!dev->is_rep && !esw_encap &&
742                             MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
743                                 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
744                         if (!dev->is_rep && !esw_encap &&
745                             MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
746                                                       reformat_l3_tunnel_to_l2))
747                                 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
748                 } else {
749                         max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
750                                 dev->mdev, log_max_ft_size));
751                         fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
752                         prio = &dev->flow_db->egress_prios[priority];
753                         if (!dev->is_rep && !esw_encap &&
754                             MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
755                                 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
756                 }
757                 ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
758                 num_entries = MLX5_FS_MAX_ENTRIES;
759                 num_groups = MLX5_FS_MAX_TYPES;
760                 break;
761         case IB_FLOW_ATTR_ALL_DEFAULT:
762         case IB_FLOW_ATTR_MC_DEFAULT:
763                 ns = mlx5_get_flow_namespace(dev->mdev,
764                                              MLX5_FLOW_NAMESPACE_LEFTOVERS);
765                 build_leftovers_ft_param(&priority, &num_entries, &num_groups);
766                 prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
767                 break;
768         case IB_FLOW_ATTR_SNIFFER:
769                 if (!MLX5_CAP_FLOWTABLE(dev->mdev,
770                                         allow_sniffer_and_nic_rx_shared_tir))
771                         return ERR_PTR(-EOPNOTSUPP);
772
773                 ns = mlx5_get_flow_namespace(
774                         dev->mdev, ft_type == MLX5_IB_FT_RX ?
775                                            MLX5_FLOW_NAMESPACE_SNIFFER_RX :
776                                            MLX5_FLOW_NAMESPACE_SNIFFER_TX);
777
778                 prio = &dev->flow_db->sniffer[ft_type];
779                 priority = 0;
780                 num_entries = 1;
781                 num_groups = 1;
782                 break;
783         default:
784                 break;
785         }
786
787         if (!ns)
788                 return ERR_PTR(-EOPNOTSUPP);
789
790         max_table_size = min_t(int, num_entries, max_table_size);
791
792         ft = prio->flow_table;
793         if (!ft)
794                 return _get_prio(dev, ns, prio, priority, max_table_size,
795                                  num_groups, flags);
796
797         return prio;
798 }
799
/*
 * Flow table priorities that mlx5_ib_fs_add_op_fc() uses for the optional
 * counters placed in the RDMA RX counters namespace.
 */
enum {
	RDMA_RX_ECN_OPCOUNTER_PRIO = 0,
	RDMA_RX_CNP_OPCOUNTER_PRIO = 1,
};

/*
 * Flow table priority that mlx5_ib_fs_add_op_fc() uses for the optional
 * counter placed in the RDMA TX counters namespace.
 */
enum {
	RDMA_TX_CNP_OPCOUNTER_PRIO = 0,
};
808
809 static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
810                               struct mlx5_flow_spec *spec)
811 {
812         if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
813                                         ft_field_support.source_vhca_port) ||
814             !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
815                                         ft_field_support.source_vhca_port))
816                 return -EOPNOTSUPP;
817
818         MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
819                          misc_parameters.source_vhca_port);
820         MLX5_SET(fte_match_param, &spec->match_value,
821                  misc_parameters.source_vhca_port, port_num);
822
823         return 0;
824 }
825
826 static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
827                            struct mlx5_flow_spec *spec, int ipv)
828 {
829         if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
830                                         ft_field_support.outer_ip_version))
831                 return -EOPNOTSUPP;
832
833         if (mlx5_core_mp_enabled(dev->mdev) &&
834             set_vhca_port_spec(dev, port_num, spec))
835                 return -EOPNOTSUPP;
836
837         MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
838                          outer_headers.ip_ecn);
839         MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
840                  INET_ECN_CE);
841         MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
842                          outer_headers.ip_version);
843         MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
844                  ipv);
845
846         spec->match_criteria_enable =
847                 get_match_criteria_enable(spec->match_criteria);
848
849         return 0;
850 }
851
852 static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
853                         struct mlx5_flow_spec *spec)
854 {
855         if (mlx5_core_mp_enabled(dev->mdev) &&
856             set_vhca_port_spec(dev, port_num, spec))
857                 return -EOPNOTSUPP;
858
859         MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
860                          misc_parameters.bth_opcode);
861         MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
862                  IB_BTH_OPCODE_CNP);
863
864         spec->match_criteria_enable =
865                 get_match_criteria_enable(spec->match_criteria);
866
867         return 0;
868 }
869
870 int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
871                          struct mlx5_ib_op_fc *opfc,
872                          enum mlx5_ib_optional_counter_type type)
873 {
874         enum mlx5_flow_namespace_type fn_type;
875         int priority, i, err, spec_num;
876         struct mlx5_flow_act flow_act = {};
877         struct mlx5_flow_destination dst;
878         struct mlx5_flow_namespace *ns;
879         struct mlx5_ib_flow_prio *prio;
880         struct mlx5_flow_spec *spec;
881
882         spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
883         if (!spec)
884                 return -ENOMEM;
885
886         switch (type) {
887         case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
888                 if (set_ecn_ce_spec(dev, port_num, &spec[0],
889                                     MLX5_FS_IPV4_VERSION) ||
890                     set_ecn_ce_spec(dev, port_num, &spec[1],
891                                     MLX5_FS_IPV6_VERSION)) {
892                         err = -EOPNOTSUPP;
893                         goto free;
894                 }
895                 spec_num = 2;
896                 fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
897                 priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
898                 break;
899
900         case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
901                 if (!MLX5_CAP_FLOWTABLE(dev->mdev,
902                                         ft_field_support_2_nic_receive_rdma.bth_opcode) ||
903                     set_cnp_spec(dev, port_num, &spec[0])) {
904                         err = -EOPNOTSUPP;
905                         goto free;
906                 }
907                 spec_num = 1;
908                 fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
909                 priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
910                 break;
911
912         case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
913                 if (!MLX5_CAP_FLOWTABLE(dev->mdev,
914                                         ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
915                     set_cnp_spec(dev, port_num, &spec[0])) {
916                         err = -EOPNOTSUPP;
917                         goto free;
918                 }
919                 spec_num = 1;
920                 fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
921                 priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
922                 break;
923
924         default:
925                 err = -EOPNOTSUPP;
926                 goto free;
927         }
928
929         ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
930         if (!ns) {
931                 err = -EOPNOTSUPP;
932                 goto free;
933         }
934
935         prio = &dev->flow_db->opfcs[type];
936         if (!prio->flow_table) {
937                 prio = _get_prio(dev, ns, prio, priority,
938                                  dev->num_ports * MAX_OPFC_RULES, 1, 0);
939                 if (IS_ERR(prio)) {
940                         err = PTR_ERR(prio);
941                         goto free;
942                 }
943         }
944
945         dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
946         dst.counter_id = mlx5_fc_id(opfc->fc);
947
948         flow_act.action =
949                 MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
950
951         for (i = 0; i < spec_num; i++) {
952                 opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
953                                                     &flow_act, &dst, 1);
954                 if (IS_ERR(opfc->rule[i])) {
955                         err = PTR_ERR(opfc->rule[i]);
956                         goto del_rules;
957                 }
958         }
959         prio->refcount += spec_num;
960         kfree(spec);
961
962         return 0;
963
964 del_rules:
965         for (i -= 1; i >= 0; i--)
966                 mlx5_del_flow_rules(opfc->rule[i]);
967         put_flow_table(dev, prio, false);
968 free:
969         kfree(spec);
970         return err;
971 }
972
973 void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
974                              struct mlx5_ib_op_fc *opfc,
975                              enum mlx5_ib_optional_counter_type type)
976 {
977         int i;
978
979         for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
980                 mlx5_del_flow_rules(opfc->rule[i]);
981                 put_flow_table(dev, &dev->flow_db->opfcs[type], true);
982         }
983 }
984
985 static void set_underlay_qp(struct mlx5_ib_dev *dev,
986                             struct mlx5_flow_spec *spec,
987                             u32 underlay_qpn)
988 {
989         void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
990                                            spec->match_criteria,
991                                            misc_parameters);
992         void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
993                                            misc_parameters);
994
995         if (underlay_qpn &&
996             MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
997                                       ft_field_support.bth_dst_qp)) {
998                 MLX5_SET(fte_match_set_misc,
999                          misc_params_v, bth_dst_qp, underlay_qpn);
1000                 MLX5_SET(fte_match_set_misc,
1001                          misc_params_c, bth_dst_qp, 0xffffff);
1002         }
1003 }
1004
1005 static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
1006                                          struct mlx5_flow_spec *spec,
1007                                          struct mlx5_eswitch_rep *rep)
1008 {
1009         struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
1010         void *misc;
1011
1012         if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1013                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1014                                     misc_parameters_2);
1015
1016                 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1017                          mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
1018                                                                    rep->vport));
1019                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1020                                     misc_parameters_2);
1021
1022                 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1023                          mlx5_eswitch_get_vport_metadata_mask());
1024         } else {
1025                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1026                                     misc_parameters);
1027
1028                 MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
1029
1030                 misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1031                                     misc_parameters);
1032
1033                 MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1034         }
1035 }
1036
1037 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
1038                                                       struct mlx5_ib_flow_prio *ft_prio,
1039                                                       const struct ib_flow_attr *flow_attr,
1040                                                       struct mlx5_flow_destination *dst,
1041                                                       u32 underlay_qpn,
1042                                                       struct mlx5_ib_create_flow *ucmd)
1043 {
1044         struct mlx5_flow_table  *ft = ft_prio->flow_table;
1045         struct mlx5_ib_flow_handler *handler;
1046         struct mlx5_flow_act flow_act = {};
1047         struct mlx5_flow_spec *spec;
1048         struct mlx5_flow_destination dest_arr[2] = {};
1049         struct mlx5_flow_destination *rule_dst = dest_arr;
1050         const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
1051         unsigned int spec_index;
1052         u32 prev_type = 0;
1053         int err = 0;
1054         int dest_num = 0;
1055         bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1056
1057         if (!is_valid_attr(dev->mdev, flow_attr))
1058                 return ERR_PTR(-EINVAL);
1059
1060         if (dev->is_rep && is_egress)
1061                 return ERR_PTR(-EINVAL);
1062
1063         spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1064         handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1065         if (!handler || !spec) {
1066                 err = -ENOMEM;
1067                 goto free;
1068         }
1069
1070         INIT_LIST_HEAD(&handler->list);
1071
1072         for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
1073                 err = parse_flow_attr(dev->mdev, spec,
1074                                       ib_flow, flow_attr, &flow_act,
1075                                       prev_type);
1076                 if (err < 0)
1077                         goto free;
1078
1079                 prev_type = ((union ib_flow_spec *)ib_flow)->type;
1080                 ib_flow += ((union ib_flow_spec *)ib_flow)->size;
1081         }
1082
1083         if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
1084                 memcpy(&dest_arr[0], dst, sizeof(*dst));
1085                 dest_num++;
1086         }
1087
1088         if (!flow_is_multicast_only(flow_attr))
1089                 set_underlay_qp(dev, spec, underlay_qpn);
1090
1091         if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
1092                 struct mlx5_eswitch_rep *rep;
1093
1094                 rep = dev->port[flow_attr->port - 1].rep;
1095                 if (!rep) {
1096                         err = -EINVAL;
1097                         goto free;
1098                 }
1099
1100                 mlx5_ib_set_rule_source_port(dev, spec, rep);
1101         }
1102
1103         spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
1104
1105         if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1106                 struct mlx5_ib_mcounters *mcounters;
1107
1108                 err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
1109                 if (err)
1110                         goto free;
1111
1112                 mcounters = to_mcounters(flow_act.counters);
1113                 handler->ibcounters = flow_act.counters;
1114                 dest_arr[dest_num].type =
1115                         MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1116                 dest_arr[dest_num].counter_id =
1117                         mlx5_fc_id(mcounters->hw_cntrs_hndl);
1118                 dest_num++;
1119         }
1120
1121         if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1122                 if (!dest_num)
1123                         rule_dst = NULL;
1124         } else {
1125                 if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1126                         flow_act.action |=
1127                                 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
1128                 if (is_egress)
1129                         flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1130                 else if (dest_num)
1131                         flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1132         }
1133
1134         if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
1135             (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1136              flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1137                 mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
1138                              spec->flow_context.flow_tag, flow_attr->type);
1139                 err = -EINVAL;
1140                 goto free;
1141         }
1142         handler->rule = mlx5_add_flow_rules(ft, spec,
1143                                             &flow_act,
1144                                             rule_dst, dest_num);
1145
1146         if (IS_ERR(handler->rule)) {
1147                 err = PTR_ERR(handler->rule);
1148                 goto free;
1149         }
1150
1151         ft_prio->refcount++;
1152         handler->prio = ft_prio;
1153         handler->dev = dev;
1154
1155         ft_prio->flow_table = ft;
1156 free:
1157         if (err && handler) {
1158                 mlx5_ib_counters_clear_description(handler->ibcounters);
1159                 kfree(handler);
1160         }
1161         kvfree(spec);
1162         return err ? ERR_PTR(err) : handler;
1163 }
1164
/*
 * Convenience wrapper around _create_flow_rule() for rules that carry
 * neither an underlay QP number nor a user command.
 */
static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
						     struct mlx5_ib_flow_prio *ft_prio,
						     const struct ib_flow_attr *flow_attr,
						     struct mlx5_flow_destination *dst)
{
	return _create_flow_rule(dev, ft_prio, flow_attr, dst,
				 0 /* underlay_qpn */, NULL /* ucmd */);
}
1172
/* Indices into the leftovers_specs[] table of create_leftovers_rule(). */
enum {
	LEFTOVERS_MC = 0,
	LEFTOVERS_UC = 1,
};
1177
1178 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
1179                                                           struct mlx5_ib_flow_prio *ft_prio,
1180                                                           struct ib_flow_attr *flow_attr,
1181                                                           struct mlx5_flow_destination *dst)
1182 {
1183         struct mlx5_ib_flow_handler *handler_ucast = NULL;
1184         struct mlx5_ib_flow_handler *handler = NULL;
1185
1186         static struct {
1187                 struct ib_flow_attr     flow_attr;
1188                 struct ib_flow_spec_eth eth_flow;
1189         } leftovers_specs[] = {
1190                 [LEFTOVERS_MC] = {
1191                         .flow_attr = {
1192                                 .num_of_specs = 1,
1193                                 .size = sizeof(leftovers_specs[0])
1194                         },
1195                         .eth_flow = {
1196                                 .type = IB_FLOW_SPEC_ETH,
1197                                 .size = sizeof(struct ib_flow_spec_eth),
1198                                 .mask = {.dst_mac = {0x1} },
1199                                 .val =  {.dst_mac = {0x1} }
1200                         }
1201                 },
1202                 [LEFTOVERS_UC] = {
1203                         .flow_attr = {
1204                                 .num_of_specs = 1,
1205                                 .size = sizeof(leftovers_specs[0])
1206                         },
1207                         .eth_flow = {
1208                                 .type = IB_FLOW_SPEC_ETH,
1209                                 .size = sizeof(struct ib_flow_spec_eth),
1210                                 .mask = {.dst_mac = {0x1} },
1211                                 .val = {.dst_mac = {} }
1212                         }
1213                 }
1214         };
1215
1216         handler = create_flow_rule(dev, ft_prio,
1217                                    &leftovers_specs[LEFTOVERS_MC].flow_attr,
1218                                    dst);
1219         if (!IS_ERR(handler) &&
1220             flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
1221                 handler_ucast = create_flow_rule(dev, ft_prio,
1222                                                  &leftovers_specs[LEFTOVERS_UC].flow_attr,
1223                                                  dst);
1224                 if (IS_ERR(handler_ucast)) {
1225                         mlx5_del_flow_rules(handler->rule);
1226                         ft_prio->refcount--;
1227                         kfree(handler);
1228                         handler = handler_ucast;
1229                 } else {
1230                         list_add(&handler_ucast->list, &handler->list);
1231                 }
1232         }
1233
1234         return handler;
1235 }
1236
1237 static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
1238                                                         struct mlx5_ib_flow_prio *ft_rx,
1239                                                         struct mlx5_ib_flow_prio *ft_tx,
1240                                                         struct mlx5_flow_destination *dst)
1241 {
1242         struct mlx5_ib_flow_handler *handler_rx;
1243         struct mlx5_ib_flow_handler *handler_tx;
1244         int err;
1245         static const struct ib_flow_attr flow_attr  = {
1246                 .num_of_specs = 0,
1247                 .type = IB_FLOW_ATTR_SNIFFER,
1248                 .size = sizeof(flow_attr)
1249         };
1250
1251         handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
1252         if (IS_ERR(handler_rx)) {
1253                 err = PTR_ERR(handler_rx);
1254                 goto err;
1255         }
1256
1257         handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
1258         if (IS_ERR(handler_tx)) {
1259                 err = PTR_ERR(handler_tx);
1260                 goto err_tx;
1261         }
1262
1263         list_add(&handler_tx->list, &handler_rx->list);
1264
1265         return handler_rx;
1266
1267 err_tx:
1268         mlx5_del_flow_rules(handler_rx->rule);
1269         ft_rx->refcount--;
1270         kfree(handler_rx);
1271 err:
1272         return ERR_PTR(err);
1273 }
1274
1275 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1276                                            struct ib_flow_attr *flow_attr,
1277                                            struct ib_udata *udata)
1278 {
1279         struct mlx5_ib_dev *dev = to_mdev(qp->device);
1280         struct mlx5_ib_qp *mqp = to_mqp(qp);
1281         struct mlx5_ib_flow_handler *handler = NULL;
1282         struct mlx5_flow_destination *dst = NULL;
1283         struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
1284         struct mlx5_ib_flow_prio *ft_prio;
1285         bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1286         struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
1287         size_t min_ucmd_sz, required_ucmd_sz;
1288         int err;
1289         int underlay_qpn;
1290
1291         if (udata && udata->inlen) {
1292                 min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
1293                 if (udata->inlen < min_ucmd_sz)
1294                         return ERR_PTR(-EOPNOTSUPP);
1295
1296                 err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
1297                 if (err)
1298                         return ERR_PTR(err);
1299
1300                 /* currently supports only one counters data */
1301                 if (ucmd_hdr.ncounters_data > 1)
1302                         return ERR_PTR(-EINVAL);
1303
1304                 required_ucmd_sz = min_ucmd_sz +
1305                         sizeof(struct mlx5_ib_flow_counters_data) *
1306                         ucmd_hdr.ncounters_data;
1307                 if (udata->inlen > required_ucmd_sz &&
1308                     !ib_is_udata_cleared(udata, required_ucmd_sz,
1309                                          udata->inlen - required_ucmd_sz))
1310                         return ERR_PTR(-EOPNOTSUPP);
1311
1312                 ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
1313                 if (!ucmd)
1314                         return ERR_PTR(-ENOMEM);
1315
1316                 err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
1317                 if (err)
1318                         goto free_ucmd;
1319         }
1320
1321         if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
1322                 err = -ENOMEM;
1323                 goto free_ucmd;
1324         }
1325
1326         if (flow_attr->flags &
1327             ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
1328                 err = -EINVAL;
1329                 goto free_ucmd;
1330         }
1331
1332         if (is_egress &&
1333             (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1334              flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1335                 err = -EINVAL;
1336                 goto free_ucmd;
1337         }
1338
1339         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1340         if (!dst) {
1341                 err = -ENOMEM;
1342                 goto free_ucmd;
1343         }
1344
1345         mutex_lock(&dev->flow_db->lock);
1346
1347         ft_prio = get_flow_table(dev, flow_attr,
1348                                  is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
1349         if (IS_ERR(ft_prio)) {
1350                 err = PTR_ERR(ft_prio);
1351                 goto unlock;
1352         }
1353         if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
1354                 ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
1355                 if (IS_ERR(ft_prio_tx)) {
1356                         err = PTR_ERR(ft_prio_tx);
1357                         ft_prio_tx = NULL;
1358                         goto destroy_ft;
1359                 }
1360         }
1361
1362         if (is_egress) {
1363                 dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1364         } else {
1365                 dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1366                 if (mqp->is_rss)
1367                         dst->tir_num = mqp->rss_qp.tirn;
1368                 else
1369                         dst->tir_num = mqp->raw_packet_qp.rq.tirn;
1370         }
1371
1372         switch (flow_attr->type) {
1373         case IB_FLOW_ATTR_NORMAL:
1374                 underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
1375                                        mqp->underlay_qpn :
1376                                        0;
1377                 handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
1378                                             underlay_qpn, ucmd);
1379                 break;
1380         case IB_FLOW_ATTR_ALL_DEFAULT:
1381         case IB_FLOW_ATTR_MC_DEFAULT:
1382                 handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
1383                 break;
1384         case IB_FLOW_ATTR_SNIFFER:
1385                 handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
1386                 break;
1387         default:
1388                 err = -EINVAL;
1389                 goto destroy_ft;
1390         }
1391
1392         if (IS_ERR(handler)) {
1393                 err = PTR_ERR(handler);
1394                 handler = NULL;
1395                 goto destroy_ft;
1396         }
1397
1398         mutex_unlock(&dev->flow_db->lock);
1399         kfree(dst);
1400         kfree(ucmd);
1401
1402         return &handler->ibflow;
1403
1404 destroy_ft:
1405         put_flow_table(dev, ft_prio, false);
1406         if (ft_prio_tx)
1407                 put_flow_table(dev, ft_prio_tx, false);
1408 unlock:
1409         mutex_unlock(&dev->flow_db->lock);
1410         kfree(dst);
1411 free_ucmd:
1412         kfree(ucmd);
1413         return ERR_PTR(err);
1414 }
1415
1416 static struct mlx5_ib_flow_prio *
1417 _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
1418                 enum mlx5_flow_namespace_type ns_type,
1419                 bool mcast)
1420 {
1421         struct mlx5_flow_namespace *ns = NULL;
1422         struct mlx5_ib_flow_prio *prio = NULL;
1423         int max_table_size = 0;
1424         bool esw_encap;
1425         u32 flags = 0;
1426         int priority;
1427
1428         if (mcast)
1429                 priority = MLX5_IB_FLOW_MCAST_PRIO;
1430         else
1431                 priority = ib_prio_to_core_prio(user_priority, false);
1432
1433         esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
1434                 DEVLINK_ESWITCH_ENCAP_MODE_NONE;
1435         switch (ns_type) {
1436         case MLX5_FLOW_NAMESPACE_BYPASS:
1437                 max_table_size = BIT(
1438                         MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
1439                 if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
1440                         flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1441                 if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1442                                               reformat_l3_tunnel_to_l2) &&
1443                     !esw_encap)
1444                         flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1445                 break;
1446         case MLX5_FLOW_NAMESPACE_EGRESS:
1447                 max_table_size = BIT(
1448                         MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
1449                 if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
1450                     !esw_encap)
1451                         flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1452                 break;
1453         case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1454                 max_table_size = BIT(
1455                         MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
1456                 if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
1457                         flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1458                 if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
1459                                                reformat_l3_tunnel_to_l2) &&
1460                     esw_encap)
1461                         flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1462                 priority = user_priority;
1463                 break;
1464         case MLX5_FLOW_NAMESPACE_RDMA_RX:
1465                 max_table_size = BIT(
1466                         MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
1467                 priority = user_priority;
1468                 break;
1469         case MLX5_FLOW_NAMESPACE_RDMA_TX:
1470                 max_table_size = BIT(
1471                         MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
1472                 priority = user_priority;
1473                 break;
1474         default:
1475                 break;
1476         }
1477
1478         max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
1479
1480         ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
1481         if (!ns)
1482                 return ERR_PTR(-EOPNOTSUPP);
1483
1484         switch (ns_type) {
1485         case MLX5_FLOW_NAMESPACE_BYPASS:
1486                 prio = &dev->flow_db->prios[priority];
1487                 break;
1488         case MLX5_FLOW_NAMESPACE_EGRESS:
1489                 prio = &dev->flow_db->egress_prios[priority];
1490                 break;
1491         case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1492                 prio = &dev->flow_db->fdb[priority];
1493                 break;
1494         case MLX5_FLOW_NAMESPACE_RDMA_RX:
1495                 prio = &dev->flow_db->rdma_rx[priority];
1496                 break;
1497         case MLX5_FLOW_NAMESPACE_RDMA_TX:
1498                 prio = &dev->flow_db->rdma_tx[priority];
1499                 break;
1500         default: return ERR_PTR(-EINVAL);
1501         }
1502
1503         if (!prio)
1504                 return ERR_PTR(-EINVAL);
1505
1506         if (prio->flow_table)
1507                 return prio;
1508
1509         return _get_prio(dev, ns, prio, priority, max_table_size,
1510                          MLX5_FS_MAX_TYPES, flags);
1511 }
1512
1513 static struct mlx5_ib_flow_handler *
1514 _create_raw_flow_rule(struct mlx5_ib_dev *dev,
1515                       struct mlx5_ib_flow_prio *ft_prio,
1516                       struct mlx5_flow_destination *dst,
1517                       struct mlx5_ib_flow_matcher  *fs_matcher,
1518                       struct mlx5_flow_context *flow_context,
1519                       struct mlx5_flow_act *flow_act,
1520                       void *cmd_in, int inlen,
1521                       int dst_num)
1522 {
1523         struct mlx5_ib_flow_handler *handler;
1524         struct mlx5_flow_spec *spec;
1525         struct mlx5_flow_table *ft = ft_prio->flow_table;
1526         int err = 0;
1527
1528         spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1529         handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1530         if (!handler || !spec) {
1531                 err = -ENOMEM;
1532                 goto free;
1533         }
1534
1535         INIT_LIST_HEAD(&handler->list);
1536
1537         memcpy(spec->match_value, cmd_in, inlen);
1538         memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
1539                fs_matcher->mask_len);
1540         spec->match_criteria_enable = fs_matcher->match_criteria_enable;
1541         spec->flow_context = *flow_context;
1542
1543         handler->rule = mlx5_add_flow_rules(ft, spec,
1544                                             flow_act, dst, dst_num);
1545
1546         if (IS_ERR(handler->rule)) {
1547                 err = PTR_ERR(handler->rule);
1548                 goto free;
1549         }
1550
1551         ft_prio->refcount++;
1552         handler->prio = ft_prio;
1553         handler->dev = dev;
1554         ft_prio->flow_table = ft;
1555
1556 free:
1557         if (err)
1558                 kfree(handler);
1559         kvfree(spec);
1560         return err ? ERR_PTR(err) : handler;
1561 }
1562
1563 static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
1564                                 void *match_v)
1565 {
1566         void *match_c;
1567         void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
1568         void *dmac, *dmac_mask;
1569         void *ipv4, *ipv4_mask;
1570
1571         if (!(fs_matcher->match_criteria_enable &
1572               (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
1573                 return false;
1574
1575         match_c = fs_matcher->matcher_mask.match_params;
1576         match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
1577                                            outer_headers);
1578         match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
1579                                            outer_headers);
1580
1581         dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1582                             dmac_47_16);
1583         dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1584                                  dmac_47_16);
1585
1586         if (is_multicast_ether_addr(dmac) &&
1587             is_multicast_ether_addr(dmac_mask))
1588                 return true;
1589
1590         ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1591                             dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1592
1593         ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1594                                  dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1595
1596         if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
1597             ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
1598                 return true;
1599
1600         return false;
1601 }
1602
1603 static struct mlx5_ib_flow_handler *raw_fs_rule_add(
1604         struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
1605         struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
1606         u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
1607 {
1608         struct mlx5_flow_destination *dst;
1609         struct mlx5_ib_flow_prio *ft_prio;
1610         struct mlx5_ib_flow_handler *handler;
1611         int dst_num = 0;
1612         bool mcast;
1613         int err;
1614
1615         if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
1616                 return ERR_PTR(-EOPNOTSUPP);
1617
1618         if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
1619                 return ERR_PTR(-ENOMEM);
1620
1621         dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
1622         if (!dst)
1623                 return ERR_PTR(-ENOMEM);
1624
1625         mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
1626         mutex_lock(&dev->flow_db->lock);
1627
1628         ft_prio = _get_flow_table(dev, fs_matcher->priority,
1629                                   fs_matcher->ns_type, mcast);
1630         if (IS_ERR(ft_prio)) {
1631                 err = PTR_ERR(ft_prio);
1632                 goto unlock;
1633         }
1634
1635         switch (dest_type) {
1636         case MLX5_FLOW_DESTINATION_TYPE_TIR:
1637                 dst[dst_num].type = dest_type;
1638                 dst[dst_num++].tir_num = dest_id;
1639                 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1640                 break;
1641         case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
1642                 dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
1643                 dst[dst_num++].ft_num = dest_id;
1644                 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1645                 break;
1646         case MLX5_FLOW_DESTINATION_TYPE_PORT:
1647                 dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1648                 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1649                 break;
1650         default:
1651                 break;
1652         }
1653
1654         if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1655                 dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1656                 dst[dst_num].counter_id = counter_id;
1657                 dst_num++;
1658         }
1659
1660         handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
1661                                         fs_matcher, flow_context, flow_act,
1662                                         cmd_in, inlen, dst_num);
1663
1664         if (IS_ERR(handler)) {
1665                 err = PTR_ERR(handler);
1666                 goto destroy_ft;
1667         }
1668
1669         mutex_unlock(&dev->flow_db->lock);
1670         atomic_inc(&fs_matcher->usecnt);
1671         handler->flow_matcher = fs_matcher;
1672
1673         kfree(dst);
1674
1675         return handler;
1676
1677 destroy_ft:
1678         put_flow_table(dev, ft_prio, false);
1679 unlock:
1680         mutex_unlock(&dev->flow_db->lock);
1681         kfree(dst);
1682
1683         return ERR_PTR(err);
1684 }
1685
1686 static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
1687 {
1688         switch (maction->flow_action_raw.sub_type) {
1689         case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
1690                 mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
1691                                            maction->flow_action_raw.modify_hdr);
1692                 break;
1693         case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
1694                 mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
1695                                              maction->flow_action_raw.pkt_reformat);
1696                 break;
1697         case MLX5_IB_FLOW_ACTION_DECAP:
1698                 break;
1699         default:
1700                 break;
1701         }
1702 }
1703
1704 static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
1705 {
1706         struct mlx5_ib_flow_action *maction = to_mflow_act(action);
1707
1708         switch (action->type) {
1709         case IB_FLOW_ACTION_UNSPECIFIED:
1710                 destroy_flow_action_raw(maction);
1711                 break;
1712         default:
1713                 WARN_ON(true);
1714                 break;
1715         }
1716
1717         kfree(maction);
1718         return 0;
1719 }
1720
1721 static int
1722 mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
1723                              enum mlx5_flow_namespace_type *namespace)
1724 {
1725         switch (table_type) {
1726         case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
1727                 *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
1728                 break;
1729         case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
1730                 *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
1731                 break;
1732         case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
1733                 *namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
1734                 break;
1735         case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
1736                 *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
1737                 break;
1738         case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
1739                 *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
1740                 break;
1741         default:
1742                 return -EINVAL;
1743         }
1744
1745         return 0;
1746 }
1747
1748 static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
1749         [MLX5_IB_FLOW_TYPE_NORMAL] = {
1750                 .type = UVERBS_ATTR_TYPE_PTR_IN,
1751                 .u.ptr = {
1752                         .len = sizeof(u16), /* data is priority */
1753                         .min_len = sizeof(u16),
1754                 }
1755         },
1756         [MLX5_IB_FLOW_TYPE_SNIFFER] = {
1757                 .type = UVERBS_ATTR_TYPE_PTR_IN,
1758                 UVERBS_ATTR_NO_DATA(),
1759         },
1760         [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
1761                 .type = UVERBS_ATTR_TYPE_PTR_IN,
1762                 UVERBS_ATTR_NO_DATA(),
1763         },
1764         [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
1765                 .type = UVERBS_ATTR_TYPE_PTR_IN,
1766                 UVERBS_ATTR_NO_DATA(),
1767         },
1768 };
1769
1770 static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
1771 {
1772         struct devx_obj *devx_obj = obj;
1773         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1774
1775         switch (opcode) {
1776         case MLX5_CMD_OP_DESTROY_TIR:
1777                 *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1778                 *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
1779                                     obj_id);
1780                 return true;
1781
1782         case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
1783                 *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1784                 *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
1785                                     table_id);
1786                 return true;
1787         default:
1788                 return false;
1789         }
1790 }
1791
1792 static int get_dests(struct uverbs_attr_bundle *attrs,
1793                      struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
1794                      int *dest_type, struct ib_qp **qp, u32 *flags)
1795 {
1796         bool dest_devx, dest_qp;
1797         void *devx_obj;
1798         int err;
1799
1800         dest_devx = uverbs_attr_is_valid(attrs,
1801                                          MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
1802         dest_qp = uverbs_attr_is_valid(attrs,
1803                                        MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
1804
1805         *flags = 0;
1806         err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
1807                                  MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
1808                                          MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
1809         if (err)
1810                 return err;
1811
1812         /* Both flags are not allowed */
1813         if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
1814             *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
1815                 return -EINVAL;
1816
1817         if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
1818                 if (dest_devx && (dest_qp || *flags))
1819                         return -EINVAL;
1820                 else if (dest_qp && *flags)
1821                         return -EINVAL;
1822         }
1823
1824         /* Allow only DEVX object, drop as dest for FDB */
1825         if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
1826             !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
1827                 return -EINVAL;
1828
1829         /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
1830         if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1831             ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
1832                 return -EINVAL;
1833
1834         *qp = NULL;
1835         if (dest_devx) {
1836                 devx_obj =
1837                         uverbs_attr_get_obj(attrs,
1838                                             MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
1839
1840                 /* Verify that the given DEVX object is a flow
1841                  * steering destination.
1842                  */
1843                 if (!is_flow_dest(devx_obj, dest_id, dest_type))
1844                         return -EINVAL;
1845                 /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
1846                 if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
1847                      fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1848                     *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
1849                         return -EINVAL;
1850         } else if (dest_qp) {
1851                 struct mlx5_ib_qp *mqp;
1852
1853                 *qp = uverbs_attr_get_obj(attrs,
1854                                           MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
1855                 if (IS_ERR(*qp))
1856                         return PTR_ERR(*qp);
1857
1858                 if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
1859                         return -EINVAL;
1860
1861                 mqp = to_mqp(*qp);
1862                 if (mqp->is_rss)
1863                         *dest_id = mqp->rss_qp.tirn;
1864                 else
1865                         *dest_id = mqp->raw_packet_qp.rq.tirn;
1866                 *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1867         } else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1868                     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
1869                    !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
1870                 *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1871         }
1872
1873         if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
1874             (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1875              fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
1876                 return -EINVAL;
1877
1878         return 0;
1879 }
1880
1881 static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
1882 {
1883         struct devx_obj *devx_obj = obj;
1884         u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1885
1886         if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
1887
1888                 if (offset && offset >= devx_obj->flow_counter_bulk_size)
1889                         return false;
1890
1891                 *counter_id = MLX5_GET(dealloc_flow_counter_in,
1892                                        devx_obj->dinbox,
1893                                        flow_counter_id);
1894                 *counter_id += offset;
1895                 return true;
1896         }
1897
1898         return false;
1899 }
1900
1901 #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
1902 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
1903         struct uverbs_attr_bundle *attrs)
1904 {
1905         struct mlx5_flow_context flow_context = {.flow_tag =
1906                 MLX5_FS_DEFAULT_FLOW_TAG};
1907         u32 *offset_attr, offset = 0, counter_id = 0;
1908         int dest_id, dest_type = -1, inlen, len, ret, i;
1909         struct mlx5_ib_flow_handler *flow_handler;
1910         struct mlx5_ib_flow_matcher *fs_matcher;
1911         struct ib_uobject **arr_flow_actions;
1912         struct ib_uflow_resources *uflow_res;
1913         struct mlx5_flow_act flow_act = {};
1914         struct ib_qp *qp = NULL;
1915         void *devx_obj, *cmd_in;
1916         struct ib_uobject *uobj;
1917         struct mlx5_ib_dev *dev;
1918         u32 flags;
1919
1920         if (!capable(CAP_NET_RAW))
1921                 return -EPERM;
1922
1923         fs_matcher = uverbs_attr_get_obj(attrs,
1924                                          MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
1925         uobj =  uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
1926         dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1927
1928         if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
1929                 return -EINVAL;
1930
1931         if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
1932                 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
1933
1934         if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
1935                 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
1936
1937         len = uverbs_attr_get_uobjs_arr(attrs,
1938                 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
1939         if (len) {
1940                 devx_obj = arr_flow_actions[0]->object;
1941
1942                 if (uverbs_attr_is_valid(attrs,
1943                                          MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
1944
1945                         int num_offsets = uverbs_attr_ptr_get_array_size(
1946                                 attrs,
1947                                 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
1948                                 sizeof(u32));
1949
1950                         if (num_offsets != 1)
1951                                 return -EINVAL;
1952
1953                         offset_attr = uverbs_attr_get_alloced_ptr(
1954                                 attrs,
1955                                 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
1956                         offset = *offset_attr;
1957                 }
1958
1959                 if (!is_flow_counter(devx_obj, offset, &counter_id))
1960                         return -EINVAL;
1961
1962                 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1963         }
1964
1965         cmd_in = uverbs_attr_get_alloced_ptr(
1966                 attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1967         inlen = uverbs_attr_get_len(attrs,
1968                                     MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1969
1970         uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
1971         if (!uflow_res)
1972                 return -ENOMEM;
1973
1974         len = uverbs_attr_get_uobjs_arr(attrs,
1975                 MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
1976         for (i = 0; i < len; i++) {
1977                 struct mlx5_ib_flow_action *maction =
1978                         to_mflow_act(arr_flow_actions[i]->object);
1979
1980                 ret = parse_flow_flow_action(maction, false, &flow_act);
1981                 if (ret)
1982                         goto err_out;
1983                 flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
1984                                    arr_flow_actions[i]->object);
1985         }
1986
1987         ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
1988                                MLX5_IB_ATTR_CREATE_FLOW_TAG);
1989         if (!ret) {
1990                 if (flow_context.flow_tag >= BIT(24)) {
1991                         ret = -EINVAL;
1992                         goto err_out;
1993                 }
1994                 flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
1995         }
1996
1997         flow_handler =
1998                 raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
1999                                 counter_id, cmd_in, inlen, dest_id, dest_type);
2000         if (IS_ERR(flow_handler)) {
2001                 ret = PTR_ERR(flow_handler);
2002                 goto err_out;
2003         }
2004
2005         ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
2006
2007         return 0;
2008 err_out:
2009         ib_uverbs_flow_resources_free(uflow_res);
2010         return ret;
2011 }
2012
2013 static int flow_matcher_cleanup(struct ib_uobject *uobject,
2014                                 enum rdma_remove_reason why,
2015                                 struct uverbs_attr_bundle *attrs)
2016 {
2017         struct mlx5_ib_flow_matcher *obj = uobject->object;
2018
2019         if (atomic_read(&obj->usecnt))
2020                 return -EBUSY;
2021
2022         kfree(obj);
2023         return 0;
2024 }
2025
2026 static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
2027                                      struct mlx5_ib_flow_prio *ft_prio,
2028                                      enum mlx5_flow_namespace_type ns_type)
2029 {
2030         struct mlx5_flow_table_attr ft_attr = {};
2031         struct mlx5_flow_namespace *ns;
2032         struct mlx5_flow_table *ft;
2033
2034         if (ft_prio->anchor.ft)
2035                 return 0;
2036
2037         ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
2038         if (!ns)
2039                 return -EOPNOTSUPP;
2040
2041         ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
2042         ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
2043         ft_attr.prio = 0;
2044         ft_attr.max_fte = 2;
2045         ft_attr.level = 1;
2046
2047         ft = mlx5_create_flow_table(ns, &ft_attr);
2048         if (IS_ERR(ft))
2049                 return PTR_ERR(ft);
2050
2051         ft_prio->anchor.ft = ft;
2052
2053         return 0;
2054 }
2055
2056 static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
2057 {
2058         if (ft_prio->anchor.ft) {
2059                 mlx5_destroy_flow_table(ft_prio->anchor.ft);
2060                 ft_prio->anchor.ft = NULL;
2061         }
2062 }
2063
2064 static int
2065 steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2066 {
2067         int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2068         struct mlx5_flow_group *fg;
2069         void *flow_group_in;
2070         int err = 0;
2071
2072         if (ft_prio->anchor.fg_drop)
2073                 return 0;
2074
2075         flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2076         if (!flow_group_in)
2077                 return -ENOMEM;
2078
2079         MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
2080         MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
2081
2082         fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2083         if (IS_ERR(fg)) {
2084                 err = PTR_ERR(fg);
2085                 goto out;
2086         }
2087
2088         ft_prio->anchor.fg_drop = fg;
2089
2090 out:
2091         kvfree(flow_group_in);
2092
2093         return err;
2094 }
2095
2096 static void
2097 steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2098 {
2099         if (ft_prio->anchor.fg_drop) {
2100                 mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
2101                 ft_prio->anchor.fg_drop = NULL;
2102         }
2103 }
2104
2105 static int
2106 steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2107 {
2108         int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2109         struct mlx5_flow_group *fg;
2110         void *flow_group_in;
2111         int err = 0;
2112
2113         if (ft_prio->anchor.fg_goto_table)
2114                 return 0;
2115
2116         flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2117         if (!flow_group_in)
2118                 return -ENOMEM;
2119
2120         fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2121         if (IS_ERR(fg)) {
2122                 err = PTR_ERR(fg);
2123                 goto out;
2124         }
2125         ft_prio->anchor.fg_goto_table = fg;
2126
2127 out:
2128         kvfree(flow_group_in);
2129
2130         return err;
2131 }
2132
2133 static void
2134 steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2135 {
2136         if (ft_prio->anchor.fg_goto_table) {
2137                 mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
2138                 ft_prio->anchor.fg_goto_table = NULL;
2139         }
2140 }
2141
2142 static int
2143 steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2144 {
2145         struct mlx5_flow_act flow_act = {};
2146         struct mlx5_flow_handle *handle;
2147
2148         if (ft_prio->anchor.rule_drop)
2149                 return 0;
2150
2151         flow_act.fg = ft_prio->anchor.fg_drop;
2152         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
2153
2154         handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2155                                      NULL, 0);
2156         if (IS_ERR(handle))
2157                 return PTR_ERR(handle);
2158
2159         ft_prio->anchor.rule_drop = handle;
2160
2161         return 0;
2162 }
2163
2164 static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2165 {
2166         if (ft_prio->anchor.rule_drop) {
2167                 mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
2168                 ft_prio->anchor.rule_drop = NULL;
2169         }
2170 }
2171
2172 static int
2173 steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2174 {
2175         struct mlx5_flow_destination dest = {};
2176         struct mlx5_flow_act flow_act = {};
2177         struct mlx5_flow_handle *handle;
2178
2179         if (ft_prio->anchor.rule_goto_table)
2180                 return 0;
2181
2182         flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2183         flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
2184         flow_act.fg = ft_prio->anchor.fg_goto_table;
2185
2186         dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
2187         dest.ft = ft_prio->flow_table;
2188
2189         handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2190                                      &dest, 1);
2191         if (IS_ERR(handle))
2192                 return PTR_ERR(handle);
2193
2194         ft_prio->anchor.rule_goto_table = handle;
2195
2196         return 0;
2197 }
2198
2199 static void
2200 steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2201 {
2202         if (ft_prio->anchor.rule_goto_table) {
2203                 mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
2204                 ft_prio->anchor.rule_goto_table = NULL;
2205         }
2206 }
2207
2208 static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
2209                                       struct mlx5_ib_flow_prio *ft_prio,
2210                                       enum mlx5_flow_namespace_type ns_type)
2211 {
2212         int err;
2213
2214         err = steering_anchor_create_ft(dev, ft_prio, ns_type);
2215         if (err)
2216                 return err;
2217
2218         err = steering_anchor_create_fg_drop(ft_prio);
2219         if (err)
2220                 goto destroy_ft;
2221
2222         err = steering_anchor_create_fg_goto_table(ft_prio);
2223         if (err)
2224                 goto destroy_fg_drop;
2225
2226         err = steering_anchor_create_rule_drop(ft_prio);
2227         if (err)
2228                 goto destroy_fg_goto_table;
2229
2230         err = steering_anchor_create_rule_goto_table(ft_prio);
2231         if (err)
2232                 goto destroy_rule_drop;
2233
2234         return 0;
2235
2236 destroy_rule_drop:
2237         steering_anchor_destroy_rule_drop(ft_prio);
2238 destroy_fg_goto_table:
2239         steering_anchor_destroy_fg_goto_table(ft_prio);
2240 destroy_fg_drop:
2241         steering_anchor_destroy_fg_drop(ft_prio);
2242 destroy_ft:
2243         steering_anchor_destroy_ft(ft_prio);
2244
2245         return err;
2246 }
2247
/*
 * Tear down every steering-anchor resource of @ft_prio: goto-table rule,
 * drop rule, goto-table flow group, drop flow group and finally the anchor
 * flow table. This is the reverse of the creation order used by
 * steering_anchor_create_res().
 */
2248 static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
2249 {
2250         steering_anchor_destroy_rule_goto_table(ft_prio);
2251         steering_anchor_destroy_rule_drop(ft_prio);
2252         steering_anchor_destroy_fg_goto_table(ft_prio);
2253         steering_anchor_destroy_fg_drop(ft_prio);
2254         steering_anchor_destroy_ft(ft_prio);
2255 }
2256
2257 static int steering_anchor_cleanup(struct ib_uobject *uobject,
2258                                    enum rdma_remove_reason why,
2259                                    struct uverbs_attr_bundle *attrs)
2260 {
2261         struct mlx5_ib_steering_anchor *obj = uobject->object;
2262
2263         if (atomic_read(&obj->usecnt))
2264                 return -EBUSY;
2265
2266         mutex_lock(&obj->dev->flow_db->lock);
2267         if (!--obj->ft_prio->anchor.rule_goto_table_ref)
2268                 steering_anchor_destroy_rule_goto_table(obj->ft_prio);
2269
2270         put_flow_table(obj->dev, obj->ft_prio, true);
2271         mutex_unlock(&obj->dev->flow_db->lock);
2272
2273         kfree(obj);
2274         return 0;
2275 }
2276
2277 static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
2278                               int count)
2279 {
2280         while (count--)
2281                 mlx5_steering_anchor_destroy_res(&prio[count]);
2282 }
2283
/*
 * Destroy the steering-anchor resources of every flow priority array kept in
 * dev->flow_db (prios, egress_prios, sniffer, egress, fdb, rdma_rx, rdma_tx),
 * each with its own array length constant.
 */
2284 void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
2285 {
2286         fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
2287         fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
2288         fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
2289         fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
2290         fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
2291         fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
2292         fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
2293 }
2294
2295 static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
2296                               struct mlx5_ib_flow_matcher *obj)
2297 {
2298         enum mlx5_ib_uapi_flow_table_type ft_type =
2299                 MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
2300         u32 flags;
2301         int err;
2302
2303         /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
2304          * users should switch to it. We leave this to not break userspace
2305          */
2306         if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
2307             uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
2308                 return -EINVAL;
2309
2310         if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
2311                 err = uverbs_get_const(&ft_type, attrs,
2312                                        MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
2313                 if (err)
2314                         return err;
2315
2316                 err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
2317                 if (err)
2318                         return err;
2319
2320                 return 0;
2321         }
2322
2323         if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
2324                 err = uverbs_get_flags32(&flags, attrs,
2325                                          MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2326                                          IB_FLOW_ATTR_FLAGS_EGRESS);
2327                 if (err)
2328                         return err;
2329
2330                 if (flags)
2331                         return mlx5_ib_ft_type_to_namespace(
2332                                 MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
2333                                 &obj->ns_type);
2334         }
2335
2336         obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
2337
2338         return 0;
2339 }
2340
2341 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
2342         struct uverbs_attr_bundle *attrs)
2343 {
2344         struct ib_uobject *uobj = uverbs_attr_get_uobject(
2345                 attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
2346         struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2347         struct mlx5_ib_flow_matcher *obj;
2348         int err;
2349
2350         obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
2351         if (!obj)
2352                 return -ENOMEM;
2353
2354         obj->mask_len = uverbs_attr_get_len(
2355                 attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2356         err = uverbs_copy_from(&obj->matcher_mask,
2357                                attrs,
2358                                MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2359         if (err)
2360                 goto end;
2361
2362         obj->flow_type = uverbs_attr_get_enum_id(
2363                 attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2364
2365         if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
2366                 err = uverbs_copy_from(&obj->priority,
2367                                        attrs,
2368                                        MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2369                 if (err)
2370                         goto end;
2371         }
2372
2373         err = uverbs_copy_from(&obj->match_criteria_enable,
2374                                attrs,
2375                                MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
2376         if (err)
2377                 goto end;
2378
2379         err = mlx5_ib_matcher_ns(attrs, obj);
2380         if (err)
2381                 goto end;
2382
2383         if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
2384             mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
2385                 err = -EINVAL;
2386                 goto end;
2387         }
2388
2389         uobj->object = obj;
2390         obj->mdev = dev->mdev;
2391         atomic_set(&obj->usecnt, 0);
2392         return 0;
2393
2394 end:
2395         kfree(obj);
2396         return err;
2397 }
2398
2399 static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
2400         struct uverbs_attr_bundle *attrs)
2401 {
2402         struct ib_uobject *uobj = uverbs_attr_get_uobject(
2403                 attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
2404         struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2405         enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
2406         enum mlx5_flow_namespace_type ns_type;
2407         struct mlx5_ib_steering_anchor *obj;
2408         struct mlx5_ib_flow_prio *ft_prio;
2409         u16 priority;
2410         u32 ft_id;
2411         int err;
2412
2413         if (!capable(CAP_NET_RAW))
2414                 return -EPERM;
2415
2416         err = uverbs_get_const(&ib_uapi_ft_type, attrs,
2417                                MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
2418         if (err)
2419                 return err;
2420
2421         err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
2422         if (err)
2423                 return err;
2424
2425         err = uverbs_copy_from(&priority, attrs,
2426                                MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
2427         if (err)
2428                 return err;
2429
2430         obj = kzalloc(sizeof(*obj), GFP_KERNEL);
2431         if (!obj)
2432                 return -ENOMEM;
2433
2434         mutex_lock(&dev->flow_db->lock);
2435
2436         ft_prio = _get_flow_table(dev, priority, ns_type, 0);
2437         if (IS_ERR(ft_prio)) {
2438                 err = PTR_ERR(ft_prio);
2439                 goto free_obj;
2440         }
2441
2442         ft_prio->refcount++;
2443
2444         if (!ft_prio->anchor.rule_goto_table_ref) {
2445                 err = steering_anchor_create_res(dev, ft_prio, ns_type);
2446                 if (err)
2447                         goto put_flow_table;
2448         }
2449
2450         ft_prio->anchor.rule_goto_table_ref++;
2451
2452         ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
2453
2454         err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2455                              &ft_id, sizeof(ft_id));
2456         if (err)
2457                 goto destroy_res;
2458
2459         mutex_unlock(&dev->flow_db->lock);
2460
2461         uobj->object = obj;
2462         obj->dev = dev;
2463         obj->ft_prio = ft_prio;
2464         atomic_set(&obj->usecnt, 0);
2465
2466         return 0;
2467
2468 destroy_res:
2469         --ft_prio->anchor.rule_goto_table_ref;
2470         mlx5_steering_anchor_destroy_res(ft_prio);
2471 put_flow_table:
2472         put_flow_table(dev, ft_prio, true);
2473         mutex_unlock(&dev->flow_db->lock);
2474 free_obj:
2475         kfree(obj);
2476
2477         return err;
2478 }
2479
2480 static struct ib_flow_action *
2481 mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
2482                              enum mlx5_ib_uapi_flow_table_type ft_type,
2483                              u8 num_actions, void *in)
2484 {
2485         enum mlx5_flow_namespace_type namespace;
2486         struct mlx5_ib_flow_action *maction;
2487         int ret;
2488
2489         ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2490         if (ret)
2491                 return ERR_PTR(-EINVAL);
2492
2493         maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2494         if (!maction)
2495                 return ERR_PTR(-ENOMEM);
2496
2497         maction->flow_action_raw.modify_hdr =
2498                 mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
2499
2500         if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
2501                 ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
2502                 kfree(maction);
2503                 return ERR_PTR(ret);
2504         }
2505         maction->flow_action_raw.sub_type =
2506                 MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
2507         maction->flow_action_raw.dev = dev;
2508
2509         return &maction->ib_action;
2510 }
2511
2512 static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
2513 {
2514         return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
2515                                          max_modify_header_actions) ||
2516                MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
2517                                          max_modify_header_actions) ||
2518                MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
2519                                          max_modify_header_actions);
2520 }
2521
2522 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
2523         struct uverbs_attr_bundle *attrs)
2524 {
2525         struct ib_uobject *uobj = uverbs_attr_get_uobject(
2526                 attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
2527         struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2528         enum mlx5_ib_uapi_flow_table_type ft_type;
2529         struct ib_flow_action *action;
2530         int num_actions;
2531         void *in;
2532         int ret;
2533
2534         if (!mlx5_ib_modify_header_supported(mdev))
2535                 return -EOPNOTSUPP;
2536
2537         in = uverbs_attr_get_alloced_ptr(attrs,
2538                 MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
2539
2540         num_actions = uverbs_attr_ptr_get_array_size(
2541                 attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2542                 MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
2543         if (num_actions < 0)
2544                 return num_actions;
2545
2546         ret = uverbs_get_const(&ft_type, attrs,
2547                                MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
2548         if (ret)
2549                 return ret;
2550         action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
2551         if (IS_ERR(action))
2552                 return PTR_ERR(action);
2553
2554         uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
2555                                        IB_FLOW_ACTION_UNSPECIFIED);
2556
2557         return 0;
2558 }
2559
2560 static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
2561                                                       u8 packet_reformat_type,
2562                                                       u8 ft_type)
2563 {
2564         switch (packet_reformat_type) {
2565         case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2566                 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2567                         return MLX5_CAP_FLOWTABLE(ibdev->mdev,
2568                                                   encap_general_header);
2569                 break;
2570         case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2571                 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2572                         return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
2573                                 reformat_l2_to_l3_tunnel);
2574                 break;
2575         case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2576                 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2577                         return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
2578                                 reformat_l3_tunnel_to_l2);
2579                 break;
2580         case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
2581                 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2582                         return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
2583                 break;
2584         default:
2585                 break;
2586         }
2587
2588         return false;
2589 }
2590
2591 static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
2592 {
2593         switch (dv_prt) {
2594         case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2595                 *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
2596                 break;
2597         case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2598                 *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
2599                 break;
2600         case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2601                 *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
2602                 break;
2603         default:
2604                 return -EINVAL;
2605         }
2606
2607         return 0;
2608 }
2609
2610 static int mlx5_ib_flow_action_create_packet_reformat_ctx(
2611         struct mlx5_ib_dev *dev,
2612         struct mlx5_ib_flow_action *maction,
2613         u8 ft_type, u8 dv_prt,
2614         void *in, size_t len)
2615 {
2616         struct mlx5_pkt_reformat_params reformat_params;
2617         enum mlx5_flow_namespace_type namespace;
2618         u8 prm_prt;
2619         int ret;
2620
2621         ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2622         if (ret)
2623                 return ret;
2624
2625         ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
2626         if (ret)
2627                 return ret;
2628
2629         memset(&reformat_params, 0, sizeof(reformat_params));
2630         reformat_params.type = prm_prt;
2631         reformat_params.size = len;
2632         reformat_params.data = in;
2633         maction->flow_action_raw.pkt_reformat =
2634                 mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
2635                                            namespace);
2636         if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
2637                 ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
2638                 return ret;
2639         }
2640
2641         maction->flow_action_raw.sub_type =
2642                 MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
2643         maction->flow_action_raw.dev = dev;
2644
2645         return 0;
2646 }
2647
2648 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
2649         struct uverbs_attr_bundle *attrs)
2650 {
2651         struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
2652                 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
2653         struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2654         enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
2655         enum mlx5_ib_uapi_flow_table_type ft_type;
2656         struct mlx5_ib_flow_action *maction;
2657         int ret;
2658
2659         ret = uverbs_get_const(&ft_type, attrs,
2660                                MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
2661         if (ret)
2662                 return ret;
2663
2664         ret = uverbs_get_const(&dv_prt, attrs,
2665                                MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
2666         if (ret)
2667                 return ret;
2668
2669         if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
2670                 return -EOPNOTSUPP;
2671
2672         maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2673         if (!maction)
2674                 return -ENOMEM;
2675
2676         if (dv_prt ==
2677             MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
2678                 maction->flow_action_raw.sub_type =
2679                         MLX5_IB_FLOW_ACTION_DECAP;
2680                 maction->flow_action_raw.dev = mdev;
2681         } else {
2682                 void *in;
2683                 int len;
2684
2685                 in = uverbs_attr_get_alloced_ptr(attrs,
2686                         MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2687                 if (IS_ERR(in)) {
2688                         ret = PTR_ERR(in);
2689                         goto free_maction;
2690                 }
2691
2692                 len = uverbs_attr_get_len(attrs,
2693                         MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2694
2695                 ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
2696                         maction, ft_type, dv_prt, in, len);
2697                 if (ret)
2698                         goto free_maction;
2699         }
2700
2701         uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
2702                                        IB_FLOW_ACTION_UNSPECIFIED);
2703         return 0;
2704
2705 free_maction:
2706         kfree(maction);
2707         return ret;
2708 }
2709
/*
 * Attribute layout of the MLX5_IB_METHOD_CREATE_FLOW method.
 *
 * Marked UA_MANDATORY: the new flow handle, the match value buffer (between
 * 1 and sizeof(struct mlx5_ib_match_params) bytes) and the flow matcher.
 * Marked UA_OPTIONAL: the flow actions array, the flow tag, the DEVX counters
 * object with its offset, and the creation flags. The destination QP and
 * destination DEVX object carry neither marker.
 */
2710 DECLARE_UVERBS_NAMED_METHOD(
2711         MLX5_IB_METHOD_CREATE_FLOW,
2712         UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2713                         UVERBS_OBJECT_FLOW,
2714                         UVERBS_ACCESS_NEW,
2715                         UA_MANDATORY),
2716         UVERBS_ATTR_PTR_IN(
2717                 MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
2718                 UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2719                 UA_MANDATORY,
2720                 UA_ALLOC_AND_COPY),
2721         UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
2722                         MLX5_IB_OBJECT_FLOW_MATCHER,
2723                         UVERBS_ACCESS_READ,
2724                         UA_MANDATORY),
2725         UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
2726                         UVERBS_OBJECT_QP,
2727                         UVERBS_ACCESS_READ),
2728         UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
2729                         MLX5_IB_OBJECT_DEVX_OBJ,
2730                         UVERBS_ACCESS_READ),
2731         UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
2732                              UVERBS_OBJECT_FLOW_ACTION,
2733                              UVERBS_ACCESS_READ, 1,
2734                              MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
2735                              UA_OPTIONAL),
2736         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
2737                            UVERBS_ATTR_TYPE(u32),
2738                            UA_OPTIONAL),
2739         UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
2740                              MLX5_IB_OBJECT_DEVX_OBJ,
2741                              UVERBS_ACCESS_READ, 1, 1,
2742                              UA_OPTIONAL),
2743         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
2744                            UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
2745                            UA_OPTIONAL,
2746                            UA_ALLOC_AND_COPY),
2747         UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
2748                              enum mlx5_ib_create_flow_flags,
2749                              UA_OPTIONAL));
2750
/*
 * Attribute layout of the MLX5_IB_METHOD_DESTROY_FLOW method: a single
 * mandatory flow handle opened with destroy access. It reuses the
 * MLX5_IB_ATTR_CREATE_FLOW_HANDLE attribute id.
 */
2751 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2752         MLX5_IB_METHOD_DESTROY_FLOW,
2753         UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2754                         UVERBS_OBJECT_FLOW,
2755                         UVERBS_ACCESS_DESTROY,
2756                         UA_MANDATORY));
2757
/*
 * Attach the mlx5 create/destroy flow methods to UVERBS_OBJECT_FLOW under
 * the name mlx5_ib_fs, which mlx5_ib_flow_defs references.
 */
2758 ADD_UVERBS_METHODS(mlx5_ib_fs,
2759                    UVERBS_OBJECT_FLOW,
2760                    &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
2761                    &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
2762
/*
 * Attribute layout of the create-modify-header flow action method. All three
 * attributes are mandatory: the new flow action handle, the PRM actions
 * buffer (at least one set_add_copy_action_in_auto element in size) and the
 * flow table type.
 */
2763 DECLARE_UVERBS_NAMED_METHOD(
2764         MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
2765         UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
2766                         UVERBS_OBJECT_FLOW_ACTION,
2767                         UVERBS_ACCESS_NEW,
2768                         UA_MANDATORY),
2769         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2770                            UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
2771                                    set_add_copy_action_in_auto)),
2772                            UA_MANDATORY,
2773                            UA_ALLOC_AND_COPY),
2774         UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
2775                              enum mlx5_ib_uapi_flow_table_type,
2776                              UA_MANDATORY));
2777
/*
 * Attribute layout of the create-packet-reformat flow action method.
 * Mandatory: the new flow action handle, the reformat type and the flow
 * table type. The reformat data buffer (at least 1 byte) is optional; the
 * handler does not read it for the L2_TUNNEL_TO_L2 type.
 */
2778 DECLARE_UVERBS_NAMED_METHOD(
2779         MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
2780         UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
2781                         UVERBS_OBJECT_FLOW_ACTION,
2782                         UVERBS_ACCESS_NEW,
2783                         UA_MANDATORY),
2784         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
2785                            UVERBS_ATTR_MIN_SIZE(1),
2786                            UA_ALLOC_AND_COPY,
2787                            UA_OPTIONAL),
2788         UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
2789                              enum mlx5_ib_uapi_flow_action_packet_reformat_type,
2790                              UA_MANDATORY),
2791         UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
2792                              enum mlx5_ib_uapi_flow_table_type,
2793                              UA_MANDATORY));
2794
/*
 * Attach the mlx5 modify-header and packet-reformat creation methods to
 * UVERBS_OBJECT_FLOW_ACTION under the name mlx5_ib_flow_actions, which
 * mlx5_ib_flow_defs references.
 */
2795 ADD_UVERBS_METHODS(
2796         mlx5_ib_flow_actions,
2797         UVERBS_OBJECT_FLOW_ACTION,
2798         &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
2799         &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
2800
/*
 * Attribute layout of the flow matcher create method.
 *
 * Mandatory: the new matcher handle, the match mask (between 1 and
 * sizeof(struct mlx5_ib_match_params) bytes), the flow type enum and the
 * match criteria byte. Optional: the legacy flow flags and the flow table
 * type; mlx5_ib_matcher_ns() rejects requests that supply both.
 */
2801 DECLARE_UVERBS_NAMED_METHOD(
2802         MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
2803         UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
2804                         MLX5_IB_OBJECT_FLOW_MATCHER,
2805                         UVERBS_ACCESS_NEW,
2806                         UA_MANDATORY),
2807         UVERBS_ATTR_PTR_IN(
2808                 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
2809                 UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2810                 UA_MANDATORY),
2811         UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
2812                             mlx5_ib_flow_type,
2813                             UA_MANDATORY),
2814         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
2815                            UVERBS_ATTR_TYPE(u8),
2816                            UA_MANDATORY),
2817         UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2818                              enum ib_flow_flags,
2819                              UA_OPTIONAL),
2820         UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
2821                              enum mlx5_ib_uapi_flow_table_type,
2822                              UA_OPTIONAL));
2823
/*
 * Attribute layout of the flow matcher destroy method: a single mandatory
 * matcher handle opened with destroy access.
 */
2824 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2825         MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
2826         UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
2827                         MLX5_IB_OBJECT_FLOW_MATCHER,
2828                         UVERBS_ACCESS_DESTROY,
2829                         UA_MANDATORY));
2830
/*
 * The flow matcher uverbs object: IDR-allocated, cleaned up through
 * flow_matcher_cleanup, and exposing the create and destroy methods.
 */
2831 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
2832                             UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
2833                             &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
2834                             &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
2835
/*
 * Attribute layout of the steering anchor create method. All attributes are
 * mandatory: the new anchor handle, the flow table type, the u16 priority
 * and the u32 flow table id.
 *
 * NOTE(review): the handler writes the flow table id back with
 * uverbs_copy_to(), yet the attribute is declared with UVERBS_ATTR_PTR_IN -
 * confirm the direction is intentional.
 */
2836 DECLARE_UVERBS_NAMED_METHOD(
2837         MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
2838         UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
2839                         MLX5_IB_OBJECT_STEERING_ANCHOR,
2840                         UVERBS_ACCESS_NEW,
2841                         UA_MANDATORY),
2842         UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
2843                              enum mlx5_ib_uapi_flow_table_type,
2844                              UA_MANDATORY),
2845         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
2846                            UVERBS_ATTR_TYPE(u16),
2847                            UA_MANDATORY),
2848         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2849                            UVERBS_ATTR_TYPE(u32),
2850                            UA_MANDATORY));
2851
/*
 * Attribute layout of the steering anchor destroy method: a single mandatory
 * anchor handle opened with destroy access.
 */
2852 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2853         MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
2854         UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
2855                         MLX5_IB_OBJECT_STEERING_ANCHOR,
2856                         UVERBS_ACCESS_DESTROY,
2857                         UA_MANDATORY));
2858
/*
 * The steering anchor uverbs object: IDR-allocated, cleaned up through
 * steering_anchor_cleanup(), and exposing the create and destroy methods.
 */
2859 DECLARE_UVERBS_NAMED_OBJECT(
2860         MLX5_IB_OBJECT_STEERING_ANCHOR,
2861         UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
2862         &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
2863         &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
2864
/*
 * uAPI definition chain for the flow objects declared in this file: the flow
 * matcher object, the mlx5 methods added to the FLOW and FLOW_ACTION objects,
 * and the steering anchor object, which is gated by
 * mlx5_ib_shared_ft_allowed. The empty entry terminates the array.
 */
2865 const struct uapi_definition mlx5_ib_flow_defs[] = {
2866         UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2867                 MLX5_IB_OBJECT_FLOW_MATCHER),
2868         UAPI_DEF_CHAIN_OBJ_TREE(
2869                 UVERBS_OBJECT_FLOW,
2870                 &mlx5_ib_fs),
2871         UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
2872                                 &mlx5_ib_flow_actions),
2873         UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2874                 MLX5_IB_OBJECT_STEERING_ANCHOR,
2875                 UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
2876         {},
2877 };
2878
/*
 * Flow-related ib_device_ops callbacks; installed on the IB device by
 * mlx5_ib_fs_init().
 */
2879 static const struct ib_device_ops flow_ops = {
2880         .create_flow = mlx5_ib_create_flow,
2881         .destroy_flow = mlx5_ib_destroy_flow,
2882         .destroy_flow_action = mlx5_ib_destroy_flow_action,
2883 };
2884
2885 int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
2886 {
2887         dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
2888
2889         if (!dev->flow_db)
2890                 return -ENOMEM;
2891
2892         mutex_init(&dev->flow_db->lock);
2893
2894         ib_set_device_ops(&dev->ib_dev, &flow_ops);
2895         return 0;
2896 }